1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
19 #include <rte_debug.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
33 #include <rte_ether.h>
34 #include <ethdev_driver.h>
35 #include <rte_prefetch.h>
40 #include <rte_string_fns.h>
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
46 #ifdef RTE_LIBRTE_IEEE1588
47 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
49 #define IGB_TX_IEEE1588_TMST 0
51 /* Bit Mask to indicate what bits required for building TX context */
52 #define IGB_TX_OFFLOAD_MASK ( \
63 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
64 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
67 * Structure associated with each descriptor of the RX ring of a RX queue.
70 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
74 * Structure associated with each descriptor of the TX ring of a TX queue.
77 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
78 uint16_t next_id; /**< Index of next descriptor in ring. */
79 uint16_t last_id; /**< Index of last scattered descriptor. */
86 IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
90 * Structure associated with each RX queue.
93 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
94 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
95 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
96 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
97 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
98 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
99 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
100 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
101 uint16_t nb_rx_desc; /**< number of RX descriptors. */
102 uint16_t rx_tail; /**< current value of RDT register. */
103 uint16_t nb_rx_hold; /**< number of held free RX desc. */
104 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
105 uint16_t queue_id; /**< RX queue index. */
106 uint16_t reg_idx; /**< RX queue register index. */
107 uint16_t port_id; /**< Device port identifier. */
108 uint8_t pthresh; /**< Prefetch threshold register. */
109 uint8_t hthresh; /**< Host threshold register. */
110 uint8_t wthresh; /**< Write-back threshold register. */
111 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
112 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
113 uint32_t flags; /**< RX flags. */
114 uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */
118 * Hardware context number
120 enum igb_advctx_num {
121 IGB_CTX_0 = 0, /**< CTX0 */
122 IGB_CTX_1 = 1, /**< CTX1 */
123 IGB_CTX_NUM = 2, /**< CTX_NUM */
126 /** Offload features */
127 union igb_tx_offload {
130 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
131 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
132 uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
133 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
134 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
136 /* uint64_t unused:8; */
141 * Compare mask for igb_tx_offload.data,
142 * should be in sync with igb_tx_offload layout.
144 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask: bits 0-15 of data, the combined width of l3_len (9 bits) and l2_len (7 bits). */
145 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask: bits 16-31 of data, the width of vlan_tci (16 bits). */
146 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask: bits 32-39 of data, the width of l4_len (8 bits). */
147 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask: bits 40-55 of data, the width of tso_segsz (16 bits). NOTE(review): the bit positions assume the compiler packs the bit-fields LSB-first in declaration order - confirm for each target ABI. */
148 /** MAC/IP header lengths + TCP header length + MSS mask; the VLAN bits are deliberately not part of it. */
149 #define TX_TSO_CMP_MASK \
150 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
153 * Structure used to check whether a new context needs to be built
155 struct igb_advctx_info {
156 uint64_t flags; /**< ol_flags related to context build. */
157 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
158 union igb_tx_offload tx_offload;
159 /** compare mask for tx offload. */
160 union igb_tx_offload tx_offload_mask;
164 * Structure associated with each TX queue.
166 struct igb_tx_queue {
167 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
168 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
169 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
170 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
171 uint32_t txd_type; /**< Device-specific TXD type */
172 uint16_t nb_tx_desc; /**< number of TX descriptors. */
173 uint16_t tx_tail; /**< Current value of TDT register. */
175 /**< Index of first used TX descriptor. */
176 uint16_t queue_id; /**< TX queue index. */
177 uint16_t reg_idx; /**< TX queue register index. */
178 uint16_t port_id; /**< Device port identifier. */
179 uint8_t pthresh; /**< Prefetch threshold register. */
180 uint8_t hthresh; /**< Host threshold register. */
181 uint8_t wthresh; /**< Write-back threshold register. */
183 /**< Current used hardware descriptor. */
185 /**< Start context position for transmit queue. */
186 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
187 /**< Hardware context history.*/
188 uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */
192 #define RTE_PMD_USE_PREFETCH
195 #ifdef RTE_PMD_USE_PREFETCH
196 #define rte_igb_prefetch(p) rte_prefetch0(p)
198 #define rte_igb_prefetch(p) do {} while(0)
201 #ifdef RTE_PMD_PACKET_PREFETCH
202 #define rte_packet_prefetch(p) rte_prefetch1(p)
204 #define rte_packet_prefetch(p) do {} while(0)
208 * Macro for VMDq feature for 1 GbE NIC.
210 #define E1000_VMOLR_SIZE (8)
211 #define IGB_TSO_MAX_HDRLEN (512) /**< Largest l2_len + l3_len + l4_len sum accepted for a TSO request. */
212 #define IGB_TSO_MAX_MSS (9216) /**< Largest tso_segsz accepted for a TSO request. */
214 /*********************************************************************
218 **********************************************************************/
221 * The hardware imposes some limitations on TCP segmentation offload, so
222 * the parameters must be checked for validity.
224 static inline uint64_t
225 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
227 if (!(ol_req & PKT_TX_TCP_SEG))
229 if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
230 ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
231 ol_req &= ~PKT_TX_TCP_SEG;
232 ol_req |= PKT_TX_TCP_CKSUM;
238 * Advanced context descriptors are almost the same between igb and ixgbe.
239 * This is a separate function; there may be an optimization opportunity here.
240 * Rework is required to go with the pre-defined values.
244 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
245 volatile struct e1000_adv_tx_context_desc *ctx_txd,
246 uint64_t ol_flags, union igb_tx_offload tx_offload)
248 uint32_t type_tucmd_mlhl;
249 uint32_t mss_l4len_idx;
250 uint32_t ctx_idx, ctx_curr;
251 uint32_t vlan_macip_lens;
252 union igb_tx_offload tx_offload_mask;
254 ctx_curr = txq->ctx_curr;
255 ctx_idx = ctx_curr + txq->ctx_start;
257 tx_offload_mask.data = 0;
260 /* Specify which HW CTX to upload. */
261 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
263 if (ol_flags & PKT_TX_VLAN_PKT)
264 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
266 /* check if TCP segmentation required for this packet */
267 if (ol_flags & PKT_TX_TCP_SEG) {
268 /* implies IP cksum in IPv4 */
269 if (ol_flags & PKT_TX_IP_CKSUM)
270 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
271 E1000_ADVTXD_TUCMD_L4T_TCP |
272 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
274 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
275 E1000_ADVTXD_TUCMD_L4T_TCP |
276 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
278 tx_offload_mask.data |= TX_TSO_CMP_MASK;
279 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
280 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
281 } else { /* no TSO, check if hardware checksum is needed */
282 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
283 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
285 if (ol_flags & PKT_TX_IP_CKSUM)
286 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
288 switch (ol_flags & PKT_TX_L4_MASK) {
289 case PKT_TX_UDP_CKSUM:
290 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
291 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
292 mss_l4len_idx |= sizeof(struct rte_udp_hdr)
293 << E1000_ADVTXD_L4LEN_SHIFT;
295 case PKT_TX_TCP_CKSUM:
296 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
297 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
298 mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
299 << E1000_ADVTXD_L4LEN_SHIFT;
301 case PKT_TX_SCTP_CKSUM:
302 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
303 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
304 mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
305 << E1000_ADVTXD_L4LEN_SHIFT;
308 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
309 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
314 txq->ctx_cache[ctx_curr].flags = ol_flags;
315 txq->ctx_cache[ctx_curr].tx_offload.data =
316 tx_offload_mask.data & tx_offload.data;
317 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
319 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
320 vlan_macip_lens = (uint32_t)tx_offload.data;
321 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
322 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
323 ctx_txd->u.seqnum_seed = 0;
327 * Check which hardware context can be used. Use the existing match
328 * or create a new context descriptor.
330 static inline uint32_t
331 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
332 union igb_tx_offload tx_offload)
334 /* If match with the current context */
335 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
336 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
337 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
338 return txq->ctx_curr;
341 /* If match with the second context */
343 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
344 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
345 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
346 return txq->ctx_curr;
349 /* Mismatch, use the previous context */
353 static inline uint32_t
354 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
356 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
357 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
360 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
361 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
362 tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
366 static inline uint32_t
367 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
370 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
371 static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
372 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
373 cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
378 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
381 struct igb_tx_queue *txq;
382 struct igb_tx_entry *sw_ring;
383 struct igb_tx_entry *txe, *txn;
384 volatile union e1000_adv_tx_desc *txr;
385 volatile union e1000_adv_tx_desc *txd;
386 struct rte_mbuf *tx_pkt;
387 struct rte_mbuf *m_seg;
388 uint64_t buf_dma_addr;
389 uint32_t olinfo_status;
390 uint32_t cmd_type_len;
399 uint32_t new_ctx = 0;
401 union igb_tx_offload tx_offload = {0};
404 sw_ring = txq->sw_ring;
406 tx_id = txq->tx_tail;
407 txe = &sw_ring[tx_id];
409 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
411 pkt_len = tx_pkt->pkt_len;
413 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
416 * The number of descriptors that must be allocated for a
417 * packet is the number of segments of that packet, plus 1
418 * Context Descriptor for the VLAN Tag Identifier, if any.
419 * Determine the last TX descriptor to allocate in the TX ring
420 * for the packet, starting from the current position (tx_id)
423 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
425 ol_flags = tx_pkt->ol_flags;
426 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
428 /* If a Context Descriptor needs to be built. */
430 tx_offload.l2_len = tx_pkt->l2_len;
431 tx_offload.l3_len = tx_pkt->l3_len;
432 tx_offload.l4_len = tx_pkt->l4_len;
433 tx_offload.vlan_tci = tx_pkt->vlan_tci;
434 tx_offload.tso_segsz = tx_pkt->tso_segsz;
435 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
437 ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
438 /* Only allocate a context descriptor if required. */
439 new_ctx = (ctx == IGB_CTX_NUM);
440 ctx = txq->ctx_curr + txq->ctx_start;
441 tx_last = (uint16_t) (tx_last + new_ctx);
443 if (tx_last >= txq->nb_tx_desc)
444 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
446 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
447 " tx_first=%u tx_last=%u",
448 (unsigned) txq->port_id,
449 (unsigned) txq->queue_id,
455 * Check if there are enough free descriptors in the TX ring
456 * to transmit the next packet.
457 * This operation is based on the two following rules:
459 * 1- Only check that the last needed TX descriptor can be
460 * allocated (by construction, if that descriptor is free,
461 * all intermediate ones are also free).
463 * For this purpose, the index of the last TX descriptor
464 * used for a packet (the "last descriptor" of a packet)
465 * is recorded in the TX entries (the last one included)
466 * that are associated with all TX descriptors allocated
469 * 2- Avoid allocating the last free TX descriptor of the
470 * ring, in order to never set the TDT register with the
471 * same value stored in parallel by the NIC in the TDH
472 * register, which makes the TX engine of the NIC enter
473 * in a deadlock situation.
475 * By extension, avoid allocating a free descriptor that
476 * belongs to the last set of free descriptors allocated
477 * to the same packet previously transmitted.
481 * The "last descriptor" of the previously sent packet, if any,
482 * which used the last descriptor to allocate.
484 tx_end = sw_ring[tx_last].last_id;
487 * The next descriptor following that "last descriptor" in the
490 tx_end = sw_ring[tx_end].next_id;
493 * The "last descriptor" associated with that next descriptor.
495 tx_end = sw_ring[tx_end].last_id;
498 * Check that this descriptor is free.
500 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
507 * Set common flags of all TX Data Descriptors.
509 * The following bits must be set in all Data Descriptors:
510 * - E1000_ADVTXD_DTYP_DATA
511 * - E1000_ADVTXD_DCMD_DEXT
513 * The following bits must be set in the first Data Descriptor
514 * and are ignored in the other ones:
515 * - E1000_ADVTXD_DCMD_IFCS
516 * - E1000_ADVTXD_MAC_1588
517 * - E1000_ADVTXD_DCMD_VLE
519 * The following bits must only be set in the last Data
521 * - E1000_TXD_CMD_EOP
523 * The following bits can be set in any Data Descriptor, but
524 * are only set in the last Data Descriptor:
527 cmd_type_len = txq->txd_type |
528 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
529 if (tx_ol_req & PKT_TX_TCP_SEG)
530 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
531 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
532 #if defined(RTE_LIBRTE_IEEE1588)
533 if (ol_flags & PKT_TX_IEEE1588_TMST)
534 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
537 /* Setup TX Advanced context descriptor if required */
539 volatile struct e1000_adv_tx_context_desc *
542 ctx_txd = (volatile struct
543 e1000_adv_tx_context_desc *)
546 txn = &sw_ring[txe->next_id];
547 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
549 if (txe->mbuf != NULL) {
550 rte_pktmbuf_free_seg(txe->mbuf);
554 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
556 txe->last_id = tx_last;
557 tx_id = txe->next_id;
561 /* Setup the TX Advanced Data Descriptor */
562 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
563 olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
564 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
569 txn = &sw_ring[txe->next_id];
572 if (txe->mbuf != NULL)
573 rte_pktmbuf_free_seg(txe->mbuf);
577 * Set up transmit descriptor.
579 slen = (uint16_t) m_seg->data_len;
580 buf_dma_addr = rte_mbuf_data_iova(m_seg);
581 txd->read.buffer_addr =
582 rte_cpu_to_le_64(buf_dma_addr);
583 txd->read.cmd_type_len =
584 rte_cpu_to_le_32(cmd_type_len | slen);
585 txd->read.olinfo_status =
586 rte_cpu_to_le_32(olinfo_status);
587 txe->last_id = tx_last;
588 tx_id = txe->next_id;
591 } while (m_seg != NULL);
594 * The last packet data descriptor needs End Of Packet (EOP)
595 * and Report Status (RS).
597 txd->read.cmd_type_len |=
598 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
604 * Set the Transmit Descriptor Tail (TDT).
606 E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
607 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
608 (unsigned) txq->port_id, (unsigned) txq->queue_id,
609 (unsigned) tx_id, (unsigned) nb_tx);
610 txq->tx_tail = tx_id;
615 /*********************************************************************
619 **********************************************************************/
621 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
627 for (i = 0; i < nb_pkts; i++) {
630 /* Check some limitations for TSO in hardware */
631 if (m->ol_flags & PKT_TX_TCP_SEG)
632 if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
633 (m->l2_len + m->l3_len + m->l4_len >
634 IGB_TSO_MAX_HDRLEN)) {
639 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
644 #ifdef RTE_ETHDEV_DEBUG_TX
645 ret = rte_validate_tx_offload(m);
651 ret = rte_net_intel_cksum_prepare(m);
661 /*********************************************************************
665 **********************************************************************/
666 #define IGB_PACKET_TYPE_IPV4 0X01
667 #define IGB_PACKET_TYPE_IPV4_TCP 0X11
668 #define IGB_PACKET_TYPE_IPV4_UDP 0X21
669 #define IGB_PACKET_TYPE_IPV4_SCTP 0X41
670 #define IGB_PACKET_TYPE_IPV4_EXT 0X03
671 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP 0X43
672 #define IGB_PACKET_TYPE_IPV6 0X04
673 #define IGB_PACKET_TYPE_IPV6_TCP 0X14
674 #define IGB_PACKET_TYPE_IPV6_UDP 0X24
675 #define IGB_PACKET_TYPE_IPV6_EXT 0X0C
676 #define IGB_PACKET_TYPE_IPV6_EXT_TCP 0X1C
677 #define IGB_PACKET_TYPE_IPV6_EXT_UDP 0X2C
678 #define IGB_PACKET_TYPE_IPV4_IPV6 0X05
679 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP 0X15
680 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP 0X25
681 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
682 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
683 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
684 #define IGB_PACKET_TYPE_MAX 0X80 /* Number of entries in the ptype lookup table. */
685 #define IGB_PACKET_TYPE_MASK 0X7F /* Applied to pkt_info after the shift to form the lookup-table index. */
686 #define IGB_PACKET_TYPE_SHIFT 0X04 /* Right shift applied to pkt_info before masking. */
687 static inline uint32_t
688 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
690 static const uint32_t
691 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
692 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
694 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
695 RTE_PTYPE_L3_IPV4_EXT,
696 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
698 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
699 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
700 RTE_PTYPE_INNER_L3_IPV6,
701 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
702 RTE_PTYPE_L3_IPV6_EXT,
703 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
704 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
705 RTE_PTYPE_INNER_L3_IPV6_EXT,
706 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
707 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
708 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
709 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
710 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
711 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
712 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
713 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
714 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
715 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
716 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
717 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
718 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
719 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
720 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
721 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
722 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
723 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
724 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
725 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
726 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
727 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
728 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
729 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
730 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
731 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
732 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
733 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
735 if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
736 return RTE_PTYPE_UNKNOWN;
738 pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
740 return ptype_table[pkt_info];
743 static inline uint64_t
744 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
746 uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : PKT_RX_RSS_HASH;
748 #if defined(RTE_LIBRTE_IEEE1588)
749 static uint32_t ip_pkt_etqf_map[8] = {
750 0, 0, 0, PKT_RX_IEEE1588_PTP,
754 struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
755 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
757 /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
758 if (hw->mac.type == e1000_i210)
759 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
761 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
769 static inline uint64_t
770 rx_desc_status_to_pkt_flags(uint32_t rx_status)
774 /* Check if VLAN present */
775 pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
776 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
778 #if defined(RTE_LIBRTE_IEEE1588)
779 if (rx_status & E1000_RXD_STAT_TMST)
780 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
785 static inline uint64_t
786 rx_desc_error_to_pkt_flags(uint32_t rx_status)
789 * Bit 30: IPE, IPv4 checksum error
790 * Bit 29: L4I, L4I integrity error
793 static uint64_t error_to_pkt_flags_map[4] = {
794 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
795 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
796 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
797 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
799 return error_to_pkt_flags_map[(rx_status >>
800 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
804 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
807 struct igb_rx_queue *rxq;
808 volatile union e1000_adv_rx_desc *rx_ring;
809 volatile union e1000_adv_rx_desc *rxdp;
810 struct igb_rx_entry *sw_ring;
811 struct igb_rx_entry *rxe;
812 struct rte_mbuf *rxm;
813 struct rte_mbuf *nmb;
814 union e1000_adv_rx_desc rxd;
817 uint32_t hlen_type_rss;
827 rx_id = rxq->rx_tail;
828 rx_ring = rxq->rx_ring;
829 sw_ring = rxq->sw_ring;
830 while (nb_rx < nb_pkts) {
832 * The order of operations here is important as the DD status
833 * bit must not be read after any other descriptor fields.
834 * rx_ring and rxdp are pointing to volatile data so the order
835 * of accesses cannot be reordered by the compiler. If they were
836 * not volatile, they could be reordered which could lead to
837 * using invalid descriptor fields when read from rxd.
839 rxdp = &rx_ring[rx_id];
840 staterr = rxdp->wb.upper.status_error;
841 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
848 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
849 * likely to be invalid and to be dropped by the various
850 * validation checks performed by the network stack.
852 * Allocate a new mbuf to replenish the RX ring descriptor.
853 * If the allocation fails:
854 * - arrange for that RX descriptor to be the first one
855 * being parsed the next time the receive function is
856 * invoked [on the same queue].
858 * - Stop parsing the RX ring and return immediately.
860 * This policy does not drop the packet received in the RX
861 * descriptor for which the allocation of a new mbuf failed.
862 * Thus, it allows that packet to be later retrieved if
863 * mbufs have been freed in the meantime.
864 * As a side effect, holding RX descriptors instead of
865 * systematically giving them back to the NIC may lead to
866 * RX ring exhaustion situations.
867 * However, the NIC can gracefully prevent such situations
868 * to happen by sending specific "back-pressure" flow control
869 * frames to its peer(s).
871 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
872 "staterr=0x%x pkt_len=%u",
873 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
874 (unsigned) rx_id, (unsigned) staterr,
875 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
877 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
879 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
880 "queue_id=%u", (unsigned) rxq->port_id,
881 (unsigned) rxq->queue_id);
882 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
887 rxe = &sw_ring[rx_id];
889 if (rx_id == rxq->nb_rx_desc)
892 /* Prefetch next mbuf while processing current one. */
893 rte_igb_prefetch(sw_ring[rx_id].mbuf);
896 * When next RX descriptor is on a cache-line boundary,
897 * prefetch the next 4 RX descriptors and the next 8 pointers
900 if ((rx_id & 0x3) == 0) {
901 rte_igb_prefetch(&rx_ring[rx_id]);
902 rte_igb_prefetch(&sw_ring[rx_id]);
908 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
909 rxdp->read.hdr_addr = 0;
910 rxdp->read.pkt_addr = dma_addr;
913 * Initialize the returned mbuf.
914 * 1) setup generic mbuf fields:
915 * - number of segments,
918 * - RX port identifier.
919 * 2) integrate hardware offload data, if any:
921 * - IP checksum flag,
922 * - VLAN TCI, if any,
925 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
927 rxm->data_off = RTE_PKTMBUF_HEADROOM;
928 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
931 rxm->pkt_len = pkt_len;
932 rxm->data_len = pkt_len;
933 rxm->port = rxq->port_id;
935 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
936 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
939 * The vlan_tci field is only valid when PKT_RX_VLAN is
940 * set in the pkt_flags field and must be in CPU byte order.
942 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
943 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
944 rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
946 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
948 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
949 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
950 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
951 rxm->ol_flags = pkt_flags;
952 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
953 lo_dword.hs_rss.pkt_info);
956 * Store the mbuf address into the next entry of the array
957 * of returned packets.
959 rx_pkts[nb_rx++] = rxm;
961 rxq->rx_tail = rx_id;
964 * If the number of free RX descriptors is greater than the RX free
965 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
967 * Update the RDT with the value of the last processed RX descriptor
968 * minus 1, to guarantee that the RDT register is never equal to the
969 * RDH register, which creates a "full" ring situation from the
970 * hardware point of view...
972 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
973 if (nb_hold > rxq->rx_free_thresh) {
974 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
975 "nb_hold=%u nb_rx=%u",
976 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
977 (unsigned) rx_id, (unsigned) nb_hold,
979 rx_id = (uint16_t) ((rx_id == 0) ?
980 (rxq->nb_rx_desc - 1) : (rx_id - 1));
981 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
984 rxq->nb_rx_hold = nb_hold;
989 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
992 struct igb_rx_queue *rxq;
993 volatile union e1000_adv_rx_desc *rx_ring;
994 volatile union e1000_adv_rx_desc *rxdp;
995 struct igb_rx_entry *sw_ring;
996 struct igb_rx_entry *rxe;
997 struct rte_mbuf *first_seg;
998 struct rte_mbuf *last_seg;
999 struct rte_mbuf *rxm;
1000 struct rte_mbuf *nmb;
1001 union e1000_adv_rx_desc rxd;
1002 uint64_t dma; /* Physical address of mbuf data buffer */
1004 uint32_t hlen_type_rss;
1014 rx_id = rxq->rx_tail;
1015 rx_ring = rxq->rx_ring;
1016 sw_ring = rxq->sw_ring;
1019 * Retrieve RX context of current packet, if any.
1021 first_seg = rxq->pkt_first_seg;
1022 last_seg = rxq->pkt_last_seg;
1024 while (nb_rx < nb_pkts) {
1027 * The order of operations here is important as the DD status
1028 * bit must not be read after any other descriptor fields.
1029 * rx_ring and rxdp are pointing to volatile data so the order
1030 * of accesses cannot be reordered by the compiler. If they were
1031 * not volatile, they could be reordered which could lead to
1032 * using invalid descriptor fields when read from rxd.
1034 rxdp = &rx_ring[rx_id];
1035 staterr = rxdp->wb.upper.status_error;
1036 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1043 * Allocate a new mbuf to replenish the RX ring descriptor.
1044 * If the allocation fails:
1045 * - arrange for that RX descriptor to be the first one
1046 * being parsed the next time the receive function is
1047 * invoked [on the same queue].
1049 * - Stop parsing the RX ring and return immediately.
1051 * This policy does not drop the packet received in the RX
1052 * descriptor for which the allocation of a new mbuf failed.
1053 * Thus, it allows that packet to be later retrieved if
1054 * mbufs have been freed in the meantime.
1055 * As a side effect, holding RX descriptors instead of
1056 * systematically giving them back to the NIC may lead to
1057 * RX ring exhaustion situations.
1058 * However, the NIC can gracefully prevent such situations
1059 * to happen by sending specific "back-pressure" flow control
1060 * frames to its peer(s).
1062 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1063 "staterr=0x%x data_len=%u",
1064 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1065 (unsigned) rx_id, (unsigned) staterr,
1066 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1068 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1070 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1071 "queue_id=%u", (unsigned) rxq->port_id,
1072 (unsigned) rxq->queue_id);
1073 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1078 rxe = &sw_ring[rx_id];
1080 if (rx_id == rxq->nb_rx_desc)
1083 /* Prefetch next mbuf while processing current one. */
1084 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1087 * When next RX descriptor is on a cache-line boundary,
1088 * prefetch the next 4 RX descriptors and the next 8 pointers
1091 if ((rx_id & 0x3) == 0) {
1092 rte_igb_prefetch(&rx_ring[rx_id]);
1093 rte_igb_prefetch(&sw_ring[rx_id]);
1097 * Update RX descriptor with the physical address of the new
1098 * data buffer of the new allocated mbuf.
1102 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1103 rxdp->read.pkt_addr = dma;
1104 rxdp->read.hdr_addr = 0;
1107 * Set data length & data buffer address of mbuf.
1109 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1110 rxm->data_len = data_len;
1111 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1114 * If this is the first buffer of the received packet,
1115 * set the pointer to the first mbuf of the packet and
1116 * initialize its context.
1117 * Otherwise, update the total length and the number of segments
1118 * of the current scattered packet, and update the pointer to
1119 * the last mbuf of the current packet.
1121 if (first_seg == NULL) {
1123 first_seg->pkt_len = data_len;
1124 first_seg->nb_segs = 1;
1126 first_seg->pkt_len += data_len;
1127 first_seg->nb_segs++;
1128 last_seg->next = rxm;
1132 * If this is not the last buffer of the received packet,
1133 * update the pointer to the last mbuf of the current scattered
1134 * packet and continue to parse the RX ring.
1136 if (! (staterr & E1000_RXD_STAT_EOP)) {
1142 * This is the last buffer of the received packet.
1143 * If the CRC is not stripped by the hardware:
1144 * - Subtract the CRC length from the total packet length.
1145 * - If the last buffer only contains the whole CRC or a part
1146 * of it, free the mbuf associated to the last buffer.
1147 * If part of the CRC is also contained in the previous
1148 * mbuf, subtract the length of that CRC part from the
1149 * data length of the previous mbuf.
1152 if (unlikely(rxq->crc_len > 0)) {
1153 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1154 if (data_len <= RTE_ETHER_CRC_LEN) {
1155 rte_pktmbuf_free_seg(rxm);
1156 first_seg->nb_segs--;
1157 last_seg->data_len = (uint16_t)
1158 (last_seg->data_len -
1159 (RTE_ETHER_CRC_LEN - data_len));
1160 last_seg->next = NULL;
1162 rxm->data_len = (uint16_t)
1163 (data_len - RTE_ETHER_CRC_LEN);
1167 * Initialize the first mbuf of the returned packet:
1168 * - RX port identifier,
1169 * - hardware offload data, if any:
1170 * - RSS flag & hash,
1171 * - IP checksum flag,
1172 * - VLAN TCI, if any,
1175 first_seg->port = rxq->port_id;
1176 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1179 * The vlan_tci field is only valid when PKT_RX_VLAN is
1180 * set in the pkt_flags field and must be in CPU byte order.
1182 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1183 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1184 first_seg->vlan_tci =
1185 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1187 first_seg->vlan_tci =
1188 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1190 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1191 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1192 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1193 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1194 first_seg->ol_flags = pkt_flags;
1195 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1196 lower.lo_dword.hs_rss.pkt_info);
1198 /* Prefetch data of first segment, if configured to do so. */
1199 rte_packet_prefetch((char *)first_seg->buf_addr +
1200 first_seg->data_off);
1203 * Store the mbuf address into the next entry of the array
1204 * of returned packets.
1206 rx_pkts[nb_rx++] = first_seg;
1209 * Setup receipt context for a new packet.
1215 * Record index of the next RX descriptor to probe.
1217 rxq->rx_tail = rx_id;
1220 * Save receive context.
1222 rxq->pkt_first_seg = first_seg;
1223 rxq->pkt_last_seg = last_seg;
1226 * If the number of free RX descriptors is greater than the RX free
1227 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1229 * Update the RDT with the value of the last processed RX descriptor
1230 * minus 1, to guarantee that the RDT register is never equal to the
1231 * RDH register, which creates a "full" ring situation from the
1232 * hardware point of view...
1234 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1235 if (nb_hold > rxq->rx_free_thresh) {
1236 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1237 "nb_hold=%u nb_rx=%u",
1238 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1239 (unsigned) rx_id, (unsigned) nb_hold,
1241 rx_id = (uint16_t) ((rx_id == 0) ?
1242 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1243 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1246 rxq->nb_rx_hold = nb_hold;
1251 * Maximum number of Ring Descriptors.
1253 * Since RDLEN/TDLEN should be multiple of 128bytes, the number of ring
1254 * descriptors should meet the following condition:
1255 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
/*
 * Free every mbuf still referenced by the TX software ring of txq and
 * clear the matching sw_ring slot. Nothing is done when txq->sw_ring is
 * NULL. Each entry is released with rte_pktmbuf_free_seg(), i.e. one
 * segment per ring slot.
 */
1259 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1263 if (txq->sw_ring != NULL) {
1264 for (i = 0; i < txq->nb_tx_desc; i++) {
1265 if (txq->sw_ring[i].mbuf != NULL) {
1266 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1267 txq->sw_ring[i].mbuf = NULL;
/*
 * Release the resources of a TX queue: first the mbufs held by its
 * software ring, then the software ring allocation itself.
 * NOTE(review): any NULL check on txq and the release of the queue
 * structure itself are not visible in this extract - confirm.
 */
1274 igb_tx_queue_release(struct igb_tx_queue *txq)
1277 igb_tx_queue_release_mbufs(txq);
1278 rte_free(txq->sw_ring);
/*
 * Wrapper that takes the TX queue as an untyped pointer and forwards it
 * to igb_tx_queue_release().
 */
1284 eth_igb_tx_queue_release(void *txq)
1286 igb_tx_queue_release(txq);
/*
 * Free the mbufs of already transmitted packets on a TX queue.
 *
 * The walk starts at the oldest ring entry, i.e. the entry that follows
 * the last segment of the packet located at txq->tx_tail. For each packet
 * whose last descriptor reports E1000_TXD_STAT_DD, every segment's mbuf is
 * freed, its sw_ring slot is cleared and the slot's last_id is reset to
 * its own index. The comparison against free_cnt bounds the number of
 * packets handled in one call.
 *
 * NOTE(review): the counter update, the loop exits and the return
 * statement are not visible in this extract; eth_igb_tx_done_cleanup()
 * returns this function's result unchanged.
 */
1290 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1292 struct igb_tx_entry *sw_ring;
1293 volatile union e1000_adv_tx_desc *txr;
1294 uint16_t tx_first; /* First segment analyzed. */
1295 uint16_t tx_id; /* Current segment being processed. */
1296 uint16_t tx_last; /* Last segment in the current packet. */
1297 uint16_t tx_next; /* First segment of the next packet. */
1303 sw_ring = txq->sw_ring;
1306 /* tx_tail is the last sent packet on the sw_ring. Goto the end
1307 * of that packet (the last segment in the packet chain) and
1308 * then the next segment will be the start of the oldest segment
1309 * in the sw_ring. This is the first packet that will be
1310 * attempted to be freed.
1313 /* Get last segment in most recently added packet. */
1314 tx_first = sw_ring[txq->tx_tail].last_id;
1316 /* Get the next segment, which is the oldest segment in ring. */
1317 tx_first = sw_ring[tx_first].next_id;
1319 /* Set the current index to the first. */
1322 /* Loop through each packet. For each packet, verify that an
1323 * mbuf exists and that the last segment is free. If so, free
1327 tx_last = sw_ring[tx_id].last_id;
1329 if (sw_ring[tx_last].mbuf) {
1330 if (txr[tx_last].wb.status &
1331 E1000_TXD_STAT_DD) {
1332 /* Increment the number of packets
1337 /* Get the start of the next packet. */
1338 tx_next = sw_ring[tx_last].next_id;
1340 /* Loop through all segments in a
1344 if (sw_ring[tx_id].mbuf) {
1345 rte_pktmbuf_free_seg(
1346 sw_ring[tx_id].mbuf);
1347 sw_ring[tx_id].mbuf = NULL;
1348 sw_ring[tx_id].last_id = tx_id;
1351 /* Move to next segment. */
1352 tx_id = sw_ring[tx_id].next_id;
1354 } while (tx_id != tx_next);
1356 if (unlikely(count == (int)free_cnt))
1359 /* mbuf still in use, nothing left to
1365 /* There are multiple reasons to be here:
1366 * 1) All the packets on the ring have been
1367 * freed - tx_id is equal to tx_first
1368 * and some packets have been freed.
1370 * 2) Interfaces has not sent a rings worth of
1371 * packets yet, so the segment after tail is
1372 * still empty. Or a previous call to this
1373 * function freed some of the segments but
1374 * not all so there is a hole in the list.
1375 * Hopefully this is a rare case.
1376 * - Walk the list and find the next mbuf. If
1377 * there isn't one, then done.
1379 if (likely(tx_id == tx_first && count != 0))
1382 /* Walk the list and find the next mbuf, if any. */
1384 /* Move to next segment. */
1385 tx_id = sw_ring[tx_id].next_id;
1387 if (sw_ring[tx_id].mbuf)
1390 } while (tx_id != tx_first);
1392 /* Determine why previous loop bailed. If there
1393 * is not an mbuf, done.
1395 if (!sw_ring[tx_id].mbuf)
/*
 * Wrapper that takes the TX queue as an untyped pointer and returns the
 * result of igb_tx_done_cleanup() for the same free_cnt.
 */
1404 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1406 return igb_tx_done_cleanup(txq, free_cnt);
/*
 * Reset the software state of a TX queue. The visible part zeroes the
 * cached context entries (IGB_CTX_NUM elements of struct igb_advctx_info)
 * held in txq->ctx_cache.
 * NOTE(review): other fields reset by this function are not visible in
 * this extract.
 */
1410 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1415 memset((void*)&txq->ctx_cache, 0,
1416 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
/*
 * Bring a TX queue back to its initial state:
 *  - zero every hardware descriptor of the ring,
 *  - flag every descriptor with E1000_TXD_STAT_DD,
 *  - chain the software ring entries through next_id, starting with
 *    prev = nb_tx_desc - 1 so that the last entry links to entry 0,
 *  - select the advanced data descriptor type,
 *  - on 82575, derive ctx_start from the queue id (IGB_CTX_NUM contexts
 *    per queue),
 *  - reset the remaining software state via igb_reset_tx_queue_stat().
 * NOTE(review): the per-entry initialisation other than next_id and the
 * update of prev inside the loop are not visible in this extract.
 */
1420 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1422 static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1423 struct igb_tx_entry *txe = txq->sw_ring;
1425 struct e1000_hw *hw;
1427 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1428 /* Zero out HW ring memory */
1429 for (i = 0; i < txq->nb_tx_desc; i++) {
1430 txq->tx_ring[i] = zeroed_desc;
1433 /* Initialize ring entries */
1434 prev = (uint16_t)(txq->nb_tx_desc - 1);
1435 for (i = 0; i < txq->nb_tx_desc; i++) {
1436 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1438 txd->wb.status = E1000_TXD_STAT_DD;
1441 txe[prev].next_id = i;
1445 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1446 /* 82575 specific, each tx queue will use 2 hw contexts */
1447 if (hw->mac.type == e1000_82575)
1448 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1450 igb_reset_tx_queue_stat(txq);
/*
 * Return the bit mask of TX offloads reported at port level: VLAN
 * insertion, IPv4/UDP/TCP/SCTP checksum, TCP segmentation and
 * multi-segment transmit. The mask built here does not depend on the
 * device.
 */
1454 igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
1456 uint64_t tx_offload_capa;
1459 tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
1460 DEV_TX_OFFLOAD_IPV4_CKSUM |
1461 DEV_TX_OFFLOAD_UDP_CKSUM |
1462 DEV_TX_OFFLOAD_TCP_CKSUM |
1463 DEV_TX_OFFLOAD_SCTP_CKSUM |
1464 DEV_TX_OFFLOAD_TCP_TSO |
1465 DEV_TX_OFFLOAD_MULTI_SEGS;
1467 return tx_offload_capa;
/*
 * Return the per-queue TX offload capabilities, which are the same mask
 * as the port-level one from igb_get_tx_port_offloads_capa().
 */
1471 igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
1473 uint64_t tx_queue_offload_capa;
1475 tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
1477 return tx_queue_offload_capa;
/*
 * Set up TX queue queue_idx of dev with nb_desc descriptors.
 *
 * Steps visible here:
 *  - merge the queue-level and port-level TX offload flags,
 *  - reject nb_desc values that are not a multiple of IGB_TXD_ALIGN or
 *    fall outside [E1000_MIN_RING_DESC, E1000_MAX_RING_DESC],
 *  - log (INFO) when tx_free_thresh / tx_rs_thresh are set, since the
 *    1G driver ignores them, and when WTHRESH is 0 on non-82576 parts,
 *  - release a queue previously installed at the same index,
 *  - allocate the queue structure, reserve a DMA memzone sized for
 *    E1000_MAX_RING_DESC descriptors (so that a later setup call can
 *    resize the ring), and allocate the software ring,
 *  - fill in thresholds, register index (offset by the SR-IOV default
 *    pool queue index when SR-IOV is active), TDT register address and
 *    ring addresses,
 *  - reset the queue and install the burst and prepare callbacks.
 *
 * The prepare callback must be assigned as &eth_igb_prep_pkts; the
 * extract carried a mis-encoded character in place of "&eth".
 *
 * NOTE(review): the error return values and the allocation failure
 * checks are not visible in this extract.
 */
1481 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1484 unsigned int socket_id,
1485 const struct rte_eth_txconf *tx_conf)
1487 const struct rte_memzone *tz;
1488 struct igb_tx_queue *txq;
1489 struct e1000_hw *hw;
1493 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1495 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1498 * Validate number of transmit descriptors.
1499 * It must not exceed hardware maximum, and must be multiple
1502 if (nb_desc % IGB_TXD_ALIGN != 0 ||
1503 (nb_desc > E1000_MAX_RING_DESC) ||
1504 (nb_desc < E1000_MIN_RING_DESC)) {
1509 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1512 if (tx_conf->tx_free_thresh != 0)
1513 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1514 "used for the 1G driver.");
1515 if (tx_conf->tx_rs_thresh != 0)
1516 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1517 "used for the 1G driver.");
1518 if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1519 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1520 "consider setting the TX WTHRESH value to 4, 8, "
1523 /* Free memory prior to re-allocation if needed */
1524 if (dev->data->tx_queues[queue_idx] != NULL) {
1525 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1526 dev->data->tx_queues[queue_idx] = NULL;
1529 /* First allocate the tx queue data structure */
1530 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1531 RTE_CACHE_LINE_SIZE);
1536 * Allocate TX ring hardware descriptors. A memzone large enough to
1537 * handle the maximum ring size is allocated in order to allow for
1538 * resizing in later calls to the queue setup function.
1540 size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1541 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1542 E1000_ALIGN, socket_id);
1544 igb_tx_queue_release(txq);
1548 txq->nb_tx_desc = nb_desc;
1549 txq->pthresh = tx_conf->tx_thresh.pthresh;
1550 txq->hthresh = tx_conf->tx_thresh.hthresh;
1551 txq->wthresh = tx_conf->tx_thresh.wthresh;
1552 if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1554 txq->queue_id = queue_idx;
1555 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1556 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1557 txq->port_id = dev->data->port_id;
1559 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1560 txq->tx_ring_phys_addr = tz->iova;
1562 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1563 /* Allocate software ring */
1564 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1565 sizeof(struct igb_tx_entry) * nb_desc,
1566 RTE_CACHE_LINE_SIZE);
1567 if (txq->sw_ring == NULL) {
1568 igb_tx_queue_release(txq);
1571 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1572 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1574 igb_reset_tx_queue(txq, dev);
1575 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1576 dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1577 dev->data->tx_queues[queue_idx] = txq;
1578 txq->offloads = offloads;
/*
 * Free every mbuf still attached to the RX software ring of rxq and
 * clear the matching sw_ring slot. Nothing is done when rxq->sw_ring is
 * NULL.
 */
1584 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1588 if (rxq->sw_ring != NULL) {
1589 for (i = 0; i < rxq->nb_rx_desc; i++) {
1590 if (rxq->sw_ring[i].mbuf != NULL) {
1591 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1592 rxq->sw_ring[i].mbuf = NULL;
/*
 * Release the resources of an RX queue: first the mbufs held by its
 * software ring, then the software ring allocation itself.
 * NOTE(review): any NULL check on rxq and the release of the queue
 * structure itself are not visible in this extract - confirm.
 */
1599 igb_rx_queue_release(struct igb_rx_queue *rxq)
1602 igb_rx_queue_release_mbufs(rxq);
1603 rte_free(rxq->sw_ring);
/*
 * Wrapper that takes the RX queue as an untyped pointer and forwards it
 * to igb_rx_queue_release().
 */
1609 eth_igb_rx_queue_release(void *rxq)
1611 igb_rx_queue_release(rxq);
/*
 * Bring an RX queue back to its initial state: zero every hardware
 * descriptor of the ring and drop the saved scattered-packet context
 * (pkt_first_seg / pkt_last_seg).
 * NOTE(review): further fields reset here are not visible in this
 * extract.
 */
1615 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1617 static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1620 /* Zero out HW ring memory */
1621 for (i = 0; i < rxq->nb_rx_desc; i++) {
1622 rxq->rx_ring[i] = zeroed_desc;
1626 rxq->pkt_first_seg = NULL;
1627 rxq->pkt_last_seg = NULL;
/*
 * Return the bit mask of RX offloads reported at port level. The base
 * set is common to all MAC types; DEV_RX_OFFLOAD_VLAN_EXTEND is added
 * only for i350, i210 and i211.
 */
1631 igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1633 uint64_t rx_offload_capa;
1634 struct e1000_hw *hw;
1636 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1638 rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
1639 DEV_RX_OFFLOAD_VLAN_FILTER |
1640 DEV_RX_OFFLOAD_IPV4_CKSUM |
1641 DEV_RX_OFFLOAD_UDP_CKSUM |
1642 DEV_RX_OFFLOAD_TCP_CKSUM |
1643 DEV_RX_OFFLOAD_JUMBO_FRAME |
1644 DEV_RX_OFFLOAD_KEEP_CRC |
1645 DEV_RX_OFFLOAD_SCATTER |
1646 DEV_RX_OFFLOAD_RSS_HASH;
1648 if (hw->mac.type == e1000_i350 ||
1649 hw->mac.type == e1000_i210 ||
1650 hw->mac.type == e1000_i211)
1651 rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_EXTEND;
1653 return rx_offload_capa;
/*
 * Return the per-queue RX offload capabilities, selected by MAC type.
 * For e1000_vfadapt_i350 the port-level mask is reused; the other
 * visible assignment yields 0 (no per-queue offloads).
 * NOTE(review): additional case labels and the default label are not
 * visible in this extract - confirm which MAC types share each branch.
 */
1657 igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1659 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1660 uint64_t rx_queue_offload_capa;
1662 switch (hw->mac.type) {
1663 case e1000_vfadapt_i350:
1665 * As only one Rx queue can be used, let per queue offloading
1666 * capability be same to per port queue offloading capability
1667 * for better convenience.
1669 rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1672 rx_queue_offload_capa = 0;
1674 return rx_queue_offload_capa;
/*
 * Set up RX queue queue_idx of dev with nb_desc descriptors, using mp as
 * the mbuf pool.
 *
 * Steps visible here:
 *  - merge the queue-level and port-level RX offload flags,
 *  - reject nb_desc values that are not a multiple of IGB_RXD_ALIGN or
 *    fall outside [E1000_MIN_RING_DESC, E1000_MAX_RING_DESC],
 *  - release a queue previously installed at the same index,
 *  - allocate the queue structure and fill in thresholds, drop enable,
 *    free threshold, register index (offset by the SR-IOV default pool
 *    queue index when SR-IOV is active) and port id,
 *  - set crc_len to RTE_ETHER_CRC_LEN when the port-level offloads
 *    request DEV_RX_OFFLOAD_KEEP_CRC,
 *  - reserve a DMA memzone sized for E1000_MAX_RING_DESC descriptors and
 *    allocate the software ring,
 *  - publish the queue in dev->data->rx_queues and reset it.
 *
 * NOTE(review): the error return values and the allocation failure
 * checks are not visible in this extract.
 */
1678 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1681 unsigned int socket_id,
1682 const struct rte_eth_rxconf *rx_conf,
1683 struct rte_mempool *mp)
1685 const struct rte_memzone *rz;
1686 struct igb_rx_queue *rxq;
1687 struct e1000_hw *hw;
1691 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1693 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1696 * Validate number of receive descriptors.
1697 * It must not exceed hardware maximum, and must be multiple
1700 if (nb_desc % IGB_RXD_ALIGN != 0 ||
1701 (nb_desc > E1000_MAX_RING_DESC) ||
1702 (nb_desc < E1000_MIN_RING_DESC)) {
1706 /* Free memory prior to re-allocation if needed */
1707 if (dev->data->rx_queues[queue_idx] != NULL) {
1708 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1709 dev->data->rx_queues[queue_idx] = NULL;
1712 /* First allocate the RX queue data structure. */
1713 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1714 RTE_CACHE_LINE_SIZE);
1717 rxq->offloads = offloads;
1719 rxq->nb_rx_desc = nb_desc;
1720 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1721 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1722 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1723 if (rxq->wthresh > 0 &&
1724 (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1726 rxq->drop_en = rx_conf->rx_drop_en;
1727 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1728 rxq->queue_id = queue_idx;
1729 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1730 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1731 rxq->port_id = dev->data->port_id;
1732 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1733 rxq->crc_len = RTE_ETHER_CRC_LEN;
1738 * Allocate RX ring hardware descriptors. A memzone large enough to
1739 * handle the maximum ring size is allocated in order to allow for
1740 * resizing in later calls to the queue setup function.
1742 size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1743 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1744 E1000_ALIGN, socket_id);
1746 igb_rx_queue_release(rxq);
1749 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1750 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1751 rxq->rx_ring_phys_addr = rz->iova;
1752 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1754 /* Allocate software ring. */
1755 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1756 sizeof(struct igb_rx_entry) * nb_desc,
1757 RTE_CACHE_LINE_SIZE);
1758 if (rxq->sw_ring == NULL) {
1759 igb_rx_queue_release(rxq);
1762 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1763 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1765 dev->data->rx_queues[queue_idx] = rxq;
1766 igb_reset_rx_queue(rxq);
/*
 * Estimate how many descriptors of RX queue rx_queue_id have been
 * completed. Starting at rx_tail, the ring is probed every
 * IGB_RXQ_SCAN_INTERVAL descriptors for as long as the probed
 * descriptor has E1000_RXD_STAT_DD set, so the count advances in steps
 * of that interval. The probe pointer wraps back to the start of the
 * ring once rx_tail + desc reaches nb_rx_desc.
 * NOTE(review): the initialisation of desc and the return statement are
 * not visible in this extract.
 */
1772 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1774 #define IGB_RXQ_SCAN_INTERVAL 4
1775 volatile union e1000_adv_rx_desc *rxdp;
1776 struct igb_rx_queue *rxq;
1779 rxq = dev->data->rx_queues[rx_queue_id];
1780 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1782 while ((desc < rxq->nb_rx_desc) &&
1783 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1784 desc += IGB_RXQ_SCAN_INTERVAL;
1785 rxdp += IGB_RXQ_SCAN_INTERVAL;
1786 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1787 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1788 desc - rxq->nb_rx_desc]);
/*
 * Tell whether the RX descriptor located offset entries after rx_tail
 * (wrapping around the ring) has E1000_RXD_STAT_DD set. Returns 1 when
 * the bit is set and 0 otherwise.
 * NOTE(review): the value returned when offset >= nb_rx_desc is not
 * visible in this extract.
 */
1795 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1797 volatile union e1000_adv_rx_desc *rxdp;
1798 struct igb_rx_queue *rxq = rx_queue;
1801 if (unlikely(offset >= rxq->nb_rx_desc))
1803 desc = rxq->rx_tail + offset;
1804 if (desc >= rxq->nb_rx_desc)
1805 desc -= rxq->nb_rx_desc;
1807 rxdp = &rxq->rx_ring[desc];
1808 return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
/*
 * Report the state of the RX descriptor located offset entries after
 * rx_tail (wrapping around the ring):
 *  - RTE_ETH_RX_DESC_UNAVAIL when offset falls within the last
 *    nb_rx_hold entries of the ring (presumably descriptors the driver
 *    still holds and has not handed back to hardware - confirm),
 *  - RTE_ETH_RX_DESC_DONE when the descriptor has E1000_RXD_STAT_DD set,
 *  - RTE_ETH_RX_DESC_AVAIL otherwise.
 * NOTE(review): the value returned when offset >= nb_rx_desc is not
 * visible in this extract.
 */
1812 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1814 struct igb_rx_queue *rxq = rx_queue;
1815 volatile uint32_t *status;
1818 if (unlikely(offset >= rxq->nb_rx_desc))
1821 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1822 return RTE_ETH_RX_DESC_UNAVAIL;
1824 desc = rxq->rx_tail + offset;
1825 if (desc >= rxq->nb_rx_desc)
1826 desc -= rxq->nb_rx_desc;
1828 status = &rxq->rx_ring[desc].wb.upper.status_error;
1829 if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1830 return RTE_ETH_RX_DESC_DONE;
1832 return RTE_ETH_RX_DESC_AVAIL;
/*
 * Report the state of the TX descriptor located offset entries after
 * tx_tail (wrapping around the ring): RTE_ETH_TX_DESC_DONE when its
 * write-back status has E1000_TXD_STAT_DD set, RTE_ETH_TX_DESC_FULL
 * otherwise.
 * NOTE(review): the value returned when offset >= nb_tx_desc is not
 * visible in this extract.
 */
1836 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1838 struct igb_tx_queue *txq = tx_queue;
1839 volatile uint32_t *status;
1842 if (unlikely(offset >= txq->nb_tx_desc))
1845 desc = txq->tx_tail + offset;
1846 if (desc >= txq->nb_tx_desc)
1847 desc -= txq->nb_tx_desc;
1849 status = &txq->tx_ring[desc].wb.status;
1850 if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1851 return RTE_ETH_TX_DESC_DONE;
1853 return RTE_ETH_TX_DESC_FULL;
/*
 * For every configured TX and RX queue of dev, free the mbufs held by
 * the queue and reset the queue to its initial state. The queues
 * themselves stay allocated.
 * NOTE(review): any NULL check on the individual queue pointers is not
 * visible in this extract.
 */
1857 igb_dev_clear_queues(struct rte_eth_dev *dev)
1860 struct igb_tx_queue *txq;
1861 struct igb_rx_queue *rxq;
1863 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1864 txq = dev->data->tx_queues[i];
1866 igb_tx_queue_release_mbufs(txq);
1867 igb_reset_tx_queue(txq, dev);
1871 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1872 rxq = dev->data->rx_queues[i];
1874 igb_rx_queue_release_mbufs(rxq);
1875 igb_reset_rx_queue(rxq);
/*
 * Release every RX and TX queue of dev: each queue is released, its slot
 * in dev->data is set to NULL and the DMA memzone reserved under the
 * "rx_ring" / "tx_ring" name for that index is freed. The queue counts
 * are set to 0 afterwards.
 */
1881 igb_dev_free_queues(struct rte_eth_dev *dev)
1885 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1886 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1887 dev->data->rx_queues[i] = NULL;
1888 rte_eth_dma_zone_free(dev, "rx_ring", i);
1890 dev->data->nb_rx_queues = 0;
1892 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1893 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1894 dev->data->tx_queues[i] = NULL;
1895 rte_eth_dma_zone_free(dev, "tx_ring", i);
1897 dev->data->nb_tx_queues = 0;
1901 * Receive Side Scaling (RSS).
1902 * See section 7.1.1.7 in the following document:
1903 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1906 * The source and destination IP addresses of the IP header and the source and
1907 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1908 * against a configurable random key to compute a 32-bit RSS hash result.
1909 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1910 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1911 * RSS output index which is used as the RX queue index where to store the
1913 * The following output is supplied in the RX write-back descriptor:
1914 * - 32-bit result of the Microsoft RSS hash function,
1915 * - 4-bit RSS type field.
1919 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1920 * Used as the default key.
/*
 * 40-byte default RSS hash key. igb_rss_configure() falls back to it when
 * the application supplies no key, and igb_hw_rss_hash_set() programs it
 * as ten 32-bit words.
 */
1922 static uint8_t rss_intel_key[40] = {
1923 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1924 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1925 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1926 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1927 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
/*
 * Disable RSS by clearing the E1000_MRQC_ENABLE_MASK bits of the MRQC
 * register (read-modify-write); the other MRQC bits are preserved.
 */
1931 igb_rss_disable(struct rte_eth_dev *dev)
1933 struct e1000_hw *hw;
1936 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1937 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1938 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1939 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
/*
 * Program the RSS key and the set of hashed protocols into the hardware.
 *
 * When rss_conf->rss_key is not NULL, its first 40 bytes are packed into
 * ten 32-bit words (byte 4*i in bits 7:0, byte 4*i+3 in bits 31:24) and
 * written to the RSSRK register array. MRQC is then rebuilt from
 * scratch: E1000_MRQC_ENABLE_RSS_4Q plus one field bit per ETH_RSS_*
 * flag present in rss_conf->rss_hf, and written in one go.
 */
1943 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1951 hash_key = rss_conf->rss_key;
1952 if (hash_key != NULL) {
1953 /* Fill in RSS hash key */
1954 for (i = 0; i < 10; i++) {
1955 rss_key = hash_key[(i * 4)];
1956 rss_key |= hash_key[(i * 4) + 1] << 8;
1957 rss_key |= hash_key[(i * 4) + 2] << 16;
1958 rss_key |= hash_key[(i * 4) + 3] << 24;
1959 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1963 /* Set configured hashing protocols in MRQC register */
1964 rss_hf = rss_conf->rss_hf;
1965 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1966 if (rss_hf & ETH_RSS_IPV4)
1967 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1968 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1969 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1970 if (rss_hf & ETH_RSS_IPV6)
1971 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1972 if (rss_hf & ETH_RSS_IPV6_EX)
1973 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1974 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1975 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1976 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1977 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1978 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1979 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1980 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1981 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1982 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1983 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1984 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
/*
 * Update the RSS key / hashed protocols at run time.
 *
 * The requested rss_hf is first restricted to IGB_RSS_OFFLOAD_ALL and
 * compared with the current MRQC state: the update may neither enable
 * RSS when it is currently disabled nor disable it when it is currently
 * enabled. With RSS disabled and no hash function requested the call is
 * a no-op returning 0. Otherwise the new configuration is programmed
 * with igb_hw_rss_hash_set().
 * NOTE(review): the error codes returned for the two rejected cases and
 * the final return are not visible in this extract.
 */
1988 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1989 struct rte_eth_rss_conf *rss_conf)
1991 struct e1000_hw *hw;
1995 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1998 * Before changing anything, first check that the update RSS operation
1999 * does not attempt to disable RSS, if RSS was enabled at
2000 * initialization time, or does not attempt to enable RSS, if RSS was
2001 * disabled at initialization time.
2003 rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
2004 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2005 if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
2006 if (rss_hf != 0) /* Enable RSS */
2008 return 0; /* Nothing to do */
2011 if (rss_hf == 0) /* Disable RSS */
2013 igb_hw_rss_hash_set(hw, rss_conf);
/*
 * Read back the current RSS configuration from the hardware.
 *
 * When rss_conf->rss_key is not NULL, the ten RSSRK words are unpacked
 * into it, least significant byte first, so the buffer must hold at
 * least 40 bytes. rss_conf->rss_hf is set to 0 when MRQC does not have
 * E1000_MRQC_ENABLE_RSS_4Q set; otherwise it receives one ETH_RSS_* flag
 * per field bit found in MRQC.
 * NOTE(review): the return statements are not visible in this extract.
 */
2017 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
2018 struct rte_eth_rss_conf *rss_conf)
2020 struct e1000_hw *hw;
2027 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2028 hash_key = rss_conf->rss_key;
2029 if (hash_key != NULL) {
2030 /* Return RSS hash key */
2031 for (i = 0; i < 10; i++) {
2032 rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2033 hash_key[(i * 4)] = rss_key & 0x000000FF;
2034 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2035 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2036 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2040 /* Get RSS functions configured in MRQC register */
2041 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2042 if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2043 rss_conf->rss_hf = 0;
2047 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2048 rss_hf |= ETH_RSS_IPV4;
2049 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2050 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2051 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2052 rss_hf |= ETH_RSS_IPV6;
2053 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2054 rss_hf |= ETH_RSS_IPV6_EX;
2055 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2056 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2057 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2058 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2059 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2060 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2061 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2062 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2063 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2064 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2065 rss_conf->rss_hf = rss_hf;
/*
 * Configure RSS from the device configuration.
 *
 * The 128-entry redirection table is filled so that entry i maps to
 * queue (i % nb_rx_queues), or to queue 0 when only one RX queue exists.
 * The queue index is shifted left by 6 on 82575 and left unshifted on
 * other MAC types. Entries are accumulated four at a time and written as
 * one RETA dword.
 * If none of the requested hash functions is supported
 * (IGB_RSS_OFFLOAD_ALL), RSS is disabled instead. Otherwise the key
 * (rss_intel_key when the application gave none) and the hash functions
 * are programmed with igb_hw_rss_hash_set().
 * NOTE(review): the condition guarding the RETA register write is not
 * visible in this extract.
 */
2070 igb_rss_configure(struct rte_eth_dev *dev)
2072 struct rte_eth_rss_conf rss_conf;
2073 struct e1000_hw *hw;
2077 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2079 /* Fill in redirection table. */
2080 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2081 for (i = 0; i < 128; i++) {
2088 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2089 i % dev->data->nb_rx_queues : 0);
2090 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2092 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2096 * Configure the RSS key and the RSS protocols used to compute
2097 * the RSS hash of input packets.
2099 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2100 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2101 igb_rss_disable(dev);
2104 if (rss_conf.rss_key == NULL)
2105 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2106 igb_hw_rss_hash_set(hw, &rss_conf);
2110 * Check whether the MAC type supports VMDq.
2111 * Return 1 if it is supported; otherwise return 0.
/*
 * Decide from hw->mac.type whether VMDq can be used; an error is logged
 * on the unsupported path.
 * NOTE(review): the case labels and return statements are not visible in
 * this extract, so the exact list of supported MAC types cannot be
 * stated here.
 */
2114 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2116 const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2118 switch (hw->mac.type) {
2139 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
/*
 * Program the hardware for VMDq-only receive, using the vmdq_rx_conf
 * section of the device configuration.
 *
 * Sequence visible here: check VMDq support, disable RSS, enable the
 * VLAN filter in RCTL, enable VMDq in MRQC, set the default pool (when
 * enabled) and the ignore-MAC bit in VT_CTL, rebuild the accept bits of
 * every VMOLR register from cfg->rx_mode, set STRVLAN in every VMOLR on
 * MAC types other than i350, open all VLAN filter table entries, enable
 * the pools for RX in VFRE on MAC types other than 82580, mark receive
 * address 0 as valid with an all-ones address, and finally program one
 * VLVF entry per configured pool map before flushing the writes.
 * NOTE(review): the return values are not visible in this extract.
 */
2145 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2147 struct rte_eth_vmdq_rx_conf *cfg;
2148 struct e1000_hw *hw;
2149 uint32_t mrqc, vt_ctl, vmolr, rctl;
2152 PMD_INIT_FUNC_TRACE();
2154 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2155 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2157 /* Check if mac type can support VMDq, return value of 0 means NOT support */
2158 if (igb_is_vmdq_supported(dev) == 0)
2161 igb_rss_disable(dev);
2163 /* RCTL: enable VLAN filter */
2164 rctl = E1000_READ_REG(hw, E1000_RCTL);
2165 rctl |= E1000_RCTL_VFE;
2166 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2168 /* MRQC: enable vmdq */
2169 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2170 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2171 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2173 /* VTCTL: pool selection according to VLAN tag */
2174 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2175 if (cfg->enable_default_pool)
2176 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2177 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2178 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2180 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2181 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2182 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2183 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2186 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2187 vmolr |= E1000_VMOLR_AUPE;
2188 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2189 vmolr |= E1000_VMOLR_ROMPE;
2190 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2191 vmolr |= E1000_VMOLR_ROPE;
2192 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2193 vmolr |= E1000_VMOLR_BAM;
2194 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2195 vmolr |= E1000_VMOLR_MPME;
2197 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2201 * VMOLR: set STRVLAN as 1 if IGMAC in VTCTL is set as 1
2202 * Both 82576 and 82580 support it
2204 if (hw->mac.type != e1000_i350) {
2205 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2206 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2207 vmolr |= E1000_VMOLR_STRVLAN;
2208 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2212 /* VFTA - enable all vlan filters */
2213 for (i = 0; i < IGB_VFTA_SIZE; i++)
2214 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2216 /* VFRE: 8 pools enabling for rx, both 82576 and i350 support it */
2217 if (hw->mac.type != e1000_82580)
2218 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2221 * RAH/RAL - allow pools to read specific mac addresses
2222 * In this case, all pools should be able to read from mac addr 0
2224 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2225 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2227 /* VLVF: set up filters for vlan tags as configured */
2228 for (i = 0; i < cfg->nb_pool_maps; i++) {
2229 /* set vlan id in VF register and set the valid bit */
2230 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2231 (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2232 ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2233 E1000_VLVF_POOLSEL_MASK)));
2236 E1000_WRITE_FLUSH(hw);
2242 /*********************************************************************
2244 * Enable receive unit.
2246 **********************************************************************/
/*
 * Populate an RX queue with fresh mbufs: for each of the nb_rx_desc ring
 * slots an mbuf is taken from rxq->mb_pool with rte_mbuf_raw_alloc() and
 * its default data IOVA (converted to little endian) is written into the
 * descriptor's packet address, with the header address set to 0. An
 * allocation failure is logged with the queue id.
 * NOTE(review): the failure return value, the store of the mbuf into the
 * software ring entry and the success return are not visible in this
 * extract.
 */
2249 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2251 struct igb_rx_entry *rxe = rxq->sw_ring;
2255 /* Initialize software ring entries. */
2256 for (i = 0; i < rxq->nb_rx_desc; i++) {
2257 volatile union e1000_adv_rx_desc *rxd;
2258 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2261 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2262 "queue_id=%hu", rxq->queue_id);
2266 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2267 rxd = &rxq->rx_ring[i];
2268 rxd->read.hdr_addr = 0;
2269 rxd->read.pkt_addr = dma_addr;
/* Shift applied to the Def_Q value written into MRQC below. */
2276 #define E1000_MRQC_DEF_Q_SHIFT (3)
/*
 * Select the multi-queue RX mode of the device.
 *
 * With SR-IOV active in ETH_8_POOLS mode, MRQC is written with VMDq
 * enabled and Def_Q set to 011b. With SR-IOV inactive, the mode follows
 * dev_conf.rxmode.mq_mode: RSS configuration, VMDq-only configuration,
 * or RSS disabled for ETH_MQ_RX_NONE.
 * NOTE(review): the case label in front of igb_rss_configure(), the
 * default label and the return value are not visible in this extract.
 */
2278 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2280 struct e1000_hw *hw =
2281 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2284 if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2286 * SRIOV active scheme
2287 * FIXME if support RSS together with VMDq & SRIOV
2289 mrqc = E1000_MRQC_ENABLE_VMDQ;
2290 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2291 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2292 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2293 } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2295 * SRIOV inactive scheme
2297 switch (dev->data->dev_conf.rxmode.mq_mode) {
2299 igb_rss_configure(dev);
2301 case ETH_MQ_RX_VMDQ_ONLY:
2302 /*Configure general VMDQ only RX parameters*/
2303 igb_vmdq_rx_hw_configure(dev);
2305 case ETH_MQ_RX_NONE:
2306 /* if mq_mode is none, disable rss mode.*/
2308 igb_rss_disable(dev);
2317 eth_igb_rx_init(struct rte_eth_dev *dev)
2319 struct rte_eth_rxmode *rxmode;
2320 struct e1000_hw *hw;
2321 struct igb_rx_queue *rxq;
2326 uint16_t rctl_bsize;
2330 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2334 * Make sure receives are disabled while setting
2335 * up the descriptor ring.
2337 rctl = E1000_READ_REG(hw, E1000_RCTL);
2338 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2340 rxmode = &dev->data->dev_conf.rxmode;
2343 * Configure support of jumbo frames, if any.
2345 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
2346 uint32_t max_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
2348 rctl |= E1000_RCTL_LPE;
2351 * Set maximum packet length by default, and might be updated
2352 * together with enabling/disabling dual VLAN.
2354 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2355 max_len += VLAN_TAG_SIZE;
2357 E1000_WRITE_REG(hw, E1000_RLPML, max_len);
2359 rctl &= ~E1000_RCTL_LPE;
2361 /* Configure and enable each RX queue. */
2363 dev->rx_pkt_burst = eth_igb_recv_pkts;
2364 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2368 rxq = dev->data->rx_queues[i];
2372 * i350 and i354 vlan packets have vlan tags byte swapped.
2374 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2375 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2376 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2378 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2381 /* Allocate buffers for descriptor rings and set up queue */
2382 ret = igb_alloc_rx_queue_mbufs(rxq);
2387 * Reset crc_len in case it was changed after queue setup by a
2390 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2391 rxq->crc_len = RTE_ETHER_CRC_LEN;
2395 bus_addr = rxq->rx_ring_phys_addr;
2396 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2398 sizeof(union e1000_adv_rx_desc));
2399 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2400 (uint32_t)(bus_addr >> 32));
2401 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2403 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2406 * Configure RX buffer size.
2408 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2409 RTE_PKTMBUF_HEADROOM);
2410 if (buf_size >= 1024) {
2412 * Configure the BSIZEPACKET field of the SRRCTL
2413 * register of the queue.
2414 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2415 * If this field is equal to 0b, then RCTL.BSIZE
2416 * determines the RX packet buffer size.
2418 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2419 E1000_SRRCTL_BSIZEPKT_MASK);
2420 buf_size = (uint16_t) ((srrctl &
2421 E1000_SRRCTL_BSIZEPKT_MASK) <<
2422 E1000_SRRCTL_BSIZEPKT_SHIFT);
2424 /* It adds dual VLAN length for supporting dual VLAN */
2425 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2426 2 * VLAN_TAG_SIZE) > buf_size){
2427 if (!dev->data->scattered_rx)
2429 "forcing scatter mode");
2430 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2431 dev->data->scattered_rx = 1;
2435 * Use BSIZE field of the device RCTL register.
2437 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2438 rctl_bsize = buf_size;
2439 if (!dev->data->scattered_rx)
2440 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2441 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2442 dev->data->scattered_rx = 1;
2445 /* Set if packets are dropped when no descriptors available */
2447 srrctl |= E1000_SRRCTL_DROP_EN;
2449 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2451 /* Enable this RX queue. */
2452 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2453 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2454 rxdctl &= 0xFFF00000;
2455 rxdctl |= (rxq->pthresh & 0x1F);
2456 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2457 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2458 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2461 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2462 if (!dev->data->scattered_rx)
2463 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2464 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2465 dev->data->scattered_rx = 1;
2469 * Setup BSIZE field of RCTL register, if needed.
2470 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
2471 * register, since the code above configures the SRRCTL register of
2472 * the RX queue in such a case.
2473 * All configurable sizes are:
2474 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2475 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
2476 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
2477 * 2048: rctl |= E1000_RCTL_SZ_2048;
2478 * 1024: rctl |= E1000_RCTL_SZ_1024;
2479 * 512: rctl |= E1000_RCTL_SZ_512;
2480 * 256: rctl |= E1000_RCTL_SZ_256;
2482 if (rctl_bsize > 0) {
2483 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2484 rctl |= E1000_RCTL_SZ_512;
2485 else /* 256 <= buf_size < 512 - use 256 */
2486 rctl |= E1000_RCTL_SZ_256;
2490 * Configure RSS if device configured with multiple RX queues.
2492 igb_dev_mq_rx_configure(dev);
2494 /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2495 rctl |= E1000_READ_REG(hw, E1000_RCTL);
2498 * Setup the Checksum Register.
2499 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2501 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2502 rxcsum |= E1000_RXCSUM_PCSD;
2504 /* Enable both L3/L4 rx checksum offload */
2505 if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
2506 rxcsum |= E1000_RXCSUM_IPOFL;
2508 rxcsum &= ~E1000_RXCSUM_IPOFL;
2509 if (rxmode->offloads &
2510 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM))
2511 rxcsum |= E1000_RXCSUM_TUOFL;
2513 rxcsum &= ~E1000_RXCSUM_TUOFL;
2514 if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
2515 rxcsum |= E1000_RXCSUM_CRCOFL;
2517 rxcsum &= ~E1000_RXCSUM_CRCOFL;
2519 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2521 /* Setup the Receive Control Register. */
2522 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
2523 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2525 /* clear STRCRC bit in all queues */
2526 if (hw->mac.type == e1000_i350 ||
2527 hw->mac.type == e1000_i210 ||
2528 hw->mac.type == e1000_i211 ||
2529 hw->mac.type == e1000_i354) {
2530 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2531 rxq = dev->data->rx_queues[i];
2532 uint32_t dvmolr = E1000_READ_REG(hw,
2533 E1000_DVMOLR(rxq->reg_idx));
2534 dvmolr &= ~E1000_DVMOLR_STRCRC;
2535 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2539 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2541 /* set STRCRC bit in all queues */
2542 if (hw->mac.type == e1000_i350 ||
2543 hw->mac.type == e1000_i210 ||
2544 hw->mac.type == e1000_i211 ||
2545 hw->mac.type == e1000_i354) {
2546 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2547 rxq = dev->data->rx_queues[i];
2548 uint32_t dvmolr = E1000_READ_REG(hw,
2549 E1000_DVMOLR(rxq->reg_idx));
2550 dvmolr |= E1000_DVMOLR_STRCRC;
2551 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2556 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2557 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2558 E1000_RCTL_RDMTS_HALF |
2559 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2561 /* Make sure VLAN Filters are off. */
2562 if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2563 rctl &= ~E1000_RCTL_VFE;
2564 /* Don't store bad packets. */
2565 rctl &= ~E1000_RCTL_SBP;
2567 /* Enable Receives. */
2568 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2571 * Setup the HW Rx Head and Tail Descriptor Pointers.
2572 * This needs to be done after enable.
2574 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2575 rxq = dev->data->rx_queues[i];
2576 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2577 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2583 /*********************************************************************
2585 * Enable transmit unit.
2587 **********************************************************************/
2589 eth_igb_tx_init(struct rte_eth_dev *dev)
2591 struct e1000_hw *hw;
2592 struct igb_tx_queue *txq;
2597 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2599 /* Setup the Base and Length of the Tx Descriptor Rings. */
2600 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2602 txq = dev->data->tx_queues[i];
2603 bus_addr = txq->tx_ring_phys_addr;
2605 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2607 sizeof(union e1000_adv_tx_desc));
2608 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2609 (uint32_t)(bus_addr >> 32));
2610 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2612 /* Setup the HW Tx Head and Tail descriptor pointers. */
2613 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2614 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2616 /* Setup Transmit threshold registers. */
2617 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2618 txdctl |= txq->pthresh & 0x1F;
2619 txdctl |= ((txq->hthresh & 0x1F) << 8);
2620 txdctl |= ((txq->wthresh & 0x1F) << 16);
2621 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2622 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2625 /* Program the Transmit Control Register. */
2626 tctl = E1000_READ_REG(hw, E1000_TCTL);
2627 tctl &= ~E1000_TCTL_CT;
2628 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2629 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2631 e1000_config_collision_dist(hw);
2633 /* This write will effectively turn on the transmit unit. */
2634 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2637 /*********************************************************************
2639 * Enable VF receive unit.
2641 **********************************************************************/
/*
 * Program the receive side of a VF port.
 *
 * Visible steps: hand the maximum RX packet length to e1000_rlpml_set_vf(),
 * then for every RX queue allocate its mbufs, write the ring length and DMA
 * base address, choose the buffer size (SRRCTL), program the thresholds and
 * enable the queue (RXDCTL). After all queues are enabled the head and tail
 * pointers are written in a second pass.
 *
 * dev->rx_pkt_burst starts as eth_igb_recv_pkts and is switched to
 * eth_igb_recv_scattered_pkts whenever one of the checks below decides that
 * scattered receive is needed.
 *
 * Unlike the PF path, queue registers are indexed by the loop counter i
 * rather than by rxq->reg_idx.
 *
 * dev: Ethernet device whose RX queues are to be programmed.
 *
 * NOTE(review): the return type, the local declarations other than
 * rctl_bsize, and the handling of ret after igb_alloc_rx_queue_mbufs()
 * are not visible in this view; confirm that an allocation failure is
 * propagated to the caller.
 */
2643 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2645 struct e1000_hw *hw;
2646 struct igb_rx_queue *rxq;
2649 uint16_t rctl_bsize;
2653 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
/*
 * Pass the maximum frame length to the VF helper. The value is truncated
 * to 16 bits by the cast; the second operand of the addition is not
 * visible here.
 */
2656 e1000_rlpml_set_vf(hw,
2657 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2660 /* Configure and enable each RX queue. */
/* Default burst handler; replaced below if scattered RX becomes necessary. */
2662 dev->rx_pkt_burst = eth_igb_recv_pkts;
2663 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2667 rxq = dev->data->rx_queues[i];
2671 * i350VF LB vlan packets have vlan tags byte swapped.
2673 if (hw->mac.type == e1000_vfadapt_i350) {
2674 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2675 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2677 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2680 /* Allocate buffers for descriptor rings and set up queue */
2681 ret = igb_alloc_rx_queue_mbufs(rxq);
/* Ring length, then the 64-bit DMA base split into high and low halves. */
2685 bus_addr = rxq->rx_ring_phys_addr;
2686 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2688 sizeof(union e1000_adv_rx_desc));
2689 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2690 (uint32_t)(bus_addr >> 32));
2691 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2693 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2696 * Configure RX buffer size.
/* Usable bytes per mbuf: pool data room minus the reserved headroom. */
2698 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2699 RTE_PKTMBUF_HEADROOM);
2700 if (buf_size >= 1024) {
2702 * Configure the BSIZEPACKET field of the SRRCTL
2703 * register of the queue.
2704 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2705 * If this field is equal to 0b, then RCTL.BSIZE
2706 * determines the RX packet buffer size.
2708 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2709 E1000_SRRCTL_BSIZEPKT_MASK);
/*
 * Recompute buf_size from the field just placed in srrctl, so the
 * comparison below uses the size that was actually encoded rather than
 * the raw mbuf size.
 */
2710 buf_size = (uint16_t) ((srrctl &
2711 E1000_SRRCTL_BSIZEPKT_MASK) <<
2712 E1000_SRRCTL_BSIZEPKT_SHIFT);
2714 /* It adds dual VLAN length for supporting dual VLAN */
/* A maximum-size frame plus two VLAN tags must fit in one buffer. */
2715 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2716 2 * VLAN_TAG_SIZE) > buf_size){
2717 if (!dev->data->scattered_rx)
2719 "forcing scatter mode");
2720 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2721 dev->data->scattered_rx = 1;
2725 * Use BSIZE field of the device RCTL register.
/*
 * Keep the smallest buf_size seen so far in rctl_bsize; presumably 0 means
 * that no value has been recorded yet (its initialisation is not visible
 * here). Scattered receive is then forced.
 */
2727 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2728 rctl_bsize = buf_size;
2729 if (!dev->data->scattered_rx)
2730 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2731 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2732 dev->data->scattered_rx = 1;
2735 /* Set if packets are dropped when no descriptors available */
/* NOTE(review): the condition guarding this statement is not visible here. */
2737 srrctl |= E1000_SRRCTL_DROP_EN;
2739 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2741 /* Enable this RX queue. */
2742 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2743 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
/*
 * Clear bits 0-19 and keep bits 20-31 before the 5-bit threshold fields
 * are OR-ed in at bits 0, 8 and 16. Presumably the enable bit set just
 * above lies in the preserved upper part.
 */
2744 rxdctl &= 0xFFF00000;
2745 rxdctl |= (rxq->pthresh & 0x1F);
2746 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2747 if (hw->mac.type == e1000_vfadapt) {
2749 * Workaround of 82576 VF Erratum
2750 * force set WTHRESH to 1
2751 * to avoid Write-Back not triggered sometimes
2754 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !");
/* Normal case: write-back threshold taken from the queue configuration. */
2757 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2758 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
/* Scattered receive explicitly requested through the RX offload flags. */
2761 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2762 if (!dev->data->scattered_rx)
2763 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2764 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2765 dev->data->scattered_rx = 1;
2769 * Setup the HW Rx Head and Tail Descriptor Pointers.
2770 * This needs to be done after enable.
/* Head at 0 and tail at the last descriptor index of the ring. */
2772 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2773 rxq = dev->data->rx_queues[i];
2774 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2775 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2781 /*********************************************************************
2783 * Enable VF transmit unit.
2785 **********************************************************************/
2787 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2789 struct e1000_hw *hw;
2790 struct igb_tx_queue *txq;
2794 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2796 /* Setup the Base and Length of the Tx Descriptor Rings. */
2797 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2800 txq = dev->data->tx_queues[i];
2801 bus_addr = txq->tx_ring_phys_addr;
2802 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2804 sizeof(union e1000_adv_tx_desc));
2805 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2806 (uint32_t)(bus_addr >> 32));
2807 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2809 /* Setup the HW Tx Head and Tail descriptor pointers. */
2810 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2811 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2813 /* Setup Transmit threshold registers. */
2814 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2815 txdctl |= txq->pthresh & 0x1F;
2816 txdctl |= ((txq->hthresh & 0x1F) << 8);
2817 if (hw->mac.type == e1000_82576) {
2819 * Workaround of 82576 VF Erratum
2820 * force set WTHRESH to 1
2821 * to avoid Write-Back not triggered sometimes
2824 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !");
2827 txdctl |= ((txq->wthresh & 0x1F) << 16);
2828 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2829 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2835 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2836 struct rte_eth_rxq_info *qinfo)
2838 struct igb_rx_queue *rxq;
2840 rxq = dev->data->rx_queues[queue_id];
2842 qinfo->mp = rxq->mb_pool;
2843 qinfo->scattered_rx = dev->data->scattered_rx;
2844 qinfo->nb_desc = rxq->nb_rx_desc;
2846 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2847 qinfo->conf.rx_drop_en = rxq->drop_en;
2848 qinfo->conf.offloads = rxq->offloads;
2852 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2853 struct rte_eth_txq_info *qinfo)
2855 struct igb_tx_queue *txq;
2857 txq = dev->data->tx_queues[queue_id];
2859 qinfo->nb_desc = txq->nb_tx_desc;
2861 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2862 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2863 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2864 qinfo->conf.offloads = txq->offloads;
/*
 * Validate an RSS flow action and copy it into driver-owned storage.
 *
 * dev: Ethernet device; only its MAC type is consulted.
 * out: destination; its key and queue arrays receive the copied data and
 *      out->conf is rewritten to describe that copy.
 * in:  RSS action to validate and copy.
 *
 * The visible caller only tests the result for non-zero.
 * NOTE(review): the statement executed when validation fails and the final
 * return are not visible in this view.
 */
2868 igb_rss_conf_init(struct rte_eth_dev *dev,
2869 struct igb_rte_flow_rss_conf *out,
2870 const struct rte_flow_action_rss *in)
2872 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
/*
 * Reject a key longer than the storage in out->key, and a queue list
 * longer than the limit for this MAC type (the 82576 has its own limit).
 */
2874 if (in->key_len > RTE_DIM(out->key) ||
2875 ((hw->mac.type == e1000_82576) &&
2876 (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
2877 ((hw->mac.type != e1000_82576) &&
2878 (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
2880 out->conf = (struct rte_flow_action_rss){
2884 .key_len = in->key_len,
2885 .queue_num = in->queue_num,
/*
 * memcpy() returns its destination, so .key and .queue end up pointing at
 * the buffers inside out rather than at the caller's memory.
 * NOTE(review): in->key or in->queue may be NULL when the matching length
 * is 0 (igb_config_rss_filter treats key_len == 0 as no key); passing a
 * null pointer to memcpy() is undefined even for a zero size. Confirm what
 * callers pass.
 */
2886 .key = memcpy(out->key, in->key, in->key_len),
2887 .queue = memcpy(out->queue, in->queue,
2888 sizeof(*in->queue) * in->queue_num),
2894 igb_action_rss_same(const struct rte_flow_action_rss *comp,
2895 const struct rte_flow_action_rss *with)
2897 return (comp->func == with->func &&
2898 comp->level == with->level &&
2899 comp->types == with->types &&
2900 comp->key_len == with->key_len &&
2901 comp->queue_num == with->queue_num &&
2902 !memcmp(comp->key, with->key, with->key_len) &&
2903 !memcmp(comp->queue, with->queue,
2904 sizeof(*with->queue) * with->queue_num));
2908 igb_config_rss_filter(struct rte_eth_dev *dev,
2909 struct igb_rte_flow_rss_conf *conf, bool add)
2913 struct rte_eth_rss_conf rss_conf = {
2914 .rss_key = conf->conf.key_len ?
2915 (void *)(uintptr_t)conf->conf.key : NULL,
2916 .rss_key_len = conf->conf.key_len,
2917 .rss_hf = conf->conf.types,
2919 struct e1000_filter_info *filter_info =
2920 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2921 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2923 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2926 if (igb_action_rss_same(&filter_info->rss_info.conf,
2928 igb_rss_disable(dev);
2929 memset(&filter_info->rss_info, 0,
2930 sizeof(struct igb_rte_flow_rss_conf));
2936 if (filter_info->rss_info.conf.queue_num)
2939 /* Fill in redirection table. */
2940 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2941 for (i = 0, j = 0; i < 128; i++, j++) {
2948 if (j == conf->conf.queue_num)
2950 q_idx = conf->conf.queue[j];
2951 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2953 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2956 /* Configure the RSS key and the RSS protocols used to compute
2957 * the RSS hash of input packets.
2959 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2960 igb_rss_disable(dev);
2963 if (rss_conf.rss_key == NULL)
2964 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2965 igb_hw_rss_hash_set(hw, &rss_conf);
2967 if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))