1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
19 #include <rte_debug.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev_driver.h>
35 #include <rte_prefetch.h>
40 #include <rte_string_fns.h>
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
46 #ifdef RTE_LIBRTE_IEEE1588
47 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
49 #define IGB_TX_IEEE1588_TMST 0
51 /* Bit mask indicating which bits are required for building the TX context */
52 #define IGB_TX_OFFLOAD_MASK ( \
63 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
64 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
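/*
 * IGB_TX_OFFLOAD_NOTSUP_MASK is the complement of the supported set within
 * PKT_TX_OFFLOAD_MASK: any mbuf carrying one of these ol_flags bits is
 * rejected with ENOTSUP by eth_igb_prep_pkts() below.
 */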
67 * Structure associated with each descriptor of the RX ring of a RX queue.
70 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
74 * Structure associated with each descriptor of the TX ring of a TX queue.
77 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
78 uint16_t next_id; /**< Index of next descriptor in ring. */
79 uint16_t last_id; /**< Index of last scattered descriptor. */
86 IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
90 * Structure associated with each RX queue.
93 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
94 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
95 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
96 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
97 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
98 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
99 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
100 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
101 uint16_t nb_rx_desc; /**< number of RX descriptors. */
102 uint16_t rx_tail; /**< current value of RDT register. */
103 uint16_t nb_rx_hold; /**< number of held free RX desc. */
104 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
105 uint16_t queue_id; /**< RX queue index. */
106 uint16_t reg_idx; /**< RX queue register index. */
107 uint16_t port_id; /**< Device port identifier. */
108 uint8_t pthresh; /**< Prefetch threshold register. */
109 uint8_t hthresh; /**< Host threshold register. */
110 uint8_t wthresh; /**< Write-back threshold register. */
111 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
112 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
113 uint32_t flags; /**< RX flags. */
114 uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */
118 * Hardware context number
120 enum igb_advctx_num {
121 IGB_CTX_0 = 0, /**< CTX0 */
122 IGB_CTX_1 = 1, /**< CTX1 */
123 IGB_CTX_NUM = 2, /**< CTX_NUM */
126 /** Offload features */
127 union igb_tx_offload {
130 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
131 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
132 uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier (CPU order). */
133 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
134 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
136 /* uint64_t unused:8; */
141 * Compare mask for igb_tx_offload.data,
142 * should be in sync with igb_tx_offload layout.
144 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
145 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< VLAN mask. */
146 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
147 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
148 /** MAC + IP + TCP + MSS mask. */
149 #define TX_TSO_CMP_MASK \
150 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
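/*
 * The masks above follow the bit layout implied by igb_tx_offload on a
 * little-endian target: l3_len occupies bits 0-8, l2_len bits 9-15,
 * vlan_tci bits 16-31, l4_len bits 32-39 and tso_segsz bits 40-55, with
 * the top 8 bits unused.
 */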
153 * Structure to check if a new context needs to be built
155 struct igb_advctx_info {
156 uint64_t flags; /**< ol_flags related to context build. */
157 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
158 union igb_tx_offload tx_offload;
159 /** compare mask for tx offload. */
160 union igb_tx_offload tx_offload_mask;
164 * Structure associated with each TX queue.
166 struct igb_tx_queue {
167 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
168 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
169 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
170 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
171 uint32_t txd_type; /**< Device-specific TXD type */
172 uint16_t nb_tx_desc; /**< number of TX descriptors. */
173 uint16_t tx_tail; /**< Current value of TDT register. */
175 /**< Index of first used TX descriptor. */
176 uint16_t queue_id; /**< TX queue index. */
177 uint16_t reg_idx; /**< TX queue register index. */
178 uint16_t port_id; /**< Device port identifier. */
179 uint8_t pthresh; /**< Prefetch threshold register. */
180 uint8_t hthresh; /**< Host threshold register. */
181 uint8_t wthresh; /**< Write-back threshold register. */
183 /**< Current used hardware descriptor. */
185 /**< Start context position for transmit queue. */
186 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
187 /**< Hardware context history. */
188 uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */
192 #define RTE_PMD_USE_PREFETCH
195 #ifdef RTE_PMD_USE_PREFETCH
196 #define rte_igb_prefetch(p) rte_prefetch0(p)
198 #define rte_igb_prefetch(p) do {} while(0)
201 #ifdef RTE_PMD_PACKET_PREFETCH
202 #define rte_packet_prefetch(p) rte_prefetch1(p)
204 #define rte_packet_prefetch(p) do {} while(0)
208 * Macro for VMDq feature for 1 GbE NIC.
210 #define E1000_VMOLR_SIZE (8)
211 #define IGB_TSO_MAX_HDRLEN (512)
212 #define IGB_TSO_MAX_MSS (9216)
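/*
 * Hardware limits for TSO: the combined L2+L3+L4 header may not exceed
 * IGB_TSO_MAX_HDRLEN bytes and the MSS may not exceed IGB_TSO_MAX_MSS
 * bytes; check_tso_para() below falls back from TSO to plain TCP
 * checksum offload when a packet violates either limit.
 */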
214 /*********************************************************************
218 **********************************************************************/
221 * There are some hardware limitations for TCP segmentation offload (TSO), so we
222 * should check whether the parameters are valid.
224 static inline uint64_t
225 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
227 if (!(ol_req & PKT_TX_TCP_SEG))
229 if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
230 ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
231 ol_req &= ~PKT_TX_TCP_SEG;
232 ol_req |= PKT_TX_TCP_CKSUM;
238 * Advanced context descriptors are almost the same between igb and ixgbe.
239 * This is kept as a separate function to look for optimization opportunities.
240 * Rework is required to go with the pre-defined values.
244 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
245 volatile struct e1000_adv_tx_context_desc *ctx_txd,
246 uint64_t ol_flags, union igb_tx_offload tx_offload)
248 uint32_t type_tucmd_mlhl;
249 uint32_t mss_l4len_idx;
250 uint32_t ctx_idx, ctx_curr;
251 uint32_t vlan_macip_lens;
252 union igb_tx_offload tx_offload_mask;
254 ctx_curr = txq->ctx_curr;
255 ctx_idx = ctx_curr + txq->ctx_start;
257 tx_offload_mask.data = 0;
260 /* Specify which HW CTX to upload. */
261 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
263 if (ol_flags & PKT_TX_VLAN_PKT)
264 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
266 /* Check if TCP segmentation is required for this packet */
267 if (ol_flags & PKT_TX_TCP_SEG) {
268 /* implies IP cksum in IPv4 */
269 if (ol_flags & PKT_TX_IP_CKSUM)
270 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
271 E1000_ADVTXD_TUCMD_L4T_TCP |
272 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
274 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
275 E1000_ADVTXD_TUCMD_L4T_TCP |
276 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
278 tx_offload_mask.data |= TX_TSO_CMP_MASK;
279 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
280 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
281 } else { /* no TSO, check if hardware checksum is needed */
282 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
283 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
285 if (ol_flags & PKT_TX_IP_CKSUM)
286 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
288 switch (ol_flags & PKT_TX_L4_MASK) {
289 case PKT_TX_UDP_CKSUM:
290 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
291 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
292 mss_l4len_idx |= sizeof(struct rte_udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
294 case PKT_TX_TCP_CKSUM:
295 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
296 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
297 mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
298 << E1000_ADVTXD_L4LEN_SHIFT;
300 case PKT_TX_SCTP_CKSUM:
301 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
302 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
303 mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
304 << E1000_ADVTXD_L4LEN_SHIFT;
307 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
308 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
313 txq->ctx_cache[ctx_curr].flags = ol_flags;
314 txq->ctx_cache[ctx_curr].tx_offload.data =
315 tx_offload_mask.data & tx_offload.data;
316 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
318 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
319 vlan_macip_lens = (uint32_t)tx_offload.data;
320 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
321 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
322 ctx_txd->seqnum_seed = 0;
326 * Check which hardware context can be used. Use the existing match
327 * or create a new context descriptor.
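 * A miss returns IGB_CTX_NUM, which the transmit path uses as the signal
 * to build and upload a fresh context descriptor.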
329 static inline uint32_t
330 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
331 union igb_tx_offload tx_offload)
333 /* If match with the current context */
334 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
335 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
336 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
337 return txq->ctx_curr;
340 /* If match with the second context */
341 txq->ctx_curr ^= 1;
342 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
343 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
344 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
345 return txq->ctx_curr;
348 /* Mismatch: no cached context matches, so a new one must be built */
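/*
 * The two helpers below use a branch-free lookup idiom: indexing a
 * two-entry table with the boolean result of a flag test, e.g.
 * l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0], selects the register
 * bits without a conditional branch in the hot transmit path.
 */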
352 static inline uint32_t
353 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
355 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
356 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
359 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
360 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
361 tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
365 static inline uint32_t
366 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
369 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
370 static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
371 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
372 cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
377 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
380 struct igb_tx_queue *txq;
381 struct igb_tx_entry *sw_ring;
382 struct igb_tx_entry *txe, *txn;
383 volatile union e1000_adv_tx_desc *txr;
384 volatile union e1000_adv_tx_desc *txd;
385 struct rte_mbuf *tx_pkt;
386 struct rte_mbuf *m_seg;
387 uint64_t buf_dma_addr;
388 uint32_t olinfo_status;
389 uint32_t cmd_type_len;
398 uint32_t new_ctx = 0;
400 union igb_tx_offload tx_offload = {0};
403 sw_ring = txq->sw_ring;
405 tx_id = txq->tx_tail;
406 txe = &sw_ring[tx_id];
408 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
410 pkt_len = tx_pkt->pkt_len;
412 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
415 * The number of descriptors that must be allocated for a
416 * packet is the number of segments of that packet, plus 1
417 * Context Descriptor for the VLAN Tag Identifier, if any.
418 * Determine the last TX descriptor to allocate in the TX ring
419 * for the packet, starting from the current position (tx_id)
422 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
424 ol_flags = tx_pkt->ol_flags;
425 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
427 /* If a Context Descriptor needs to be built. */
429 tx_offload.l2_len = tx_pkt->l2_len;
430 tx_offload.l3_len = tx_pkt->l3_len;
431 tx_offload.l4_len = tx_pkt->l4_len;
432 tx_offload.vlan_tci = tx_pkt->vlan_tci;
433 tx_offload.tso_segsz = tx_pkt->tso_segsz;
434 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
436 ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
437 /* Only allocate a context descriptor if required */
438 new_ctx = (ctx == IGB_CTX_NUM);
439 ctx = txq->ctx_curr + txq->ctx_start;
440 tx_last = (uint16_t) (tx_last + new_ctx);
442 if (tx_last >= txq->nb_tx_desc)
443 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
445 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
446 " tx_first=%u tx_last=%u",
447 (unsigned) txq->port_id,
448 (unsigned) txq->queue_id,
454 * Check if there are enough free descriptors in the TX ring
455 * to transmit the next packet.
456 * This operation is based on the two following rules:
458 * 1- Only check that the last needed TX descriptor can be
459 * allocated (by construction, if that descriptor is free,
460 * all intermediate ones are also free).
462 * For this purpose, the index of the last TX descriptor
463 * used for a packet (the "last descriptor" of a packet)
464 * is recorded in the TX entries (the last one included)
465 * that are associated with all TX descriptors allocated
468 * 2- Avoid allocating the last free TX descriptor of the
469 * ring, in order to never set the TDT register with the
470 * same value stored in parallel by the NIC in the TDH
471 * register, which makes the TX engine of the NIC enter
472 * in a deadlock situation.
474 * By extension, avoid allocating a free descriptor that
475 * belongs to the last set of free descriptors allocated
476 * to the same packet previously transmitted.
480 * The "last descriptor" of the previously sent packet, if any,
481 * which used the last descriptor to allocate.
483 tx_end = sw_ring[tx_last].last_id;
486 * The next descriptor following that "last descriptor" in the
489 tx_end = sw_ring[tx_end].next_id;
492 * The "last descriptor" associated with that next descriptor.
494 tx_end = sw_ring[tx_end].last_id;
497 * Check that this descriptor is free.
499 if (!(txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
506 * Set common flags of all TX Data Descriptors.
508 * The following bits must be set in all Data Descriptors:
509 * - E1000_ADVTXD_DTYP_DATA
510 * - E1000_ADVTXD_DCMD_DEXT
512 * The following bits must be set in the first Data Descriptor
513 * and are ignored in the other ones:
514 * - E1000_ADVTXD_DCMD_IFCS
515 * - E1000_ADVTXD_MAC_1588
516 * - E1000_ADVTXD_DCMD_VLE
518 * The following bits must only be set in the last Data
520 * - E1000_TXD_CMD_EOP
522 * The following bits can be set in any Data Descriptor, but
523 * are only set in the last Data Descriptor:
526 cmd_type_len = txq->txd_type |
527 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
528 if (tx_ol_req & PKT_TX_TCP_SEG)
529 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
530 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
531 #if defined(RTE_LIBRTE_IEEE1588)
532 if (ol_flags & PKT_TX_IEEE1588_TMST)
533 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
536 /* Setup TX Advanced context descriptor if required */
538 volatile struct e1000_adv_tx_context_desc *
541 ctx_txd = (volatile struct
542 e1000_adv_tx_context_desc *)
545 txn = &sw_ring[txe->next_id];
546 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
548 if (txe->mbuf != NULL) {
549 rte_pktmbuf_free_seg(txe->mbuf);
553 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
555 txe->last_id = tx_last;
556 tx_id = txe->next_id;
560 /* Setup the TX Advanced Data Descriptor */
561 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
562 olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
563 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
568 txn = &sw_ring[txe->next_id];
571 if (txe->mbuf != NULL)
572 rte_pktmbuf_free_seg(txe->mbuf);
576 * Set up transmit descriptor.
578 slen = (uint16_t) m_seg->data_len;
579 buf_dma_addr = rte_mbuf_data_iova(m_seg);
580 txd->read.buffer_addr =
581 rte_cpu_to_le_64(buf_dma_addr);
582 txd->read.cmd_type_len =
583 rte_cpu_to_le_32(cmd_type_len | slen);
584 txd->read.olinfo_status =
585 rte_cpu_to_le_32(olinfo_status);
586 txe->last_id = tx_last;
587 tx_id = txe->next_id;
590 } while (m_seg != NULL);
593 * The last packet data descriptor needs End Of Packet (EOP)
594 * and Report Status (RS).
596 txd->read.cmd_type_len |=
597 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
603 * Set the Transmit Descriptor Tail (TDT).
605 E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
606 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
607 (unsigned) txq->port_id, (unsigned) txq->queue_id,
608 (unsigned) tx_id, (unsigned) nb_tx);
609 txq->tx_tail = tx_id;
614 /*********************************************************************
618 **********************************************************************/
620 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
626 for (i = 0; i < nb_pkts; i++) {
629 /* Check some limitations for TSO in hardware */
630 if (m->ol_flags & PKT_TX_TCP_SEG)
631 if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
632 (m->l2_len + m->l3_len + m->l4_len >
633 IGB_TSO_MAX_HDRLEN)) {
638 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
639 rte_errno = ENOTSUP; /* rte_errno takes positive error values */
643 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
644 ret = rte_validate_tx_offload(m);
650 ret = rte_net_intel_cksum_prepare(m);
660 /*********************************************************************
664 **********************************************************************/
665 #define IGB_PACKET_TYPE_IPV4 0X01
666 #define IGB_PACKET_TYPE_IPV4_TCP 0X11
667 #define IGB_PACKET_TYPE_IPV4_UDP 0X21
668 #define IGB_PACKET_TYPE_IPV4_SCTP 0X41
669 #define IGB_PACKET_TYPE_IPV4_EXT 0X03
670 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP 0X43
671 #define IGB_PACKET_TYPE_IPV6 0X04
672 #define IGB_PACKET_TYPE_IPV6_TCP 0X14
673 #define IGB_PACKET_TYPE_IPV6_UDP 0X24
674 #define IGB_PACKET_TYPE_IPV6_EXT 0X0C
675 #define IGB_PACKET_TYPE_IPV6_EXT_TCP 0X1C
676 #define IGB_PACKET_TYPE_IPV6_EXT_UDP 0X2C
677 #define IGB_PACKET_TYPE_IPV4_IPV6 0X05
678 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP 0X15
679 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP 0X25
680 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
681 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
682 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
683 #define IGB_PACKET_TYPE_MAX 0X80
684 #define IGB_PACKET_TYPE_MASK 0X7F
685 #define IGB_PACKET_TYPE_SHIFT 0X04
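/*
 * As the values above imply, the packet-type field is a small bitmap:
 * bit 0 IPv4, bit 1 IPv4 extension headers, bit 2 IPv6, bit 3 IPv6
 * extension headers, bit 4 TCP, bit 5 UDP and bit 6 SCTP; for instance
 * IGB_PACKET_TYPE_IPV4_TCP is 0x10 | 0x01 = 0x11, and combinations such
 * as IGB_PACKET_TYPE_IPV4_IPV6 (0x05) describe IPv6-in-IPv4 tunnels.
 */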
686 static inline uint32_t
687 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
689 static const uint32_t
690 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
691 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
693 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
694 RTE_PTYPE_L3_IPV4_EXT,
695 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
697 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
698 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
699 RTE_PTYPE_INNER_L3_IPV6,
700 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
701 RTE_PTYPE_L3_IPV6_EXT,
702 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
703 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
704 RTE_PTYPE_INNER_L3_IPV6_EXT,
705 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
706 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
707 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
708 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
709 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
710 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
711 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
712 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
713 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
714 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
715 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
716 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
717 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
718 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
719 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
720 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
721 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
722 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
723 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
724 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
725 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
726 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
727 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
728 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
729 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
730 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
731 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
732 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
734 if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
735 return RTE_PTYPE_UNKNOWN;
737 pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
739 return ptype_table[pkt_info];
742 static inline uint64_t
743 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
745 uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : PKT_RX_RSS_HASH;
747 #if defined(RTE_LIBRTE_IEEE1588)
748 static uint32_t ip_pkt_etqf_map[8] = {
749 0, 0, 0, PKT_RX_IEEE1588_PTP,
753 struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
754 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
756 /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
757 if (hw->mac.type == e1000_i210)
758 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
760 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
768 static inline uint64_t
769 rx_desc_status_to_pkt_flags(uint32_t rx_status)
773 /* Check if VLAN present */
774 pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
775 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
777 #if defined(RTE_LIBRTE_IEEE1588)
778 if (rx_status & E1000_RXD_STAT_TMST)
779 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
784 static inline uint64_t
785 rx_desc_error_to_pkt_flags(uint32_t rx_status)
788 * Bit 30: IPE, IPv4 checksum error
789 * Bit 29: L4I, L4 integrity error
792 static uint64_t error_to_pkt_flags_map[4] = {
793 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
794 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
795 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
796 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
798 return error_to_pkt_flags_map[(rx_status >>
799 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
803 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
806 struct igb_rx_queue *rxq;
807 volatile union e1000_adv_rx_desc *rx_ring;
808 volatile union e1000_adv_rx_desc *rxdp;
809 struct igb_rx_entry *sw_ring;
810 struct igb_rx_entry *rxe;
811 struct rte_mbuf *rxm;
812 struct rte_mbuf *nmb;
813 union e1000_adv_rx_desc rxd;
816 uint32_t hlen_type_rss;
826 rx_id = rxq->rx_tail;
827 rx_ring = rxq->rx_ring;
828 sw_ring = rxq->sw_ring;
829 while (nb_rx < nb_pkts) {
831 * The order of operations here is important as the DD status
832 * bit must not be read after any other descriptor fields.
833 * rx_ring and rxdp are pointing to volatile data so the order
834 * of accesses cannot be reordered by the compiler. If they were
835 * not volatile, they could be reordered which could lead to
836 * using invalid descriptor fields when read from rxd.
838 rxdp = &rx_ring[rx_id];
839 staterr = rxdp->wb.upper.status_error;
840 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
847 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
848 * likely to be invalid and to be dropped by the various
849 * validation checks performed by the network stack.
851 * Allocate a new mbuf to replenish the RX ring descriptor.
852 * If the allocation fails:
853 * - arrange for that RX descriptor to be the first one
854 * being parsed the next time the receive function is
855 * invoked [on the same queue].
857 * - Stop parsing the RX ring and return immediately.
859 * This policy does not drop the packet received in the RX
860 * descriptor for which the allocation of a new mbuf failed.
861 * Thus, it allows that packet to be later retrieved if
862 * mbufs have been freed in the meantime.
863 * As a side effect, holding RX descriptors instead of
864 * systematically giving them back to the NIC may lead to
865 * RX ring exhaustion situations.
866 * However, the NIC can gracefully prevent such situations
867 * to happen by sending specific "back-pressure" flow control
868 * frames to its peer(s).
870 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
871 "staterr=0x%x pkt_len=%u",
872 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
873 (unsigned) rx_id, (unsigned) staterr,
874 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
876 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
878 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
879 "queue_id=%u", (unsigned) rxq->port_id,
880 (unsigned) rxq->queue_id);
881 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
886 rxe = &sw_ring[rx_id];
888 if (rx_id == rxq->nb_rx_desc)
891 /* Prefetch next mbuf while processing current one. */
892 rte_igb_prefetch(sw_ring[rx_id].mbuf);
895 * When next RX descriptor is on a cache-line boundary,
896 * prefetch the next 4 RX descriptors and the next 8 pointers
899 if ((rx_id & 0x3) == 0) {
900 rte_igb_prefetch(&rx_ring[rx_id]);
901 rte_igb_prefetch(&sw_ring[rx_id]);
907 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
908 rxdp->read.hdr_addr = 0;
909 rxdp->read.pkt_addr = dma_addr;
912 * Initialize the returned mbuf.
913 * 1) setup generic mbuf fields:
914 * - number of segments,
917 * - RX port identifier.
918 * 2) integrate hardware offload data, if any:
920 * - IP checksum flag,
921 * - VLAN TCI, if any,
924 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
926 rxm->data_off = RTE_PKTMBUF_HEADROOM;
927 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
930 rxm->pkt_len = pkt_len;
931 rxm->data_len = pkt_len;
932 rxm->port = rxq->port_id;
934 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
935 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
938 * The vlan_tci field is only valid when PKT_RX_VLAN is
939 * set in the pkt_flags field and must be in CPU byte order.
941 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
942 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
943 rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
945 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
947 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
948 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
949 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
950 rxm->ol_flags = pkt_flags;
951 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
952 lo_dword.hs_rss.pkt_info);
955 * Store the mbuf address into the next entry of the array
956 * of returned packets.
958 rx_pkts[nb_rx++] = rxm;
960 rxq->rx_tail = rx_id;
963 * If the number of free RX descriptors is greater than the RX free
964 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
966 * Update the RDT with the value of the last processed RX descriptor
967 * minus 1, to guarantee that the RDT register is never equal to the
968 * RDH register, which creates a "full" ring situation from the
969 * hardware point of view...
971 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
972 if (nb_hold > rxq->rx_free_thresh) {
973 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
974 "nb_hold=%u nb_rx=%u",
975 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
976 (unsigned) rx_id, (unsigned) nb_hold,
978 rx_id = (uint16_t) ((rx_id == 0) ?
979 (rxq->nb_rx_desc - 1) : (rx_id - 1));
980 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
983 rxq->nb_rx_hold = nb_hold;
988 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
991 struct igb_rx_queue *rxq;
992 volatile union e1000_adv_rx_desc *rx_ring;
993 volatile union e1000_adv_rx_desc *rxdp;
994 struct igb_rx_entry *sw_ring;
995 struct igb_rx_entry *rxe;
996 struct rte_mbuf *first_seg;
997 struct rte_mbuf *last_seg;
998 struct rte_mbuf *rxm;
999 struct rte_mbuf *nmb;
1000 union e1000_adv_rx_desc rxd;
1001 uint64_t dma; /* Physical address of mbuf data buffer */
1003 uint32_t hlen_type_rss;
1013 rx_id = rxq->rx_tail;
1014 rx_ring = rxq->rx_ring;
1015 sw_ring = rxq->sw_ring;
1018 * Retrieve RX context of current packet, if any.
1020 first_seg = rxq->pkt_first_seg;
1021 last_seg = rxq->pkt_last_seg;
1023 while (nb_rx < nb_pkts) {
1026 * The order of operations here is important as the DD status
1027 * bit must not be read after any other descriptor fields.
1028 * rx_ring and rxdp are pointing to volatile data so the order
1029 * of accesses cannot be reordered by the compiler. If they were
1030 * not volatile, they could be reordered which could lead to
1031 * using invalid descriptor fields when read from rxd.
1033 rxdp = &rx_ring[rx_id];
1034 staterr = rxdp->wb.upper.status_error;
1035 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1042 * Allocate a new mbuf to replenish the RX ring descriptor.
1043 * If the allocation fails:
1044 * - arrange for that RX descriptor to be the first one
1045 * being parsed the next time the receive function is
1046 * invoked [on the same queue].
1048 * - Stop parsing the RX ring and return immediately.
1050 * This policy does not drop the packet received in the RX
1051 * descriptor for which the allocation of a new mbuf failed.
1052 * Thus, it allows that packet to be later retrieved if
1053 * mbufs have been freed in the meantime.
1054 * As a side effect, holding RX descriptors instead of
1055 * systematically giving them back to the NIC may lead to
1056 * RX ring exhaustion situations.
1057 * However, the NIC can gracefully prevent such situations
1058 * to happen by sending specific "back-pressure" flow control
1059 * frames to its peer(s).
1061 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1062 "staterr=0x%x data_len=%u",
1063 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1064 (unsigned) rx_id, (unsigned) staterr,
1065 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1067 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1069 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1070 "queue_id=%u", (unsigned) rxq->port_id,
1071 (unsigned) rxq->queue_id);
1072 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1077 rxe = &sw_ring[rx_id];
1079 if (rx_id == rxq->nb_rx_desc)
1082 /* Prefetch next mbuf while processing current one. */
1083 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1086 * When next RX descriptor is on a cache-line boundary,
1087 * prefetch the next 4 RX descriptors and the next 8 pointers
1090 if ((rx_id & 0x3) == 0) {
1091 rte_igb_prefetch(&rx_ring[rx_id]);
1092 rte_igb_prefetch(&sw_ring[rx_id]);
1096 * Update RX descriptor with the physical address of the new
1097 * data buffer of the newly allocated mbuf.
1101 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1102 rxdp->read.pkt_addr = dma;
1103 rxdp->read.hdr_addr = 0;
1106 * Set data length & data buffer address of mbuf.
1108 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1109 rxm->data_len = data_len;
1110 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1113 * If this is the first buffer of the received packet,
1114 * set the pointer to the first mbuf of the packet and
1115 * initialize its context.
1116 * Otherwise, update the total length and the number of segments
1117 * of the current scattered packet, and update the pointer to
1118 * the last mbuf of the current packet.
1120 if (first_seg == NULL) {
1122 first_seg->pkt_len = data_len;
1123 first_seg->nb_segs = 1;
1125 first_seg->pkt_len += data_len;
1126 first_seg->nb_segs++;
1127 last_seg->next = rxm;
1131 * If this is not the last buffer of the received packet,
1132 * update the pointer to the last mbuf of the current scattered
1133 * packet and continue to parse the RX ring.
1135 if (! (staterr & E1000_RXD_STAT_EOP)) {
1141 * This is the last buffer of the received packet.
1142 * If the CRC is not stripped by the hardware:
1143 * - Subtract the CRC length from the total packet length.
1144 * - If the last buffer only contains the whole CRC or a part
1145 * of it, free the mbuf associated to the last buffer.
1146 * If part of the CRC is also contained in the previous
1147 * mbuf, subtract the length of that CRC part from the
1148 * data length of the previous mbuf.
1151 if (unlikely(rxq->crc_len > 0)) {
1152 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1153 if (data_len <= RTE_ETHER_CRC_LEN) {
1154 rte_pktmbuf_free_seg(rxm);
1155 first_seg->nb_segs--;
1156 last_seg->data_len = (uint16_t)
1157 (last_seg->data_len -
1158 (RTE_ETHER_CRC_LEN - data_len));
1159 last_seg->next = NULL;
1161 rxm->data_len = (uint16_t)
1162 (data_len - RTE_ETHER_CRC_LEN);
1166 * Initialize the first mbuf of the returned packet:
1167 * - RX port identifier,
1168 * - hardware offload data, if any:
1169 * - RSS flag & hash,
1170 * - IP checksum flag,
1171 * - VLAN TCI, if any,
1174 first_seg->port = rxq->port_id;
1175 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1178 * The vlan_tci field is only valid when PKT_RX_VLAN is
1179 * set in the pkt_flags field and must be in CPU byte order.
1181 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1182 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1183 first_seg->vlan_tci =
1184 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1186 first_seg->vlan_tci =
1187 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1189 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1190 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1191 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1192 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1193 first_seg->ol_flags = pkt_flags;
1194 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1195 lower.lo_dword.hs_rss.pkt_info);
1197 /* Prefetch data of first segment, if configured to do so. */
1198 rte_packet_prefetch((char *)first_seg->buf_addr +
1199 first_seg->data_off);
1202 * Store the mbuf address into the next entry of the array
1203 * of returned packets.
1205 rx_pkts[nb_rx++] = first_seg;
1208 * Set up the receive context for a new packet.
1214 * Record index of the next RX descriptor to probe.
1216 rxq->rx_tail = rx_id;
1219 * Save receive context.
1221 rxq->pkt_first_seg = first_seg;
1222 rxq->pkt_last_seg = last_seg;
1225 * If the number of free RX descriptors is greater than the RX free
1226 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1228 * Update the RDT with the value of the last processed RX descriptor
1229 * minus 1, to guarantee that the RDT register is never equal to the
1230 * RDH register, which creates a "full" ring situation from the
1231 * hardware point of view...
1233 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1234 if (nb_hold > rxq->rx_free_thresh) {
1235 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1236 "nb_hold=%u nb_rx=%u",
1237 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1238 (unsigned) rx_id, (unsigned) nb_hold,
1240 rx_id = (uint16_t) ((rx_id == 0) ?
1241 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1242 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1245 rxq->nb_rx_hold = nb_hold;
1250 * Maximum number of Ring Descriptors.
1252 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1253 * descriptors should meet the following condition:
1254 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
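 *
 * With 16-byte advanced descriptors this amounts to requiring the ring
 * size to be a multiple of 8 entries, which the IGB_RXD_ALIGN and
 * IGB_TXD_ALIGN checks in the queue setup functions are assumed to
 * enforce.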
1258 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1262 if (txq->sw_ring != NULL) {
1263 for (i = 0; i < txq->nb_tx_desc; i++) {
1264 if (txq->sw_ring[i].mbuf != NULL) {
1265 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1266 txq->sw_ring[i].mbuf = NULL;
1273 igb_tx_queue_release(struct igb_tx_queue *txq)
1276 igb_tx_queue_release_mbufs(txq);
1277 rte_free(txq->sw_ring);
1283 eth_igb_tx_queue_release(void *txq)
1285 igb_tx_queue_release(txq);
1289 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1291 struct igb_tx_entry *sw_ring;
1292 volatile union e1000_adv_tx_desc *txr;
1293 uint16_t tx_first; /* First segment analyzed. */
1294 uint16_t tx_id; /* Current segment being processed. */
1295 uint16_t tx_last; /* Last segment in the current packet. */
1296 uint16_t tx_next; /* First segment of the next packet. */
1301 sw_ring = txq->sw_ring;
1305 * tx_tail is the last sent packet on the sw_ring. Go to the end
1306 * of that packet (the last segment in the packet chain) and
1307 * the next segment will be the start of the oldest packet
1308 * in the sw_ring. This is the first packet whose mbufs we
1309 * will attempt to free.
1312 /* Get last segment in most recently added packet. */
1313 tx_first = sw_ring[txq->tx_tail].last_id;
1315 /* Get the next segment, which is the oldest segment in ring. */
1316 tx_first = sw_ring[tx_first].next_id;
1318 /* Set the current index to the first. */
1322 * Loop through each packet. For each packet, verify that an
1323 * mbuf exists and that the last segment is free. If so, free
1327 tx_last = sw_ring[tx_id].last_id;
1329 if (sw_ring[tx_last].mbuf) {
1330 if (txr[tx_last].wb.status &
1331 E1000_TXD_STAT_DD) {
1333 * Increment the number of packets
1338 /* Get the start of the next packet. */
1339 tx_next = sw_ring[tx_last].next_id;
1342 * Loop through all segments in a
1346 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1347 sw_ring[tx_id].mbuf = NULL;
1348 sw_ring[tx_id].last_id = tx_id;
1350 /* Move to the next segment. */
1351 tx_id = sw_ring[tx_id].next_id;
1353 } while (tx_id != tx_next);
1355 if (unlikely(count == (int)free_cnt))
1359 * mbuf still in use, nothing left to
1365 * There are multiple reasons to be here:
1366 * 1) All the packets on the ring have been
1367 * freed - tx_id is equal to tx_first
1368 * and some packets have been freed.
1370 * 2) The interface has not sent a ring's worth of
1371 * packets yet, so the segment after tail is
1372 * still empty. Or a previous call to this
1373 * function freed some of the segments but
1374 * not all so there is a hole in the list.
1375 * Hopefully this is a rare case.
1376 * - Walk the list and find the next mbuf. If
1377 * there isn't one, then done.
1379 if (likely((tx_id == tx_first) && (count != 0)))
1383 * Walk the list and find the next mbuf, if any.
1386 /* Move to the next segment. */
1387 tx_id = sw_ring[tx_id].next_id;
1389 if (sw_ring[tx_id].mbuf)
1392 } while (tx_id != tx_first);
1395 * Determine why the previous loop bailed. If there
1396 * is no mbuf, we are done.
1398 if (sw_ring[tx_id].mbuf == NULL)
1409 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1411 return igb_tx_done_cleanup(txq, free_cnt);
1415 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1420 memset((void *)&txq->ctx_cache, 0,
1421 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1425 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1427 static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1428 struct igb_tx_entry *txe = txq->sw_ring;
1430 struct e1000_hw *hw;
1432 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1433 /* Zero out HW ring memory */
1434 for (i = 0; i < txq->nb_tx_desc; i++) {
1435 txq->tx_ring[i] = zeroed_desc;
1438 /* Initialize ring entries */
1439 prev = (uint16_t)(txq->nb_tx_desc - 1);
1440 for (i = 0; i < txq->nb_tx_desc; i++) {
1441 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1443 txd->wb.status = E1000_TXD_STAT_DD;
1446 txe[prev].next_id = i;
1450 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1451 /* 82575 specific, each tx queue will use 2 hw contexts */
1452 if (hw->mac.type == e1000_82575)
1453 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
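/*
 * On the 82575 each queue thus owns hardware context slots
 * queue_id * 2 and queue_id * 2 + 1 (see igbe_set_xmit_ctx, which adds
 * ctx_start to the slot index); on other MAC types ctx_start stays 0
 * and every queue uses slots 0 and 1.
 */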
1455 igb_reset_tx_queue_stat(txq);
1459 igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
1461 uint64_t tx_offload_capa;
1464 tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
1465 DEV_TX_OFFLOAD_IPV4_CKSUM |
1466 DEV_TX_OFFLOAD_UDP_CKSUM |
1467 DEV_TX_OFFLOAD_TCP_CKSUM |
1468 DEV_TX_OFFLOAD_SCTP_CKSUM |
1469 DEV_TX_OFFLOAD_TCP_TSO |
1470 DEV_TX_OFFLOAD_MULTI_SEGS;
1472 return tx_offload_capa;
1476 igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
1478 uint64_t tx_queue_offload_capa;
1480 tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
1482 return tx_queue_offload_capa;
1486 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1489 unsigned int socket_id,
1490 const struct rte_eth_txconf *tx_conf)
1492 const struct rte_memzone *tz;
1493 struct igb_tx_queue *txq;
1494 struct e1000_hw *hw;
1498 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1500 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1503 * Validate number of transmit descriptors.
1504 * It must not exceed the hardware maximum, and must be a multiple
1507 if (nb_desc % IGB_TXD_ALIGN != 0 ||
1508 (nb_desc > E1000_MAX_RING_DESC) ||
1509 (nb_desc < E1000_MIN_RING_DESC)) {
1514 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1517 if (tx_conf->tx_free_thresh != 0)
1518 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1519 "used for the 1G driver.");
1520 if (tx_conf->tx_rs_thresh != 0)
1521 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1522 "used for the 1G driver.");
1523 if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1524 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1525 "consider setting the TX WTHRESH value to 4, 8, "
1528 /* Free memory prior to re-allocation if needed */
1529 if (dev->data->tx_queues[queue_idx] != NULL) {
1530 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1531 dev->data->tx_queues[queue_idx] = NULL;
1534 /* First allocate the tx queue data structure */
1535 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1536 RTE_CACHE_LINE_SIZE);
1541 * Allocate TX ring hardware descriptors. A memzone large enough to
1542 * handle the maximum ring size is allocated in order to allow for
1543 * resizing in later calls to the queue setup function.
1545 size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1546 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1547 E1000_ALIGN, socket_id);
1549 igb_tx_queue_release(txq);
1553 txq->nb_tx_desc = nb_desc;
1554 txq->pthresh = tx_conf->tx_thresh.pthresh;
1555 txq->hthresh = tx_conf->tx_thresh.hthresh;
1556 txq->wthresh = tx_conf->tx_thresh.wthresh;
1557 if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1559 txq->queue_id = queue_idx;
1560 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1561 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1562 txq->port_id = dev->data->port_id;
1564 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1565 txq->tx_ring_phys_addr = tz->iova;
1567 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1568 /* Allocate software ring */
1569 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1570 sizeof(struct igb_tx_entry) * nb_desc,
1571 RTE_CACHE_LINE_SIZE);
1572 if (txq->sw_ring == NULL) {
1573 igb_tx_queue_release(txq);
1576 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1577 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1579 igb_reset_tx_queue(txq, dev);
1580 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1581 dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1582 dev->data->tx_queues[queue_idx] = txq;
1583 txq->offloads = offloads;
1589 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1593 if (rxq->sw_ring != NULL) {
1594 for (i = 0; i < rxq->nb_rx_desc; i++) {
1595 if (rxq->sw_ring[i].mbuf != NULL) {
1596 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1597 rxq->sw_ring[i].mbuf = NULL;
1604 igb_rx_queue_release(struct igb_rx_queue *rxq)
1607 igb_rx_queue_release_mbufs(rxq);
1608 rte_free(rxq->sw_ring);
1614 eth_igb_rx_queue_release(void *rxq)
1616 igb_rx_queue_release(rxq);
1620 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1622 static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1625 /* Zero out HW ring memory */
1626 for (i = 0; i < rxq->nb_rx_desc; i++) {
1627 rxq->rx_ring[i] = zeroed_desc;
1631 rxq->pkt_first_seg = NULL;
1632 rxq->pkt_last_seg = NULL;
1636 igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1638 uint64_t rx_offload_capa;
1641 rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
1642 DEV_RX_OFFLOAD_VLAN_FILTER |
1643 DEV_RX_OFFLOAD_IPV4_CKSUM |
1644 DEV_RX_OFFLOAD_UDP_CKSUM |
1645 DEV_RX_OFFLOAD_TCP_CKSUM |
1646 DEV_RX_OFFLOAD_JUMBO_FRAME |
1647 DEV_RX_OFFLOAD_KEEP_CRC |
1648 DEV_RX_OFFLOAD_SCATTER;
1650 return rx_offload_capa;
1654 igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1656 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1657 uint64_t rx_queue_offload_capa;
1659 switch (hw->mac.type) {
1660 case e1000_vfadapt_i350:
1662 * As only one Rx queue can be used, let the per-queue offload
1663 * capability be the same as the per-port offload capability
1664 * for convenience.
1666 rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1669 rx_queue_offload_capa = 0;
1671 return rx_queue_offload_capa;
1675 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1678 unsigned int socket_id,
1679 const struct rte_eth_rxconf *rx_conf,
1680 struct rte_mempool *mp)
1682 const struct rte_memzone *rz;
1683 struct igb_rx_queue *rxq;
1684 struct e1000_hw *hw;
1688 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1690 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1693 * Validate number of receive descriptors.
1694 * It must not exceed hardware maximum, and must be multiple
1697 if (nb_desc % IGB_RXD_ALIGN != 0 ||
1698 (nb_desc > E1000_MAX_RING_DESC) ||
1699 (nb_desc < E1000_MIN_RING_DESC)) {
1703 /* Free memory prior to re-allocation if needed */
1704 if (dev->data->rx_queues[queue_idx] != NULL) {
1705 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1706 dev->data->rx_queues[queue_idx] = NULL;
1709 /* First allocate the RX queue data structure. */
1710 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1711 RTE_CACHE_LINE_SIZE);
1714 rxq->offloads = offloads;
1716 rxq->nb_rx_desc = nb_desc;
1717 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1718 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1719 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1720 if (rxq->wthresh > 0 &&
1721 (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1723 rxq->drop_en = rx_conf->rx_drop_en;
1724 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1725 rxq->queue_id = queue_idx;
1726 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1727 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1728 rxq->port_id = dev->data->port_id;
1729 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1730 rxq->crc_len = RTE_ETHER_CRC_LEN;
1735 * Allocate RX ring hardware descriptors. A memzone large enough to
1736 * handle the maximum ring size is allocated in order to allow for
1737 * resizing in later calls to the queue setup function.
1739 size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1740 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1741 E1000_ALIGN, socket_id);
1743 igb_rx_queue_release(rxq);
1746 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1747 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1748 rxq->rx_ring_phys_addr = rz->iova;
1749 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1751 /* Allocate software ring. */
1752 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1753 sizeof(struct igb_rx_entry) * nb_desc,
1754 RTE_CACHE_LINE_SIZE);
1755 if (rxq->sw_ring == NULL) {
1756 igb_rx_queue_release(rxq);
1759 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1760 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1762 dev->data->rx_queues[queue_idx] = rxq;
1763 igb_reset_rx_queue(rxq);
1769 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1771 #define IGB_RXQ_SCAN_INTERVAL 4
1772 volatile union e1000_adv_rx_desc *rxdp;
1773 struct igb_rx_queue *rxq;
1776 rxq = dev->data->rx_queues[rx_queue_id];
1777 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1779 while ((desc < rxq->nb_rx_desc) &&
1780 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1781 desc += IGB_RXQ_SCAN_INTERVAL;
1782 rxdp += IGB_RXQ_SCAN_INTERVAL;
1783 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1784 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1785 desc - rxq->nb_rx_desc]);
1792 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1794 volatile union e1000_adv_rx_desc *rxdp;
1795 struct igb_rx_queue *rxq = rx_queue;
1798 if (unlikely(offset >= rxq->nb_rx_desc))
1800 desc = rxq->rx_tail + offset;
1801 if (desc >= rxq->nb_rx_desc)
1802 desc -= rxq->nb_rx_desc;
1804 rxdp = &rxq->rx_ring[desc];
1805 return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1809 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1811 struct igb_rx_queue *rxq = rx_queue;
1812 volatile uint32_t *status;
1815 if (unlikely(offset >= rxq->nb_rx_desc))
1818 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1819 return RTE_ETH_RX_DESC_UNAVAIL;
1821 desc = rxq->rx_tail + offset;
1822 if (desc >= rxq->nb_rx_desc)
1823 desc -= rxq->nb_rx_desc;
1825 status = &rxq->rx_ring[desc].wb.upper.status_error;
1826 if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1827 return RTE_ETH_RX_DESC_DONE;
1829 return RTE_ETH_RX_DESC_AVAIL;
1833 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1835 struct igb_tx_queue *txq = tx_queue;
1836 volatile uint32_t *status;
1839 if (unlikely(offset >= txq->nb_tx_desc))
1842 desc = txq->tx_tail + offset;
1843 if (desc >= txq->nb_tx_desc)
1844 desc -= txq->nb_tx_desc;
1846 status = &txq->tx_ring[desc].wb.status;
1847 if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1848 return RTE_ETH_TX_DESC_DONE;
1850 return RTE_ETH_TX_DESC_FULL;
1854 igb_dev_clear_queues(struct rte_eth_dev *dev)
1857 struct igb_tx_queue *txq;
1858 struct igb_rx_queue *rxq;
1860 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1861 txq = dev->data->tx_queues[i];
1863 igb_tx_queue_release_mbufs(txq);
1864 igb_reset_tx_queue(txq, dev);
1868 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1869 rxq = dev->data->rx_queues[i];
1871 igb_rx_queue_release_mbufs(rxq);
1872 igb_reset_rx_queue(rxq);
1878 igb_dev_free_queues(struct rte_eth_dev *dev)
1882 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1883 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1884 dev->data->rx_queues[i] = NULL;
1886 dev->data->nb_rx_queues = 0;
1888 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1889 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1890 dev->data->tx_queues[i] = NULL;
1892 dev->data->nb_tx_queues = 0;
1896 * Receive Side Scaling (RSS).
1897 * See section 7.1.1.7 in the following document:
1898 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1901 * The source and destination IP addresses of the IP header and the source and
1902 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1903 * against a configurable random key to compute a 32-bit RSS hash result.
1904 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1905 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1906 * RSS output index, which is used as the RX queue index in which to store the
1908 * The following output is supplied in the RX write-back descriptor:
1909 * - 32-bit result of the Microsoft RSS hash function,
1910 * - 4-bit RSS type field.
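 *
 * In other words, assuming reta[] held the 128 one-byte redirection
 * entries, the hardware's queue selection is equivalent to:
 *
 *	rx_queue = reta[rss_hash & 0x7F];
 *
 * with rss_hash also written back to the descriptor for software use.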
1914 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1915 * Used as the default key.
1917 static uint8_t rss_intel_key[40] = {
1918 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1919 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1920 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1921 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1922 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1926 igb_rss_disable(struct rte_eth_dev *dev)
1928 struct e1000_hw *hw;
1931 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1932 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1933 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1934 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1938 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1946 hash_key = rss_conf->rss_key;
1947 if (hash_key != NULL) {
1948 /* Fill in RSS hash key */
1949 for (i = 0; i < 10; i++) {
1950 rss_key = hash_key[(i * 4)];
1951 rss_key |= hash_key[(i * 4) + 1] << 8;
1952 rss_key |= hash_key[(i * 4) + 2] << 16;
1953 rss_key |= hash_key[(i * 4) + 3] << 24;
1954 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1958 /* Set configured hashing protocols in MRQC register */
1959 rss_hf = rss_conf->rss_hf;
1960 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1961 if (rss_hf & ETH_RSS_IPV4)
1962 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1963 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1964 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1965 if (rss_hf & ETH_RSS_IPV6)
1966 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1967 if (rss_hf & ETH_RSS_IPV6_EX)
1968 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1969 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1970 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1971 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1972 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1973 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1974 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1975 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1976 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1977 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1978 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1979 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1983 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1984 struct rte_eth_rss_conf *rss_conf)
1986 struct e1000_hw *hw;
1990 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1993 * Before changing anything, first check that the update RSS operation
1994 * does not attempt to disable RSS, if RSS was enabled at
1995 * initialization time, or does not attempt to enable RSS, if RSS was
1996 * disabled at initialization time.
1998 rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1999 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2000 if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
2001 if (rss_hf != 0) /* Enable RSS */
2003 return 0; /* Nothing to do */
2006 if (rss_hf == 0) /* Disable RSS */
2008 igb_hw_rss_hash_set(hw, rss_conf);
2012 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
2013 struct rte_eth_rss_conf *rss_conf)
2015 struct e1000_hw *hw;
2022 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2023 hash_key = rss_conf->rss_key;
2024 if (hash_key != NULL) {
2025 /* Return RSS hash key */
2026 for (i = 0; i < 10; i++) {
2027 rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2028 hash_key[(i * 4)] = rss_key & 0x000000FF;
2029 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2030 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2031 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2035 /* Get RSS functions configured in MRQC register */
2036 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2037 if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2038 rss_conf->rss_hf = 0;
2042 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2043 rss_hf |= ETH_RSS_IPV4;
2044 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2045 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2046 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2047 rss_hf |= ETH_RSS_IPV6;
2048 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2049 rss_hf |= ETH_RSS_IPV6_EX;
2050 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2051 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2052 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2053 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2054 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2055 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2056 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2057 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2058 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2059 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2060 rss_conf->rss_hf = rss_hf;
2065 igb_rss_configure(struct rte_eth_dev *dev)
2067 struct rte_eth_rss_conf rss_conf;
2068 struct e1000_hw *hw;
2072 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2074 /* Fill in redirection table. */
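/*
 * Each 32-bit RETA register packs four one-byte entries; reta below is
 * assumed to be a byte-array/uint32_t union, filled one byte at a time
 * and flushed to E1000_RETA(i >> 2) once every fourth iteration.
 */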
2075 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2076 for (i = 0; i < 128; i++) {
2083 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2084 i % dev->data->nb_rx_queues : 0);
2085 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2087 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2091 * Configure the RSS key and the RSS protocols used to compute
2092 * the RSS hash of input packets.
2094 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2095 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2096 igb_rss_disable(dev);
2099 if (rss_conf.rss_key == NULL)
2100 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2101 igb_hw_rss_hash_set(hw, &rss_conf);
2105 * Check whether the MAC type supports VMDq.
2106 * Return 1 if supported; otherwise return 0.
2109 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2111 const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2113 switch (hw->mac.type) {
2134 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2140 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2142 struct rte_eth_vmdq_rx_conf *cfg;
2143 struct e1000_hw *hw;
2144 uint32_t mrqc, vt_ctl, vmolr, rctl;
2147 PMD_INIT_FUNC_TRACE();
2149 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2150 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
	/* Check whether the MAC type supports VMDq; 0 means it does not */
2153 if (igb_is_vmdq_supported(dev) == 0)
2156 igb_rss_disable(dev);
	/* RCTL: enable VLAN filtering */
2159 rctl = E1000_READ_REG(hw, E1000_RCTL);
2160 rctl |= E1000_RCTL_VFE;
2161 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2163 /* MRQC: enable vmdq */
2164 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2165 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2166 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2168 /* VTCTL: pool selection according to VLAN tag */
2169 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2170 if (cfg->enable_default_pool)
2171 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2172 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2173 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2175 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2176 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2177 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2178 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2181 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2182 vmolr |= E1000_VMOLR_AUPE;
2183 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2184 vmolr |= E1000_VMOLR_ROMPE;
2185 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2186 vmolr |= E1000_VMOLR_ROPE;
2187 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2188 vmolr |= E1000_VMOLR_BAM;
2189 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2190 vmolr |= E1000_VMOLR_MPME;
2192 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
	 * VMOLR: set STRVLAN when IGMAC in VT_CTL is set.
	 * Both 82576 and 82580 support it.
2199 if (hw->mac.type != e1000_i350) {
2200 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2201 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2202 vmolr |= E1000_VMOLR_STRVLAN;
2203 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2207 /* VFTA - enable all vlan filters */
2208 for (i = 0; i < IGB_VFTA_SIZE; i++)
2209 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
	/* VFRE: enable all 8 pools for RX; both 82576 and i350 support it */
2212 if (hw->mac.type != e1000_82580)
2213 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
	 * RAH/RAL - allow pools to read specific MAC addresses.
	 * In this case, all pools should be able to read from MAC address 0.
2219 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2220 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2222 /* VLVF: set up filters for vlan tags as configured */
2223 for (i = 0; i < cfg->nb_pool_maps; i++) {
2224 /* set vlan id in VF register and set the valid bit */
		E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE |
			(cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) |
			((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT) &
			E1000_VLVF_POOLSEL_MASK)));
2231 E1000_WRITE_FLUSH(hw);
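
/*
 * Illustrative sketch (not used by the driver): each VLVF entry written
 * above packs the valid bit, the VLAN ID and the 8-pool membership bitmap
 * into one register. A hypothetical helper assembling such an entry:
 */
static inline uint32_t
igb_example_vlvf_entry(uint16_t vlan_id, uint8_t pool_mask)
{
	return E1000_VLVF_VLANID_ENABLE |
	       (uint32_t)(vlan_id & ETH_VLAN_ID_MAX) |
	       (((uint32_t)pool_mask << E1000_VLVF_POOLSEL_SHIFT) &
		E1000_VLVF_POOLSEL_MASK);
}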
2237 /*********************************************************************
2239 * Enable receive unit.
2241 **********************************************************************/
2244 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2246 struct igb_rx_entry *rxe = rxq->sw_ring;
2250 /* Initialize software ring entries. */
2251 for (i = 0; i < rxq->nb_rx_desc; i++) {
2252 volatile union e1000_adv_rx_desc *rxd;
2253 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2256 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2257 "queue_id=%hu", rxq->queue_id);
2261 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2262 rxd = &rxq->rx_ring[i];
2263 rxd->read.hdr_addr = 0;
2264 rxd->read.pkt_addr = dma_addr;
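		/*
		 * Zeroing hdr_addr also clears the DD bit of the write-back
		 * layout, which overlays this field, so the descriptor is
		 * seen as owned by hardware again.
		 */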
2271 #define E1000_MRQC_DEF_Q_SHIFT (3)
2273 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2275 struct e1000_hw *hw =
2276 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2279 if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2281 * SRIOV active scheme
		 * FIXME: add support for RSS together with VMDq & SR-IOV
2284 mrqc = E1000_MRQC_ENABLE_VMDQ;
		/* Def_Q = 011b: default queue ignored, taken from VT_CTL.DEF_PL */
2286 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2287 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
	} else if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
2290 * SRIOV inactive scheme
2292 switch (dev->data->dev_conf.rxmode.mq_mode) {
2294 igb_rss_configure(dev);
2296 case ETH_MQ_RX_VMDQ_ONLY:
2297 /*Configure general VMDQ only RX parameters*/
2298 igb_vmdq_rx_hw_configure(dev);
2300 case ETH_MQ_RX_NONE:
2301 /* if mq_mode is none, disable rss mode.*/
2303 igb_rss_disable(dev);
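			/*
			 * Illustrative note: the mq_mode switched on above
			 * comes from the application's port configuration,
			 * set before rte_eth_dev_configure(), e.g.
			 *
			 *	struct rte_eth_conf port_conf = {
			 *		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
			 *		.rx_adv_conf.rss_conf = {
			 *			.rss_key = NULL,
			 *			.rss_hf = ETH_RSS_IPV4 |
			 *				  ETH_RSS_NONFRAG_IPV4_TCP,
			 *		},
			 *	};
			 *
			 * (a NULL key falls back to the default
			 * rss_intel_key.)
			 */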
2312 eth_igb_rx_init(struct rte_eth_dev *dev)
2314 struct rte_eth_rxmode *rxmode;
2315 struct e1000_hw *hw;
2316 struct igb_rx_queue *rxq;
2321 uint16_t rctl_bsize;
2325 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2329 * Make sure receives are disabled while setting
2330 * up the descriptor ring.
2332 rctl = E1000_READ_REG(hw, E1000_RCTL);
2333 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2335 rxmode = &dev->data->dev_conf.rxmode;
2338 * Configure support of jumbo frames, if any.
2340 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
2341 rctl |= E1000_RCTL_LPE;
		 * Set the maximum packet length by default; it may be
		 * updated later when dual VLAN is enabled or disabled.
2347 E1000_WRITE_REG(hw, E1000_RLPML,
2348 dev->data->dev_conf.rxmode.max_rx_pkt_len +
2351 rctl &= ~E1000_RCTL_LPE;
2353 /* Configure and enable each RX queue. */
2355 dev->rx_pkt_burst = eth_igb_recv_pkts;
2356 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2360 rxq = dev->data->rx_queues[i];
		 * VLAN tags of i350 and i354 loopback packets are
		 * byte-swapped.
2366 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2367 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2368 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2370 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2373 /* Allocate buffers for descriptor rings and set up queue */
2374 ret = igb_alloc_rx_queue_mbufs(rxq);
2379 * Reset crc_len in case it was changed after queue setup by a
2382 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2383 rxq->crc_len = RTE_ETHER_CRC_LEN;
2387 bus_addr = rxq->rx_ring_phys_addr;
2388 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2390 sizeof(union e1000_adv_rx_desc));
2391 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2392 (uint32_t)(bus_addr >> 32));
2393 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2395 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2398 * Configure RX buffer size.
2400 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2401 RTE_PKTMBUF_HEADROOM);
2402 if (buf_size >= 1024) {
2404 * Configure the BSIZEPACKET field of the SRRCTL
2405 * register of the queue.
2406 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2407 * If this field is equal to 0b, then RCTL.BSIZE
2408 * determines the RX packet buffer size.
2410 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2411 E1000_SRRCTL_BSIZEPKT_MASK);
2412 buf_size = (uint16_t) ((srrctl &
2413 E1000_SRRCTL_BSIZEPKT_MASK) <<
2414 E1000_SRRCTL_BSIZEPKT_SHIFT);
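			/*
			 * Example: a 2048-byte data room gives
			 * 2048 >> 10 = 2, i.e. an effective 2048-byte
			 * buffer; 2047 bytes would round down to 1024
			 * and may then force scattered RX below.
			 */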
		/* Add dual-VLAN tag length to support dual VLAN (QinQ) */
2417 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2418 2 * VLAN_TAG_SIZE) > buf_size){
2419 if (!dev->data->scattered_rx)
2421 "forcing scatter mode");
2422 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2423 dev->data->scattered_rx = 1;
2427 * Use BSIZE field of the device RCTL register.
2429 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2430 rctl_bsize = buf_size;
2431 if (!dev->data->scattered_rx)
2432 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2433 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2434 dev->data->scattered_rx = 1;
		/* Drop packets when no descriptors are available, if enabled */
2439 srrctl |= E1000_SRRCTL_DROP_EN;
2441 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2443 /* Enable this RX queue. */
2444 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2445 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2446 rxdctl &= 0xFFF00000;
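		/*
		 * The mask above clears the threshold fields; PTHRESH,
		 * HTHRESH and WTHRESH are then packed at bits [4:0],
		 * [12:8] and [20:16] of RXDCTL respectively.
		 */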
2447 rxdctl |= (rxq->pthresh & 0x1F);
2448 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2449 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2450 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2453 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2454 if (!dev->data->scattered_rx)
2455 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2456 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2457 dev->data->scattered_rx = 1;
2461 * Setup BSIZE field of RCTL register, if needed.
	 * Buffer sizes >= 1024 are not set up in the RCTL
2463 * register, since the code above configures the SRRCTL register of
2464 * the RX queue in such a case.
2465 * All configurable sizes are:
2466 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2467 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
2468 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
2469 * 2048: rctl |= E1000_RCTL_SZ_2048;
2470 * 1024: rctl |= E1000_RCTL_SZ_1024;
2471 * 512: rctl |= E1000_RCTL_SZ_512;
2472 * 256: rctl |= E1000_RCTL_SZ_256;
2474 if (rctl_bsize > 0) {
2475 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2476 rctl |= E1000_RCTL_SZ_512;
2477 else /* 256 <= buf_size < 512 - use 256 */
2478 rctl |= E1000_RCTL_SZ_256;
2482 * Configure RSS if device configured with multiple RX queues.
2484 igb_dev_mq_rx_configure(dev);
2486 /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2487 rctl |= E1000_READ_REG(hw, E1000_RCTL);
2490 * Setup the Checksum Register.
2491 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2493 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2494 rxcsum |= E1000_RXCSUM_PCSD;
2496 /* Enable both L3/L4 rx checksum offload */
2497 if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
2498 rxcsum |= E1000_RXCSUM_IPOFL;
2500 rxcsum &= ~E1000_RXCSUM_IPOFL;
2501 if (rxmode->offloads &
2502 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM))
2503 rxcsum |= E1000_RXCSUM_TUOFL;
2505 rxcsum &= ~E1000_RXCSUM_TUOFL;
2506 if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
2507 rxcsum |= E1000_RXCSUM_CRCOFL;
2509 rxcsum &= ~E1000_RXCSUM_CRCOFL;
2511 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
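	/*
	 * Note: PCSD makes the RX descriptor report the RSS hash instead
	 * of the per-packet checksum word; IPOFL and TUOFL enable IPv4
	 * and TCP/UDP checksum validation, and CRCOFL enables full-packet
	 * (SCTP CRC32) offload on MACs that support it.
	 */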
2513 /* Setup the Receive Control Register. */
2514 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
2515 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2517 /* clear STRCRC bit in all queues */
2518 if (hw->mac.type == e1000_i350 ||
2519 hw->mac.type == e1000_i210 ||
2520 hw->mac.type == e1000_i211 ||
2521 hw->mac.type == e1000_i354) {
2522 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2523 rxq = dev->data->rx_queues[i];
2524 uint32_t dvmolr = E1000_READ_REG(hw,
2525 E1000_DVMOLR(rxq->reg_idx));
2526 dvmolr &= ~E1000_DVMOLR_STRCRC;
2527 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2531 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2533 /* set STRCRC bit in all queues */
2534 if (hw->mac.type == e1000_i350 ||
2535 hw->mac.type == e1000_i210 ||
2536 hw->mac.type == e1000_i211 ||
2537 hw->mac.type == e1000_i354) {
2538 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2539 rxq = dev->data->rx_queues[i];
2540 uint32_t dvmolr = E1000_READ_REG(hw,
2541 E1000_DVMOLR(rxq->reg_idx));
2542 dvmolr |= E1000_DVMOLR_STRCRC;
2543 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2548 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2549 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2550 E1000_RCTL_RDMTS_HALF |
2551 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2553 /* Make sure VLAN Filters are off. */
2554 if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2555 rctl &= ~E1000_RCTL_VFE;
2556 /* Don't store bad packets. */
2557 rctl &= ~E1000_RCTL_SBP;
2559 /* Enable Receives. */
2560 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2563 * Setup the HW Rx Head and Tail Descriptor Pointers.
2564 * This needs to be done after enable.
2566 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2567 rxq = dev->data->rx_queues[i];
2568 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2569 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
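		/*
		 * RDT = nb_rx_desc - 1 leaves one descriptor as a gap:
		 * by convention RDH == RDT means the ring is empty, so
		 * the tail must never fully catch up with the head.
		 */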
2575 /*********************************************************************
2577 * Enable transmit unit.
2579 **********************************************************************/
2581 eth_igb_tx_init(struct rte_eth_dev *dev)
2583 struct e1000_hw *hw;
2584 struct igb_tx_queue *txq;
2589 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2591 /* Setup the Base and Length of the Tx Descriptor Rings. */
2592 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2594 txq = dev->data->tx_queues[i];
2595 bus_addr = txq->tx_ring_phys_addr;
2597 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2599 sizeof(union e1000_adv_tx_desc));
2600 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2601 (uint32_t)(bus_addr >> 32));
2602 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2604 /* Setup the HW Tx Head and Tail descriptor pointers. */
2605 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2606 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2608 /* Setup Transmit threshold registers. */
2609 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2610 txdctl |= txq->pthresh & 0x1F;
2611 txdctl |= ((txq->hthresh & 0x1F) << 8);
2612 txdctl |= ((txq->wthresh & 0x1F) << 16);
2613 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2614 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2617 /* Program the Transmit Control Register. */
2618 tctl = E1000_READ_REG(hw, E1000_TCTL);
2619 tctl &= ~E1000_TCTL_CT;
2620 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2621 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2623 e1000_config_collision_dist(hw);
2625 /* This write will effectively turn on the transmit unit. */
2626 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2629 /*********************************************************************
2631 * Enable VF receive unit.
2633 **********************************************************************/
2635 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2637 struct e1000_hw *hw;
2638 struct igb_rx_queue *rxq;
2641 uint16_t rctl_bsize;
2645 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2648 e1000_rlpml_set_vf(hw,
2649 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2652 /* Configure and enable each RX queue. */
2654 dev->rx_pkt_burst = eth_igb_recv_pkts;
2655 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2659 rxq = dev->data->rx_queues[i];
		 * VLAN tags of i350 VF loopback packets are
		 * byte-swapped.
2665 if (hw->mac.type == e1000_vfadapt_i350) {
2666 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2667 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2669 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2672 /* Allocate buffers for descriptor rings and set up queue */
2673 ret = igb_alloc_rx_queue_mbufs(rxq);
2677 bus_addr = rxq->rx_ring_phys_addr;
2678 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2680 sizeof(union e1000_adv_rx_desc));
2681 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2682 (uint32_t)(bus_addr >> 32));
2683 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2685 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2688 * Configure RX buffer size.
2690 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2691 RTE_PKTMBUF_HEADROOM);
2692 if (buf_size >= 1024) {
2694 * Configure the BSIZEPACKET field of the SRRCTL
2695 * register of the queue.
2696 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2697 * If this field is equal to 0b, then RCTL.BSIZE
2698 * determines the RX packet buffer size.
2700 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2701 E1000_SRRCTL_BSIZEPKT_MASK);
2702 buf_size = (uint16_t) ((srrctl &
2703 E1000_SRRCTL_BSIZEPKT_MASK) <<
2704 E1000_SRRCTL_BSIZEPKT_SHIFT);
		/* Add dual-VLAN tag length to support dual VLAN (QinQ) */
2707 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2708 2 * VLAN_TAG_SIZE) > buf_size){
2709 if (!dev->data->scattered_rx)
2711 "forcing scatter mode");
2712 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2713 dev->data->scattered_rx = 1;
2717 * Use BSIZE field of the device RCTL register.
2719 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2720 rctl_bsize = buf_size;
2721 if (!dev->data->scattered_rx)
2722 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2723 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2724 dev->data->scattered_rx = 1;
		/* Drop packets when no descriptors are available, if enabled */
2729 srrctl |= E1000_SRRCTL_DROP_EN;
2731 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2733 /* Enable this RX queue. */
2734 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2735 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2736 rxdctl &= 0xFFF00000;
2737 rxdctl |= (rxq->pthresh & 0x1F);
2738 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2739 if (hw->mac.type == e1000_vfadapt) {
			 * Workaround for 82576 VF erratum: force WTHRESH
			 * to 1 to avoid write-back sometimes not being
			 * triggered.
			PMD_INIT_LOG(DEBUG, "Forcing RX WTHRESH to 1");
2749 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2750 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2753 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2754 if (!dev->data->scattered_rx)
2755 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2756 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2757 dev->data->scattered_rx = 1;
2761 * Setup the HW Rx Head and Tail Descriptor Pointers.
2762 * This needs to be done after enable.
2764 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2765 rxq = dev->data->rx_queues[i];
2766 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2767 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2773 /*********************************************************************
2775 * Enable VF transmit unit.
2777 **********************************************************************/
2779 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2781 struct e1000_hw *hw;
2782 struct igb_tx_queue *txq;
2786 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2788 /* Setup the Base and Length of the Tx Descriptor Rings. */
2789 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2792 txq = dev->data->tx_queues[i];
2793 bus_addr = txq->tx_ring_phys_addr;
2794 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2796 sizeof(union e1000_adv_tx_desc));
2797 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2798 (uint32_t)(bus_addr >> 32));
2799 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2801 /* Setup the HW Tx Head and Tail descriptor pointers. */
2802 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2803 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2805 /* Setup Transmit threshold registers. */
2806 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2807 txdctl |= txq->pthresh & 0x1F;
2808 txdctl |= ((txq->hthresh & 0x1F) << 8);
2809 if (hw->mac.type == e1000_82576) {
			 * Workaround for 82576 VF erratum: force WTHRESH
			 * to 1 to avoid write-back sometimes not being
			 * triggered.
			PMD_INIT_LOG(DEBUG, "Forcing TX WTHRESH to 1");
2819 txdctl |= ((txq->wthresh & 0x1F) << 16);
2820 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2821 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2827 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2828 struct rte_eth_rxq_info *qinfo)
2830 struct igb_rx_queue *rxq;
2832 rxq = dev->data->rx_queues[queue_id];
2834 qinfo->mp = rxq->mb_pool;
2835 qinfo->scattered_rx = dev->data->scattered_rx;
2836 qinfo->nb_desc = rxq->nb_rx_desc;
2838 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2839 qinfo->conf.rx_drop_en = rxq->drop_en;
2840 qinfo->conf.offloads = rxq->offloads;
2844 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2845 struct rte_eth_txq_info *qinfo)
2847 struct igb_tx_queue *txq;
2849 txq = dev->data->tx_queues[queue_id];
2851 qinfo->nb_desc = txq->nb_tx_desc;
2853 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2854 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2855 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2856 qinfo->conf.offloads = txq->offloads;
2860 igb_rss_conf_init(struct rte_eth_dev *dev,
2861 struct igb_rte_flow_rss_conf *out,
2862 const struct rte_flow_action_rss *in)
2864 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2866 if (in->key_len > RTE_DIM(out->key) ||
2867 ((hw->mac.type == e1000_82576) &&
2868 (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
2869 ((hw->mac.type != e1000_82576) &&
2870 (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
2872 out->conf = (struct rte_flow_action_rss){
2876 .key_len = in->key_len,
2877 .queue_num = in->queue_num,
2878 .key = memcpy(out->key, in->key, in->key_len),
2879 .queue = memcpy(out->queue, in->queue,
2880 sizeof(*in->queue) * in->queue_num),
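		/*
		 * Note: memcpy() returns its destination, so the .key and
		 * .queue initializers above both copy the caller's arrays
		 * into out->key / out->queue and store pointers to those
		 * private copies.
		 */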
2886 igb_action_rss_same(const struct rte_flow_action_rss *comp,
2887 const struct rte_flow_action_rss *with)
2889 return (comp->func == with->func &&
2890 comp->level == with->level &&
2891 comp->types == with->types &&
2892 comp->key_len == with->key_len &&
2893 comp->queue_num == with->queue_num &&
2894 !memcmp(comp->key, with->key, with->key_len) &&
2895 !memcmp(comp->queue, with->queue,
2896 sizeof(*with->queue) * with->queue_num));
2900 igb_config_rss_filter(struct rte_eth_dev *dev,
2901 struct igb_rte_flow_rss_conf *conf, bool add)
2905 struct rte_eth_rss_conf rss_conf = {
2906 .rss_key = conf->conf.key_len ?
2907 (void *)(uintptr_t)conf->conf.key : NULL,
2908 .rss_key_len = conf->conf.key_len,
2909 .rss_hf = conf->conf.types,
2911 struct e1000_filter_info *filter_info =
2912 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2913 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2918 if (igb_action_rss_same(&filter_info->rss_info.conf,
2920 igb_rss_disable(dev);
2921 memset(&filter_info->rss_info, 0,
2922 sizeof(struct igb_rte_flow_rss_conf));
2928 if (filter_info->rss_info.conf.queue_num)
2931 /* Fill in redirection table. */
2932 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2933 for (i = 0, j = 0; i < 128; i++, j++) {
2940 if (j == conf->conf.queue_num)
2942 q_idx = conf->conf.queue[j];
2943 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2945 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2948 /* Configure the RSS key and the RSS protocols used to compute
2949 * the RSS hash of input packets.
2951 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2952 igb_rss_disable(dev);
2955 if (rss_conf.rss_key == NULL)
2956 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2957 igb_hw_rss_hash_set(hw, &rss_conf);
2959 if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))