/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation
 */

#include <rte_interrupts.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>

#include "e1000_logs.h"
#include "base/e1000_api.h"
#include "e1000_ethdev.h"

#ifdef RTE_LIBRTE_IEEE1588
#define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
#else
#define IGB_TX_IEEE1588_TMST 0
#endif

/* Bit mask indicating which bits are required for building a TX context. */
#define IGB_TX_OFFLOAD_MASK (			 \
		PKT_TX_OUTER_IPV6 |		 \
		PKT_TX_OUTER_IPV4 |		 \
		PKT_TX_IPV6 |			 \
		PKT_TX_IPV4 |			 \
		PKT_TX_VLAN_PKT |		 \
		PKT_TX_IP_CKSUM |		 \
		PKT_TX_L4_MASK |		 \
		PKT_TX_TCP_SEG |		 \
		IGB_TX_IEEE1588_TMST)

#define IGB_TX_OFFLOAD_NOTSUP_MASK \
		(PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
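
/*
 * Illustrative note (not from the original source): eth_igb_prep_pkts()
 * below rejects any packet whose ol_flags intersect
 * IGB_TX_OFFLOAD_NOTSUP_MASK. For example, a packet requesting only
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM passes, while one requesting a
 * tunnel offload such as PKT_TX_TUNNEL_VXLAN is refused, since that
 * flag falls outside IGB_TX_OFFLOAD_MASK.
 */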

/**
 * Structure associated with each descriptor of the RX ring of a RX queue.
 */
struct igb_rx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
};

/**
 * Structure associated with each descriptor of the TX ring of a TX queue.
 */
struct igb_tx_entry {
	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
	uint16_t next_id; /**< Index of next descriptor in ring. */
	uint16_t last_id; /**< Index of last scattered descriptor. */
};

/**
 * rx queue flags
 */
enum igb_rxq_flags {
	IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
};

/**
 * Structure associated with each RX queue.
 */
struct igb_rx_queue {
	struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
	volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
	struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
	uint16_t            rx_tail;    /**< current value of RDT register. */
	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
	uint16_t            queue_id;   /**< RX queue index. */
	uint16_t            reg_idx;    /**< RX queue register index. */
	uint16_t            port_id;    /**< Device port identifier. */
	uint8_t             pthresh;    /**< Prefetch threshold register. */
	uint8_t             hthresh;    /**< Host threshold register. */
	uint8_t             wthresh;    /**< Write-back threshold register. */
	uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
	uint8_t             drop_en;    /**< If not 0, set SRRCTL.Drop_En. */
	uint32_t            flags;      /**< RX flags. */
	uint64_t            offloads;   /**< offloads of DEV_RX_OFFLOAD_* */
};

/** Hardware context number. */
enum igb_advctx_num {
	IGB_CTX_0   = 0, /**< CTX0    */
	IGB_CTX_1   = 1, /**< CTX1    */
	IGB_CTX_NUM = 2, /**< CTX_NUM */
};

/** Offload features */
union igb_tx_offload {
	uint64_t data;
	struct {
		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
		uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier (CPU order). */
		uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
		uint64_t tso_segsz:16; /**< TCP TSO segment size. */

		/* uint64_t unused:8; */
	};
};

/*
 * Compare mask for igb_tx_offload.data;
 * it must stay in sync with the igb_tx_offload layout above.
 */
#define TX_MACIP_LEN_CMP_MASK	0x000000000000FFFFULL /**< L2/L3 header mask. */
#define TX_VLAN_CMP_MASK	0x00000000FFFF0000ULL /**< VLAN mask. */
#define TX_TCP_LEN_CMP_MASK	0x000000FF00000000ULL /**< TCP header mask. */
#define TX_TSO_MSS_CMP_MASK	0x00FFFF0000000000ULL /**< TSO segsz mask. */
/** MAC + IP + TCP + MSS mask. */
#define TX_TSO_CMP_MASK	\
	(TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
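
/*
 * Worked example of the mask layout (descriptive, added for clarity):
 * with the igb_tx_offload bit-fields above, the 64-bit 'data' word is
 *   bits  0..15  l3_len(9) + l2_len(7)  -> TX_MACIP_LEN_CMP_MASK (0xFFFF)
 *   bits 16..31  vlan_tci(16)           -> TX_VLAN_CMP_MASK
 *   bits 32..39  l4_len(8)              -> TX_TCP_LEN_CMP_MASK
 *   bits 40..55  tso_segsz(16)          -> TX_TSO_MSS_CMP_MASK
 * so TX_TSO_CMP_MASK selects every field a TSO context depends on,
 * except the VLAN tag.
 */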

/**
 * Structure used to check whether a new context descriptor needs to be built.
 */
struct igb_advctx_info {
	uint64_t flags; /**< ol_flags related to context build. */
	/** tx offload: vlan, tso, l2-l3-l4 lengths. */
	union igb_tx_offload tx_offload;
	/** compare mask for tx offload. */
	union igb_tx_offload tx_offload_mask;
};

/**
 * Structure associated with each TX queue.
 */
struct igb_tx_queue {
	volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address. */
	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
	struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
	uint32_t               txd_type;   /**< Device-specific TXD type. */
	uint16_t               nb_tx_desc; /**< number of TX descriptors. */
	uint16_t               tx_tail;    /**< Current value of TDT register. */
	uint16_t               tx_head;    /**< Index of first used TX descriptor. */
	uint16_t               queue_id;   /**< TX queue index. */
	uint16_t               reg_idx;    /**< TX queue register index. */
	uint16_t               port_id;    /**< Device port identifier. */
	uint8_t                pthresh;    /**< Prefetch threshold register. */
	uint8_t                hthresh;    /**< Host threshold register. */
	uint8_t                wthresh;    /**< Write-back threshold register. */
	uint32_t               ctx_curr;   /**< Currently used hardware context. */
	uint32_t               ctx_start;  /**< Start context position for TX queue. */
	struct igb_advctx_info ctx_cache[IGB_CTX_NUM]; /**< Hardware context history. */
};

#define RTE_PMD_USE_PREFETCH

#ifdef RTE_PMD_USE_PREFETCH
#define rte_igb_prefetch(p)	rte_prefetch0(p)
#else
#define rte_igb_prefetch(p)	do {} while (0)
#endif

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)	rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)	do {} while (0)
#endif

/*
 * Macro for VMDq feature for 1 GbE NIC.
 */
#define E1000_VMOLR_SIZE	(8)
#define IGB_TSO_MAX_HDRLEN	(512)
#define IGB_TSO_MAX_MSS		(9216)

/*********************************************************************
 *
 *  TX function
 *
 **********************************************************************/

/*
 * There are some hardware limitations for TCP segmentation offload, so
 * check whether the requested parameters are valid.
 */
static inline uint64_t
check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
{
	if (!(ol_req & PKT_TX_TCP_SEG))
		return ol_req;
	if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
			ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
		ol_req &= ~PKT_TX_TCP_SEG;
		ol_req |= PKT_TX_TCP_CKSUM;
	}
	return ol_req;
}
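
/*
 * Example of the fallback above (illustrative, not from the original
 * source): a request with tso_segsz = 9300 exceeds IGB_TSO_MAX_MSS
 * (9216), so PKT_TX_TCP_SEG is cleared and the packet is sent as a
 * single frame with hardware TCP checksum (PKT_TX_TCP_CKSUM) instead.
 */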

/*
 * Advanced context descriptors are almost the same between igb and ixgbe.
 * This is kept as a separate function; look for optimization opportunities
 * here. Rework is required to go with the pre-defined values.
 */
static inline void
igbe_set_xmit_ctx(struct igb_tx_queue *txq,
		volatile struct e1000_adv_tx_context_desc *ctx_txd,
		uint64_t ol_flags, union igb_tx_offload tx_offload)
{
	uint32_t type_tucmd_mlhl;
	uint32_t mss_l4len_idx;
	uint32_t ctx_idx, ctx_curr;
	uint32_t vlan_macip_lens;
	union igb_tx_offload tx_offload_mask;

	ctx_curr = txq->ctx_curr;
	ctx_idx = ctx_curr + txq->ctx_start;

	tx_offload_mask.data = 0;
	type_tucmd_mlhl = 0;

	/* Specify which HW CTX to upload. */
	mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);

	if (ol_flags & PKT_TX_VLAN_PKT)
		tx_offload_mask.data |= TX_VLAN_CMP_MASK;

	/* check if TCP segmentation required for this packet */
	if (ol_flags & PKT_TX_TCP_SEG) {
		/* implies IP cksum in IPv4 */
		if (ol_flags & PKT_TX_IP_CKSUM)
			type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
				E1000_ADVTXD_TUCMD_L4T_TCP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
		else
			type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
				E1000_ADVTXD_TUCMD_L4T_TCP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;

		tx_offload_mask.data |= TX_TSO_CMP_MASK;
		mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
		mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
	} else { /* no TSO, check if hardware checksum is needed */
		if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
			tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;

		if (ol_flags & PKT_TX_IP_CKSUM)
			type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;

		switch (ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
			mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
			break;
		case PKT_TX_TCP_CKSUM:
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
			mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
			break;
		case PKT_TX_SCTP_CKSUM:
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
			mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
			break;
		default:
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
				E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
			break;
		}
	}

	txq->ctx_cache[ctx_curr].flags = ol_flags;
	txq->ctx_cache[ctx_curr].tx_offload.data =
		tx_offload_mask.data & tx_offload.data;
	txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;

	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
	vlan_macip_lens = (uint32_t)tx_offload.data;
	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
	ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
	ctx_txd->seqnum_seed = 0;
}

/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
		union igb_tx_offload tx_offload)
{
	/* If match with the current context */
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
			return txq->ctx_curr;
	}

	/* If match with the second context */
	txq->ctx_curr ^= 1;
	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
			return txq->ctx_curr;
	}

	/* Mismatch: a new context descriptor is required. */
	return IGB_CTX_NUM;
}
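
/*
 * Illustrative note on the two-slot context cache (added for clarity):
 * two flows with stable offload settings, e.g. one VLAN flow and one
 * TSO flow, alternate freely without rebuilding contexts, since each
 * occupies one slot. A third, different flow makes both compares fail;
 * ctx_curr has then been toggled to the victim slot and IGB_CTX_NUM is
 * returned, telling the caller to build a new context descriptor there.
 */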

static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
{
	static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
	static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
	uint32_t tmp;

	tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
	tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
	tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
	return tmp;
}

static inline uint32_t
tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
{
	uint32_t cmdtype;
	static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
	static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};

	cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
	cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
	return cmdtype;
}

uint16_t
eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	       uint16_t nb_pkts)
{
	struct igb_tx_queue *txq;
	struct igb_tx_entry *sw_ring;
	struct igb_tx_entry *txe, *txn;
	volatile union e1000_adv_tx_desc *txr;
	volatile union e1000_adv_tx_desc *txd;
	struct rte_mbuf *tx_pkt;
	struct rte_mbuf *m_seg;
	uint64_t buf_dma_addr;
	uint32_t olinfo_status;
	uint32_t cmd_type_len;
	uint32_t pkt_len;
	uint16_t slen;
	uint64_t ol_flags;
	uint16_t tx_end;
	uint16_t tx_id;
	uint16_t tx_last;
	uint16_t nb_tx;
	uint64_t tx_ol_req;
	uint32_t new_ctx = 0;
	uint32_t ctx = 0;
	union igb_tx_offload tx_offload = {0};

	txq = tx_queue;
	sw_ring = txq->sw_ring;
	txr = txq->tx_ring;
	tx_id = txq->tx_tail;
	txe = &sw_ring[tx_id];

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		tx_pkt = *tx_pkts++;
		pkt_len = tx_pkt->pkt_len;

		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

		/*
		 * The number of descriptors that must be allocated for a
		 * packet is the number of segments of that packet, plus 1
		 * Context Descriptor for the VLAN Tag Identifier, if any.
		 * Determine the last TX descriptor to allocate in the TX ring
		 * for the packet, starting from the current position (tx_id)
		 * in the ring.
		 */
		tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);

		ol_flags = tx_pkt->ol_flags;
		tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;

		/* If a Context Descriptor needs to be built. */
		if (tx_ol_req) {
			tx_offload.l2_len = tx_pkt->l2_len;
			tx_offload.l3_len = tx_pkt->l3_len;
			tx_offload.l4_len = tx_pkt->l4_len;
			tx_offload.vlan_tci = tx_pkt->vlan_tci;
			tx_offload.tso_segsz = tx_pkt->tso_segsz;
			tx_ol_req = check_tso_para(tx_ol_req, tx_offload);

			ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
			/* Only allocate a context descriptor if required. */
			new_ctx = (ctx == IGB_CTX_NUM);
			ctx = txq->ctx_curr + txq->ctx_start;
			tx_last = (uint16_t) (tx_last + new_ctx);
		}
		if (tx_last >= txq->nb_tx_desc)
			tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

		PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
			   " tx_first=%u tx_last=%u",
			   (unsigned) txq->port_id,
			   (unsigned) txq->queue_id,
			   (unsigned) pkt_len,
			   (unsigned) tx_id,
			   (unsigned) tx_last);

		/*
		 * Check if there are enough free descriptors in the TX ring
		 * to transmit the next packet.
		 * This operation is based on the two following rules:
		 *
		 *   1- Only check that the last needed TX descriptor can be
		 *      allocated (by construction, if that descriptor is free,
		 *      all intermediate ones are also free).
		 *
		 *      For this purpose, the index of the last TX descriptor
		 *      used for a packet (the "last descriptor" of a packet)
		 *      is recorded in the TX entries (the last one included)
		 *      that are associated with all TX descriptors allocated
		 *      for that packet.
		 *
		 *   2- Avoid allocating the last free TX descriptor of the
		 *      ring, in order to never set the TDT register with the
		 *      same value stored in parallel by the NIC in the TDH
		 *      register, which makes the TX engine of the NIC enter
		 *      a deadlock situation.
		 *
		 *      By extension, avoid allocating a free descriptor that
		 *      belongs to the last set of free descriptors allocated
		 *      to the same packet previously transmitted.
		 */
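
		/*
		 * Worked example (illustrative): on a 512-descriptor ring,
		 * a 3-segment packet plus one context descriptor starting
		 * at tx_id = 510 needs descriptors 510, 511, 0 and 1, so
		 * tx_last wraps around to 1. Only descriptor 1 (the "last
		 * descriptor") is checked for the DD bit below.
		 */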

		/*
		 * The "last descriptor" of the previously sent packet, if any,
		 * which used the last descriptor to allocate.
		 */
		tx_end = sw_ring[tx_last].last_id;

		/*
		 * The next descriptor following that "last descriptor" in the
		 * ring.
		 */
		tx_end = sw_ring[tx_end].next_id;

		/*
		 * The "last descriptor" associated with that next descriptor.
		 */
		tx_end = sw_ring[tx_end].last_id;

		/*
		 * Check that this descriptor is free.
		 */
		if (!(txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
			/* Not enough free TX descriptors: stop here. */
			if (nb_tx == 0)
				return 0;
			goto end_of_tx;
		}

		/*
		 * Set common flags of all TX Data Descriptors.
		 *
		 * The following bits must be set in all Data Descriptors:
		 *   - E1000_ADVTXD_DTYP_DATA
		 *   - E1000_ADVTXD_DCMD_DEXT
		 *
		 * The following bits must be set in the first Data Descriptor
		 * and are ignored in the other ones:
		 *   - E1000_ADVTXD_DCMD_IFCS
		 *   - E1000_ADVTXD_MAC_1588
		 *   - E1000_ADVTXD_DCMD_VLE
		 *
		 * The following bits must only be set in the last Data
		 * Descriptor:
		 *   - E1000_TXD_CMD_EOP
		 *
		 * The following bits can be set in any Data Descriptor, but
		 * are only set in the last Data Descriptor:
		 *   - E1000_TXD_CMD_RS
		 */
		cmd_type_len = txq->txd_type |
			E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
		if (tx_ol_req & PKT_TX_TCP_SEG)
			pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
		olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
#if defined(RTE_LIBRTE_IEEE1588)
		if (ol_flags & PKT_TX_IEEE1588_TMST)
			cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
#endif
		if (tx_ol_req) {
			/* Setup TX Advanced context descriptor if required. */
			if (new_ctx) {
				volatile struct e1000_adv_tx_context_desc *
					ctx_txd;

				ctx_txd = (volatile struct
					e1000_adv_tx_context_desc *)
					&txr[tx_id];

				txn = &sw_ring[txe->next_id];
				RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);

				if (txe->mbuf != NULL) {
					rte_pktmbuf_free_seg(txe->mbuf);
					txe->mbuf = NULL;
				}

				igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);

				txe->last_id = tx_last;
				tx_id = txe->next_id;
				txe = txn;
			}

			/* Setup the TX Advanced Data Descriptor. */
			cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
			olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
			olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
		}

		m_seg = tx_pkt;
		do {
			txn = &sw_ring[txe->next_id];
			txd = &txr[tx_id];

			if (txe->mbuf != NULL)
				rte_pktmbuf_free_seg(txe->mbuf);
			txe->mbuf = m_seg;

			/*
			 * Set up transmit descriptor.
			 */
			slen = (uint16_t) m_seg->data_len;
			buf_dma_addr = rte_mbuf_data_iova(m_seg);
			txd->read.buffer_addr =
				rte_cpu_to_le_64(buf_dma_addr);
			txd->read.cmd_type_len =
				rte_cpu_to_le_32(cmd_type_len | slen);
			txd->read.olinfo_status =
				rte_cpu_to_le_32(olinfo_status);
			txe->last_id = tx_last;
			tx_id = txe->next_id;
			txe = txn;
			m_seg = m_seg->next;
		} while (m_seg != NULL);

		/*
		 * The last packet data descriptor needs End Of Packet (EOP)
		 * and Report Status (RS).
		 */
		txd->read.cmd_type_len |=
			rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	}
end_of_tx:
	rte_wmb();

	/*
	 * Set the Transmit Descriptor Tail (TDT).
	 */
	E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
	PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
		   (unsigned) txq->port_id, (unsigned) txq->queue_id,
		   (unsigned) tx_id, (unsigned) nb_tx);
	txq->tx_tail = tx_id;

	return nb_tx;
}

/*********************************************************************
 *
 *  TX prep functions
 *
 **********************************************************************/
uint16_t
eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	int i, ret;
	struct rte_mbuf *m;

	for (i = 0; i < nb_pkts; i++) {
		m = tx_pkts[i];

		/* Check some limitations for TSO in hardware. */
		if (m->ol_flags & PKT_TX_TCP_SEG)
			if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
					(m->l2_len + m->l3_len + m->l4_len >
					IGB_TSO_MAX_HDRLEN)) {
				rte_errno = EINVAL;
				return i;
			}

		if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
			rte_errno = ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
	}

	return i;
}
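
/*
 * Typical usage from an application (sketch, added for illustration;
 * 'port_id', 'queue_id', 'pkts', 'n' and handle_bad_pkt() are
 * hypothetical names):
 *
 *	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, n);
 *	if (nb_prep < n)
 *		handle_bad_pkt(pkts[nb_prep], rte_errno);
 *	uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 *
 * rte_eth_tx_prepare() dispatches to eth_igb_prep_pkts() for this PMD.
 */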

/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/
#define IGB_PACKET_TYPE_IPV4              0X01
#define IGB_PACKET_TYPE_IPV4_TCP          0X11
#define IGB_PACKET_TYPE_IPV4_UDP          0X21
#define IGB_PACKET_TYPE_IPV4_SCTP         0X41
#define IGB_PACKET_TYPE_IPV4_EXT          0X03
#define IGB_PACKET_TYPE_IPV4_EXT_SCTP     0X43
#define IGB_PACKET_TYPE_IPV6              0X04
#define IGB_PACKET_TYPE_IPV6_TCP          0X14
#define IGB_PACKET_TYPE_IPV6_UDP          0X24
#define IGB_PACKET_TYPE_IPV6_EXT          0X0C
#define IGB_PACKET_TYPE_IPV6_EXT_TCP      0X1C
#define IGB_PACKET_TYPE_IPV6_EXT_UDP      0X2C
#define IGB_PACKET_TYPE_IPV4_IPV6         0X05
#define IGB_PACKET_TYPE_IPV4_IPV6_TCP     0X15
#define IGB_PACKET_TYPE_IPV4_IPV6_UDP     0X25
#define IGB_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
#define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
#define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
#define IGB_PACKET_TYPE_MAX               0X80
#define IGB_PACKET_TYPE_MASK              0X7F
#define IGB_PACKET_TYPE_SHIFT             0X04

static inline uint32_t
igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
{
	static const uint32_t
		ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
		[IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4,
		[IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4_EXT,
		[IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV6,
		[IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
			RTE_PTYPE_INNER_L3_IPV6,
		[IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV6_EXT,
		[IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
			RTE_PTYPE_INNER_L3_IPV6_EXT,
		[IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
		[IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
		[IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
			RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
		[IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
		[IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
			RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
		[IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
		[IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
		[IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
			RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
		[IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
		[IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
			RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
		[IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
		[IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
			RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
	};

	if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
		return RTE_PTYPE_UNKNOWN;

	pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;

	return ptype_table[pkt_info];
}
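
/*
 * Worked example (illustrative): for a plain IPv4/TCP packet,
 * (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK yields
 * 0x11 (IGB_PACKET_TYPE_IPV4_TCP), which the table above translates to
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP.
 */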

static inline uint64_t
rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
{
	uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : PKT_RX_RSS_HASH;

#if defined(RTE_LIBRTE_IEEE1588)
	static uint32_t ip_pkt_etqf_map[8] = {
		0, 0, 0, PKT_RX_IEEE1588_PTP,
		0, 0, 0, 0,
	};

	struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);

	/* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
	if (hw->mac.type == e1000_i210)
		pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
	else
		pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
#else
	RTE_SET_USED(rxq);
#endif

	return pkt_flags;
}

static inline uint64_t
rx_desc_status_to_pkt_flags(uint32_t rx_status)
{
	uint64_t pkt_flags;

	/* Check if VLAN present */
	pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
		PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);

#if defined(RTE_LIBRTE_IEEE1588)
	if (rx_status & E1000_RXD_STAT_TMST)
		pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
#endif
	return pkt_flags;
}

static inline uint64_t
rx_desc_error_to_pkt_flags(uint32_t rx_status)
{
	/*
	 * Bit 30: IPE, IPv4 checksum error
	 * Bit 29: L4I, L4I integrity error
	 */
	static uint64_t error_to_pkt_flags_map[4] = {
		PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
		PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
		PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
	};
	return error_to_pkt_flags_map[(rx_status >>
		E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
}
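
/*
 * Descriptive note (added for clarity): the two error bits form a 2-bit
 * index into the map above, e.g. IPE=1/L4I=0 selects entry 2, i.e.
 * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
 */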

uint16_t
eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
	       uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union e1000_adv_rx_desc *rx_ring;
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union e1000_adv_rx_desc rxd;
	uint64_t dma_addr;
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t pkt_len;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint64_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;
	while (nb_rx < nb_pkts) {
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (!(staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * End of packet.
		 *
		 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
		 * likely to be invalid and to be dropped by the various
		 * validation checks performed by the network stack.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
			   "staterr=0x%x pkt_len=%u",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_igb_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_igb_prefetch(&rx_ring[rx_id]);
			rte_igb_prefetch(&sw_ring[rx_id]);
		}

		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma_addr =
			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
		rxdp->read.hdr_addr = 0;
		rxdp->read.pkt_addr = dma_addr;

		/*
		 * Initialize the returned mbuf.
		 * 1) setup generic mbuf fields:
		 *    - number of segments,
		 *    - next segment,
		 *    - packet length,
		 *    - RX port identifier.
		 * 2) integrate hardware offload data, if any:
		 *    - RSS flag & hash,
		 *    - IP checksum flag,
		 *    - VLAN TCI, if any,
		 *    - error flags.
		 */
		pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
				      rxq->crc_len);
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = pkt_len;
		rxm->data_len = pkt_len;
		rxm->port = rxq->port_id;

		rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);

		/*
		 * The vlan_tci field is only valid when PKT_RX_VLAN is
		 * set in the pkt_flags field and must be in CPU byte order.
		 */
		if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
				(rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
			rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
		} else {
			rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
		}
		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
		rxm->ol_flags = pkt_flags;
		rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
						lo_dword.hs_rss.pkt_info);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = rxm;
	}
	rxq->rx_tail = rx_id;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
			(rxq->nb_rx_desc - 1) : (rx_id - 1));
		E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return nb_rx;
}
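
/*
 * Worked example (illustrative): with nb_rx_desc = 256 and
 * rx_free_thresh = 32, the RDT register is only rewritten once more
 * than 32 descriptors have been replenished. If rx_id is 0 at that
 * point, RDT is written with 255 (rx_id - 1 modulo the ring size), so
 * it can never catch up with RDH and falsely signal a full ring.
 */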

uint16_t
eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
			 uint16_t nb_pkts)
{
	struct igb_rx_queue *rxq;
	volatile union e1000_adv_rx_desc *rx_ring;
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_entry *sw_ring;
	struct igb_rx_entry *rxe;
	struct rte_mbuf *first_seg;
	struct rte_mbuf *last_seg;
	struct rte_mbuf *rxm;
	struct rte_mbuf *nmb;
	union e1000_adv_rx_desc rxd;
	uint64_t dma; /* Physical address of mbuf data buffer */
	uint32_t staterr;
	uint32_t hlen_type_rss;
	uint16_t rx_id;
	uint16_t nb_rx;
	uint16_t nb_hold;
	uint16_t data_len;
	uint64_t pkt_flags;

	nb_rx = 0;
	nb_hold = 0;
	rxq = rx_queue;
	rx_id = rxq->rx_tail;
	rx_ring = rxq->rx_ring;
	sw_ring = rxq->sw_ring;

	/*
	 * Retrieve RX context of current packet, if any.
	 */
	first_seg = rxq->pkt_first_seg;
	last_seg = rxq->pkt_last_seg;

	while (nb_rx < nb_pkts) {
	next_desc:
		/*
		 * The order of operations here is important as the DD status
		 * bit must not be read after any other descriptor fields.
		 * rx_ring and rxdp are pointing to volatile data so the order
		 * of accesses cannot be reordered by the compiler. If they were
		 * not volatile, they could be reordered which could lead to
		 * using invalid descriptor fields when read from rxd.
		 */
		rxdp = &rx_ring[rx_id];
		staterr = rxdp->wb.upper.status_error;
		if (!(staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
			break;
		rxd = *rxdp;

		/*
		 * Descriptor done.
		 *
		 * Allocate a new mbuf to replenish the RX ring descriptor.
		 * If the allocation fails:
		 *    - arrange for that RX descriptor to be the first one
		 *      being parsed the next time the receive function is
		 *      invoked [on the same queue].
		 *
		 *    - Stop parsing the RX ring and return immediately.
		 *
		 * This policy does not drop the packet received in the RX
		 * descriptor for which the allocation of a new mbuf failed.
		 * Thus, it allows that packet to be later retrieved if
		 * mbufs have been freed in the meantime.
		 * As a side effect, holding RX descriptors instead of
		 * systematically giving them back to the NIC may lead to
		 * RX ring exhaustion situations.
		 * However, the NIC can gracefully prevent such situations
		 * from happening by sending specific "back-pressure" flow
		 * control frames to its peer(s).
		 */
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
			   "staterr=0x%x data_len=%u",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) staterr,
			   (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));

		nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
		if (nmb == NULL) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
				   "queue_id=%u", (unsigned) rxq->port_id,
				   (unsigned) rxq->queue_id);
			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
			break;
		}

		nb_hold++;
		rxe = &sw_ring[rx_id];
		rx_id++;
		if (rx_id == rxq->nb_rx_desc)
			rx_id = 0;

		/* Prefetch next mbuf while processing current one. */
		rte_igb_prefetch(sw_ring[rx_id].mbuf);

		/*
		 * When next RX descriptor is on a cache-line boundary,
		 * prefetch the next 4 RX descriptors and the next 8 pointers
		 * to mbufs.
		 */
		if ((rx_id & 0x3) == 0) {
			rte_igb_prefetch(&rx_ring[rx_id]);
			rte_igb_prefetch(&sw_ring[rx_id]);
		}

		/*
		 * Update RX descriptor with the physical address of the new
		 * data buffer of the new allocated mbuf.
		 */
		rxm = rxe->mbuf;
		rxe->mbuf = nmb;
		dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
		rxdp->read.pkt_addr = dma;
		rxdp->read.hdr_addr = 0;

		/*
		 * Set data length & data buffer address of mbuf.
		 */
		data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
		rxm->data_len = data_len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet and
		 * initialize its context.
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (first_seg == NULL) {
			first_seg = rxm;
			first_seg->pkt_len = data_len;
			first_seg->nb_segs = 1;
		} else {
			first_seg->pkt_len += data_len;
			first_seg->nb_segs++;
			last_seg->next = rxm;
		}

		/*
		 * If this is not the last buffer of the received packet,
		 * update the pointer to the last mbuf of the current scattered
		 * packet and continue to parse the RX ring.
		 */
		if (!(staterr & E1000_RXD_STAT_EOP)) {
			last_seg = rxm;
			goto next_desc;
		}

		/*
		 * This is the last buffer of the received packet.
		 * If the CRC is not stripped by the hardware:
		 *   - Subtract the CRC length from the total packet length.
		 *   - If the last buffer only contains the whole CRC or a part
		 *     of it, free the mbuf associated to the last buffer.
		 *     If part of the CRC is also contained in the previous
		 *     mbuf, subtract the length of that CRC part from the
		 *     data length of the previous mbuf.
		 */
		rxm->next = NULL;
		if (unlikely(rxq->crc_len > 0)) {
			first_seg->pkt_len -= ETHER_CRC_LEN;
			if (data_len <= ETHER_CRC_LEN) {
				rte_pktmbuf_free_seg(rxm);
				first_seg->nb_segs--;
				last_seg->data_len = (uint16_t)
					(last_seg->data_len -
					 (ETHER_CRC_LEN - data_len));
				last_seg->next = NULL;
			} else
				rxm->data_len =
					(uint16_t) (data_len - ETHER_CRC_LEN);
		}

		/*
		 * Initialize the first mbuf of the returned packet:
		 *   - RX port identifier,
		 *   - hardware offload data, if any:
		 *     - RSS flag & hash,
		 *     - IP checksum flag,
		 *     - VLAN TCI, if any,
		 *     - error flags.
		 */
		first_seg->port = rxq->port_id;
		first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;

		/*
		 * The vlan_tci field is only valid when PKT_RX_VLAN is
		 * set in the pkt_flags field and must be in CPU byte order.
		 */
		if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
				(rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
			first_seg->vlan_tci =
				rte_be_to_cpu_16(rxd.wb.upper.vlan);
		} else {
			first_seg->vlan_tci =
				rte_le_to_cpu_16(rxd.wb.upper.vlan);
		}
		hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
		pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
		pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
		pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
		first_seg->ol_flags = pkt_flags;
		first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
					lower.lo_dword.hs_rss.pkt_info);

		/* Prefetch data of first segment, if configured to do so. */
		rte_packet_prefetch((char *)first_seg->buf_addr +
			first_seg->data_off);

		/*
		 * Store the mbuf address into the next entry of the array
		 * of returned packets.
		 */
		rx_pkts[nb_rx++] = first_seg;

		/*
		 * Setup receipt context for a new packet.
		 */
		first_seg = NULL;
	}

	/*
	 * Record index of the next RX descriptor to probe.
	 */
	rxq->rx_tail = rx_id;

	/*
	 * Save receive context.
	 */
	rxq->pkt_first_seg = first_seg;
	rxq->pkt_last_seg = last_seg;

	/*
	 * If the number of free RX descriptors is greater than the RX free
	 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
	 * register.
	 * Update the RDT with the value of the last processed RX descriptor
	 * minus 1, to guarantee that the RDT register is never equal to the
	 * RDH register, which creates a "full" ring situation from the
	 * hardware point of view...
	 */
	nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
			   "nb_hold=%u nb_rx=%u",
			   (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
			   (unsigned) rx_id, (unsigned) nb_hold,
			   (unsigned) nb_rx);
		rx_id = (uint16_t) ((rx_id == 0) ?
			(rxq->nb_rx_desc - 1) : (rx_id - 1));
		E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;
	return nb_rx;
}

/*
 * Maximum number of Ring Descriptors.
 *
 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
 * descriptors should meet the following condition:
 *      (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
 */

static void
igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
{
	unsigned i;

	if (txq->sw_ring != NULL) {
		for (i = 0; i < txq->nb_tx_desc; i++) {
			if (txq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
				txq->sw_ring[i].mbuf = NULL;
			}
		}
	}
}

static void
igb_tx_queue_release(struct igb_tx_queue *txq)
{
	if (txq != NULL) {
		igb_tx_queue_release_mbufs(txq);
		rte_free(txq->sw_ring);
		rte_free(txq);
	}
}

void
eth_igb_tx_queue_release(void *txq)
{
	igb_tx_queue_release(txq);
}

static int
igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
{
	struct igb_tx_entry *sw_ring;
	volatile union e1000_adv_tx_desc *txr;
	uint16_t tx_first; /* First segment analyzed. */
	uint16_t tx_id;    /* Current segment being processed. */
	uint16_t tx_last;  /* Last segment in the current packet. */
	uint16_t tx_next;  /* First segment of the next packet. */
	int count;

	if (txq != NULL) {
		count = 0;
		sw_ring = txq->sw_ring;
		txr = txq->tx_ring;

		/*
		 * tx_tail is the last sent packet on the sw_ring. Go to the
		 * end of that packet (the last segment in the packet chain);
		 * the next segment is then the start of the oldest segment
		 * in the sw_ring. This is the first packet that will be
		 * attempted to be freed.
		 */

		/* Get last segment in most recently added packet. */
		tx_first = sw_ring[txq->tx_tail].last_id;

		/* Get the next segment, which is the oldest segment in ring. */
		tx_first = sw_ring[tx_first].next_id;

		/* Set the current index to the first. */
		tx_id = tx_first;

		/*
		 * Loop through each packet. For each packet, verify that an
		 * mbuf exists and that the last segment is free. If so, free
		 * it and move on.
		 */
		while (1) {
			tx_last = sw_ring[tx_id].last_id;

			if (sw_ring[tx_last].mbuf) {
				if (txr[tx_last].wb.status &
						E1000_TXD_STAT_DD) {
					/*
					 * Increment the number of packets
					 * freed.
					 */
					count++;

					/* Get the start of the next packet. */
					tx_next = sw_ring[tx_last].next_id;

					/*
					 * Loop through all segments in a
					 * packet.
					 */
					do {
						rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
						sw_ring[tx_id].mbuf = NULL;
						sw_ring[tx_id].last_id = tx_id;

						/* Move to next segment. */
						tx_id = sw_ring[tx_id].next_id;

					} while (tx_id != tx_next);

					if (unlikely(count == (int)free_cnt))
						break;
				} else
					/*
					 * mbuf still in use, nothing left to
					 * free.
					 */
					break;
			} else {
				/*
				 * There are multiple reasons to be here:
				 * 1) All the packets on the ring have been
				 *    freed - tx_id is equal to tx_first
				 *    and some packets have been freed.
				 *    - Done, exit.
				 * 2) The interface has not sent a ring's worth
				 *    of packets yet, so the segment after tail
				 *    is still empty. Or a previous call to this
				 *    function freed some of the segments but
				 *    not all, so there is a hole in the list.
				 *    Hopefully this is a rare case.
				 *    - Walk the list and find the next mbuf. If
				 *      there isn't one, then done.
				 */
				if (likely((tx_id == tx_first) && (count != 0)))
					break;

				/*
				 * Walk the list and find the next mbuf, if any.
				 */
				do {
					/* Move to next segment. */
					tx_id = sw_ring[tx_id].next_id;

					if (sw_ring[tx_id].mbuf)
						break;

				} while (tx_id != tx_first);

				/*
				 * Determine why the previous loop bailed. If
				 * there is not an mbuf, done.
				 */
				if (sw_ring[tx_id].mbuf == NULL)
					break;
			}
		}
	} else
		count = -ENODEV;

	return count;
}

int
eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
{
	return igb_tx_done_cleanup(txq, free_cnt);
}

static void
igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
{
	txq->tx_head = 0;
	txq->tx_tail = 0;
	txq->ctx_curr = 0;
	memset((void *)&txq->ctx_cache, 0,
		IGB_CTX_NUM * sizeof(struct igb_advctx_info));
}

static void
igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
{
	static const union e1000_adv_tx_desc zeroed_desc = {{0}};
	struct igb_tx_entry *txe = txq->sw_ring;
	uint16_t i, prev;
	struct e1000_hw *hw;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	/* Zero out HW ring memory */
	for (i = 0; i < txq->nb_tx_desc; i++) {
		txq->tx_ring[i] = zeroed_desc;
	}

	/* Initialize ring entries */
	prev = (uint16_t)(txq->nb_tx_desc - 1);
	for (i = 0; i < txq->nb_tx_desc; i++) {
		volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);

		txd->wb.status = E1000_TXD_STAT_DD;
		txe[i].mbuf = NULL;
		txe[i].last_id = i;
		txe[prev].next_id = i;
		prev = i;
	}

	txq->txd_type = E1000_ADVTXD_DTYP_DATA;
	/* 82575 specific, each tx queue will use 2 hw contexts */
	if (hw->mac.type == e1000_82575)
		txq->ctx_start = txq->queue_id * IGB_CTX_NUM;

	igb_reset_tx_queue_stat(txq);
}
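
/*
 * Descriptive note (added for clarity): after this reset, the sw_ring
 * entries form a circular list. For a hypothetical 4-descriptor ring,
 * next_id = {1, 2, 3, 0} and last_id = {0, 1, 2, 3}, and every hardware
 * descriptor reports DD, so the whole ring is seen as free by
 * eth_igb_xmit_pkts().
 */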

int
eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_txconf *tx_conf)
{
	const struct rte_memzone *tz;
	struct igb_tx_queue *txq;
	struct e1000_hw *hw;
	uint32_t size;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of transmit descriptors.
	 * It must not exceed hardware maximum, and must be a multiple
	 * of IGB_TXD_ALIGN.
	 */
	if (nb_desc % IGB_TXD_ALIGN != 0 ||
			(nb_desc > E1000_MAX_RING_DESC) ||
			(nb_desc < E1000_MIN_RING_DESC)) {
		return -EINVAL;
	}

	/*
	 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
	 * driver.
	 */
	if (tx_conf->tx_free_thresh != 0)
		PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
			     "used for the 1G driver.");
	if (tx_conf->tx_rs_thresh != 0)
		PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
			     "used for the 1G driver.");
	if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
		PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
			     "consider setting the TX WTHRESH value to 4, 8, "
			     "or 16.");

	/* Free memory prior to re-allocation if needed */
	if (dev->data->tx_queues[queue_idx] != NULL) {
		igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
		dev->data->tx_queues[queue_idx] = NULL;
	}

	/* First allocate the tx queue data structure */
	txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL)
		return -ENOMEM;

	/*
	 * Allocate TX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
				      E1000_ALIGN, socket_id);
	if (tz == NULL) {
		igb_tx_queue_release(txq);
		return -ENOMEM;
	}

	txq->nb_tx_desc = nb_desc;
	txq->pthresh = tx_conf->tx_thresh.pthresh;
	txq->hthresh = tx_conf->tx_thresh.hthresh;
	txq->wthresh = tx_conf->tx_thresh.wthresh;
	if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
		txq->wthresh = 1;
	txq->queue_id = queue_idx;
	txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
	txq->port_id = dev->data->port_id;

	txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
	txq->tx_ring_phys_addr = tz->iova;

	txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
	/* Allocate software ring */
	txq->sw_ring = rte_zmalloc("txq->sw_ring",
				   sizeof(struct igb_tx_entry) * nb_desc,
				   RTE_CACHE_LINE_SIZE);
	if (txq->sw_ring == NULL) {
		igb_tx_queue_release(txq);
		return -ENOMEM;
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
		     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);

	igb_reset_tx_queue(txq, dev);
	dev->tx_pkt_burst = eth_igb_xmit_pkts;
	dev->tx_pkt_prepare = &eth_igb_prep_pkts;
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

static void
igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
{
	unsigned i;

	if (rxq->sw_ring != NULL) {
		for (i = 0; i < rxq->nb_rx_desc; i++) {
			if (rxq->sw_ring[i].mbuf != NULL) {
				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
				rxq->sw_ring[i].mbuf = NULL;
			}
		}
	}
}

static void
igb_rx_queue_release(struct igb_rx_queue *rxq)
{
	if (rxq != NULL) {
		igb_rx_queue_release_mbufs(rxq);
		rte_free(rxq->sw_ring);
		rte_free(rxq);
	}
}

void
eth_igb_rx_queue_release(void *rxq)
{
	igb_rx_queue_release(rxq);
}

static void
igb_reset_rx_queue(struct igb_rx_queue *rxq)
{
	static const union e1000_adv_rx_desc zeroed_desc = {{0}};
	unsigned i;

	/* Zero out HW ring memory */
	for (i = 0; i < rxq->nb_rx_desc; i++) {
		rxq->rx_ring[i] = zeroed_desc;
	}

	rxq->rx_tail = 0;
	rxq->pkt_first_seg = NULL;
	rxq->pkt_last_seg = NULL;
}

uint64_t
igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
{
	uint64_t rx_offload_capa;

	RTE_SET_USED(dev);
	rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP  |
			  DEV_RX_OFFLOAD_VLAN_FILTER |
			  DEV_RX_OFFLOAD_IPV4_CKSUM  |
			  DEV_RX_OFFLOAD_UDP_CKSUM   |
			  DEV_RX_OFFLOAD_TCP_CKSUM   |
			  DEV_RX_OFFLOAD_JUMBO_FRAME |
			  DEV_RX_OFFLOAD_CRC_STRIP   |
			  DEV_RX_OFFLOAD_SCATTER;

	return rx_offload_capa;
}

uint64_t
igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	uint64_t rx_queue_offload_capa;

	switch (hw->mac.type) {
	case e1000_vfadapt_i350:
		/*
		 * As only one Rx queue can be used, make the per-queue
		 * offload capability identical to the per-port capability
		 * for convenience.
		 */
		rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
		break;
	default:
		rx_queue_offload_capa = 0;
	}

	return rx_queue_offload_capa;
}

static int
igb_check_rx_queue_offloads(struct rte_eth_dev *dev, uint64_t requested)
{
	uint64_t port_offloads = dev->data->dev_conf.rxmode.offloads;
	uint64_t queue_supported = igb_get_rx_queue_offloads_capa(dev);
	uint64_t port_supported = igb_get_rx_port_offloads_capa(dev);

	/* Every requested offload must be supported at queue or port level. */
	if ((requested & (queue_supported | port_supported)) != requested)
		return 0;

	/* Port-level offloads must match the port configuration exactly. */
	if ((port_offloads ^ requested) & port_supported)
		return 0;

	return 1;
}
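
/*
 * Example of the check above (illustrative): if the port was configured
 * with DEV_RX_OFFLOAD_VLAN_STRIP, a queue requesting no offloads at all
 * is rejected, because VLAN_STRIP is a port-supported offload and the
 * XOR of port and requested offloads is non-zero for that bit.
 */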

int
eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
			 uint16_t queue_idx,
			 uint16_t nb_desc,
			 unsigned int socket_id,
			 const struct rte_eth_rxconf *rx_conf,
			 struct rte_mempool *mp)
{
	const struct rte_memzone *rz;
	struct igb_rx_queue *rxq;
	struct e1000_hw *hw;
	unsigned int size;

	if (!igb_check_rx_queue_offloads(dev, rx_conf->offloads)) {
		PMD_INIT_LOG(ERR, "%p: Rx queue offloads 0x%" PRIx64
			" don't match port offloads 0x%" PRIx64
			" or supported port offloads 0x%" PRIx64
			" or supported queue offloads 0x%" PRIx64,
			(void *)dev,
			rx_conf->offloads,
			dev->data->dev_conf.rxmode.offloads,
			igb_get_rx_port_offloads_capa(dev),
			igb_get_rx_queue_offloads_capa(dev));
		return -ENOTSUP;
	}

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Validate number of receive descriptors.
	 * It must not exceed hardware maximum, and must be a multiple
	 * of IGB_RXD_ALIGN.
	 */
	if (nb_desc % IGB_RXD_ALIGN != 0 ||
			(nb_desc > E1000_MAX_RING_DESC) ||
			(nb_desc < E1000_MIN_RING_DESC)) {
		return -EINVAL;
	}

	/* Free memory prior to re-allocation if needed */
	if (dev->data->rx_queues[queue_idx] != NULL) {
		igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
		dev->data->rx_queues[queue_idx] = NULL;
	}

	/* First allocate the RX queue data structure. */
	rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL)
		return -ENOMEM;
	rxq->offloads = rx_conf->offloads;
	rxq->mb_pool = mp;
	rxq->nb_rx_desc = nb_desc;
	rxq->pthresh = rx_conf->rx_thresh.pthresh;
	rxq->hthresh = rx_conf->rx_thresh.hthresh;
	rxq->wthresh = rx_conf->rx_thresh.wthresh;
	if (rxq->wthresh > 0 &&
	    (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
		rxq->wthresh = 1;
	rxq->drop_en = rx_conf->rx_drop_en;
	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
	rxq->queue_id = queue_idx;
	rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
		queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
	rxq->port_id = dev->data->port_id;
	rxq->crc_len = (uint8_t)((dev->data->dev_conf.rxmode.offloads &
			DEV_RX_OFFLOAD_CRC_STRIP) ? 0 : ETHER_CRC_LEN);

	/*
	 * Allocate RX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
	rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
				      E1000_ALIGN, socket_id);
	if (rz == NULL) {
		igb_rx_queue_release(rxq);
		return -ENOMEM;
	}
	rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
	rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
	rxq->rx_ring_phys_addr = rz->iova;
	rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;

	/* Allocate software ring. */
	rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
				   sizeof(struct igb_rx_entry) * nb_desc,
				   RTE_CACHE_LINE_SIZE);
	if (rxq->sw_ring == NULL) {
		igb_rx_queue_release(rxq);
		return -ENOMEM;
	}
	PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
		     rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);

	dev->data->rx_queues[queue_idx] = rxq;
	igb_reset_rx_queue(rxq);

	return 0;
}

uint32_t
eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
#define IGB_RXQ_SCAN_INTERVAL 4
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_queue *rxq;
	uint32_t desc = 0;

	rxq = dev->data->rx_queues[rx_queue_id];
	rxdp = &(rxq->rx_ring[rxq->rx_tail]);

	while ((desc < rxq->nb_rx_desc) &&
		(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
		desc += IGB_RXQ_SCAN_INTERVAL;
		rxdp += IGB_RXQ_SCAN_INTERVAL;
		if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
			rxdp = &(rxq->rx_ring[rxq->rx_tail +
				desc - rxq->nb_rx_desc]);
	}

	return desc;
}

int
eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
{
	volatile union e1000_adv_rx_desc *rxdp;
	struct igb_rx_queue *rxq = rx_queue;
	uint32_t desc;

	if (unlikely(offset >= rxq->nb_rx_desc))
		return 0;
	desc = rxq->rx_tail + offset;
	if (desc >= rxq->nb_rx_desc)
		desc -= rxq->nb_rx_desc;

	rxdp = &rxq->rx_ring[desc];
	return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
}

int
eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct igb_rx_queue *rxq = rx_queue;
	volatile uint32_t *status;
	uint32_t desc;

	if (unlikely(offset >= rxq->nb_rx_desc))
		return -EINVAL;

	if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
		return RTE_ETH_RX_DESC_UNAVAIL;

	desc = rxq->rx_tail + offset;
	if (desc >= rxq->nb_rx_desc)
		desc -= rxq->nb_rx_desc;

	status = &rxq->rx_ring[desc].wb.upper.status_error;
	if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
		return RTE_ETH_RX_DESC_DONE;

	return RTE_ETH_RX_DESC_AVAIL;
}

int
eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
	struct igb_tx_queue *txq = tx_queue;
	volatile uint32_t *status;
	uint32_t desc;

	if (unlikely(offset >= txq->nb_tx_desc))
		return -EINVAL;

	desc = txq->tx_tail + offset;
	if (desc >= txq->nb_tx_desc)
		desc -= txq->nb_tx_desc;

	status = &txq->tx_ring[desc].wb.status;
	if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
		return RTE_ETH_TX_DESC_DONE;

	return RTE_ETH_TX_DESC_FULL;
}

void
igb_dev_clear_queues(struct rte_eth_dev *dev)
{
	uint16_t i;
	struct igb_tx_queue *txq;
	struct igb_rx_queue *rxq;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		if (txq != NULL) {
			igb_tx_queue_release_mbufs(txq);
			igb_reset_tx_queue(txq, dev);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		if (rxq != NULL) {
			igb_rx_queue_release_mbufs(rxq);
			igb_reset_rx_queue(rxq);
		}
	}
}

void
igb_dev_free_queues(struct rte_eth_dev *dev)
{
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		eth_igb_rx_queue_release(dev->data->rx_queues[i]);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		eth_igb_tx_queue_release(dev->data->tx_queues[i]);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}

/**
 * Receive Side Scaling (RSS).
 * See section 7.1.1.7 in the following document:
 *     "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
 *
 * Principles:
 * The source and destination IP addresses of the IP header and the source and
 * destination ports of TCP/UDP headers, if any, of received packets are hashed
 * against a configurable random key to compute a 32-bit RSS hash result.
 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
 * RSS output index which is used as the RX queue index where to store the
 * received packets.
 * The following output is supplied in the RX write-back descriptor:
 *     - 32-bit result of the Microsoft RSS hash function,
 *     - 4-bit RSS type field.
 */

/*
 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
 * Used as the default key.
 */
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};
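
/*
 * Worked example of the RSS flow (illustrative): a received TCP/IPv4
 * packet hashes to, say, 0x1234ABCD. The low 7 bits (0x4D = 77) index
 * the RETA, and the 3-bit queue number stored in entry 77 selects the
 * destination RX queue. The full 32-bit hash is written back in the RX
 * descriptor and ends up in mbuf->hash.rss.
 */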

static void
igb_rss_disable(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	uint32_t mrqc;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	mrqc = E1000_READ_REG(hw, E1000_MRQC);
	mrqc &= ~E1000_MRQC_ENABLE_MASK;
	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
}

static void
igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
{
	uint8_t *hash_key;
	uint32_t rss_key;
	uint32_t mrqc;
	uint64_t rss_hf;
	uint16_t i;

	hash_key = rss_conf->rss_key;
	if (hash_key != NULL) {
		/* Fill in RSS hash key */
		for (i = 0; i < 10; i++) {
			rss_key  = hash_key[(i * 4)];
			rss_key |= hash_key[(i * 4) + 1] << 8;
			rss_key |= hash_key[(i * 4) + 2] << 16;
			rss_key |= hash_key[(i * 4) + 3] << 24;
			E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
		}
	}

	/* Set configured hashing protocols in MRQC register */
	rss_hf = rss_conf->rss_hf;
	mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
	if (rss_hf & ETH_RSS_IPV4)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
	if (rss_hf & ETH_RSS_IPV6)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
	if (rss_hf & ETH_RSS_IPV6_EX)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
	if (rss_hf & ETH_RSS_IPV6_TCP_EX)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
	if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
	if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
	if (rss_hf & ETH_RSS_IPV6_UDP_EX)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
}

int
eth_igb_rss_hash_update(struct rte_eth_dev *dev,
			struct rte_eth_rss_conf *rss_conf)
{
	struct e1000_hw *hw;
	uint32_t mrqc;
	uint64_t rss_hf;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Before changing anything, first check that the update RSS operation
	 * does not attempt to disable RSS, if RSS was enabled at
	 * initialization time, or does not attempt to enable RSS, if RSS was
	 * disabled at initialization time.
	 */
	rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
	mrqc = E1000_READ_REG(hw, E1000_MRQC);
	if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
		if (rss_hf != 0) /* Enable RSS */
			return -(EINVAL);
		return 0; /* Nothing to do */
	}
	/* RSS enabled */
	if (rss_hf == 0) /* Disable RSS */
		return -(EINVAL);
	igb_hw_rss_hash_set(hw, rss_conf);
	return 0;
}

int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
			      struct rte_eth_rss_conf *rss_conf)
{
	struct e1000_hw *hw;
	uint8_t *hash_key;
	uint32_t rss_key;
	uint32_t mrqc;
	uint64_t rss_hf;
	uint16_t i;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	hash_key = rss_conf->rss_key;
	if (hash_key != NULL) {
		/* Return RSS hash key */
		for (i = 0; i < 10; i++) {
			rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
			hash_key[(i * 4)] = rss_key & 0x000000FF;
			hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
			hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
			hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
		}
	}

	/* Get RSS functions configured in MRQC register */
	mrqc = E1000_READ_REG(hw, E1000_MRQC);
	if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
		rss_conf->rss_hf = 0;
		return 0;
	}
	rss_hf = 0;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
		rss_hf |= ETH_RSS_IPV4;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
		rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
		rss_hf |= ETH_RSS_IPV6;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
		rss_hf |= ETH_RSS_IPV6_EX;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
		rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
		rss_hf |= ETH_RSS_IPV6_TCP_EX;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
		rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
		rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
	if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
		rss_hf |= ETH_RSS_IPV6_UDP_EX;
	rss_conf->rss_hf = rss_hf;

	return 0;
}

static void
igb_rss_configure(struct rte_eth_dev *dev)
{
	struct rte_eth_rss_conf rss_conf;
	struct e1000_hw *hw;
	uint32_t shift;
	uint16_t i;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Fill in redirection table. */
	shift = (hw->mac.type == e1000_82575) ? 6 : 0;
	for (i = 0; i < 128; i++) {
		union e1000_reta {
			uint32_t dword;
			uint8_t  bytes[4];
		} reta;
		uint8_t q_idx;

		q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
				   i % dev->data->nb_rx_queues : 0);
		reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
		if ((i & 3) == 3)
			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
	}

	/*
	 * Configure the RSS key and the RSS protocols used to compute
	 * the RSS hash of input packets.
	 */
	rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
	if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
		igb_rss_disable(dev);
		return;
	}
	if (rss_conf.rss_key == NULL)
		rss_conf.rss_key = rss_intel_key; /* Default hash key */
	igb_hw_rss_hash_set(hw, &rss_conf);
}

/*
 * Check whether the MAC type supports VMDq.
 * Return 1 if it does; otherwise, return 0.
 */
static int
igb_is_vmdq_supported(const struct rte_eth_dev *dev)
{
	const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	switch (hw->mac.type) {
	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		return 1;
	default:
		PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
		return 0;
	}
}
2126 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2128 struct rte_eth_vmdq_rx_conf *cfg;
2129 struct e1000_hw *hw;
2130 uint32_t mrqc, vt_ctl, vmolr, rctl;
2133 PMD_INIT_FUNC_TRACE();
2135 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2136 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2138 /* Check if mac type can support VMDq, return value of 0 means NOT support */
2139 if (igb_is_vmdq_supported(dev) == 0)
2142 igb_rss_disable(dev);
2144 /* RCTL: eanble VLAN filter */
2145 rctl = E1000_READ_REG(hw, E1000_RCTL);
2146 rctl |= E1000_RCTL_VFE;
2147 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2149 /* MRQC: enable vmdq */
2150 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2151 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2152 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2154 /* VTCTL: pool selection according to VLAN tag */
2155 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2156 if (cfg->enable_default_pool)
2157 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2158 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2159 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2161 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2162 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2163 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2164 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2167 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2168 vmolr |= E1000_VMOLR_AUPE;
2169 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2170 vmolr |= E1000_VMOLR_ROMPE;
2171 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2172 vmolr |= E1000_VMOLR_ROPE;
2173 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2174 vmolr |= E1000_VMOLR_BAM;
2175 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2176 vmolr |= E1000_VMOLR_MPME;
2178 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2182 * VMOLR: set STRVLAN as 1 if IGMAC in VTCTL is set as 1
2183 * Both 82576 and 82580 support it
2185 if (hw->mac.type != e1000_i350) {
2186 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2187 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2188 vmolr |= E1000_VMOLR_STRVLAN;
2189 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2193 /* VFTA - enable all vlan filters */
2194 for (i = 0; i < IGB_VFTA_SIZE; i++)
2195 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2197 /* VFRE: 8 pools enabling for rx, both 82576 and i350 support it */
2198 if (hw->mac.type != e1000_82580)
2199 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2202 * RAH/RAL - allow pools to read specific mac addresses
2203 * In this case, all pools should be able to read from mac addr 0
2205 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2206 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2208 /* VLVF: set up filters for vlan tags as configured */
2209 for (i = 0; i < cfg->nb_pool_maps; i++) {
2210 /* set vlan id in VF register and set the valid bit */
2211 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2212 (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2213 ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2214 E1000_VLVF_POOLSEL_MASK)));
2217 E1000_WRITE_FLUSH(hw);
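
/*
 * Application-side sketch (VLAN IDs and pool numbers are assumptions):
 * the cfg read at the top of igb_vmdq_rx_hw_configure() comes from the
 * port configuration, e.g. steering two VLANs into separate pools:
 *
 *	struct rte_eth_conf port_conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_VMDQ_ONLY },
 *		.rx_adv_conf.vmdq_rx_conf = {
 *			.nb_queue_pools = ETH_8_POOLS,
 *			.nb_pool_maps = 2,
 *			.pool_map = {
 *				{ .vlan_id = 100, .pools = 1 << 0 },
 *				{ .vlan_id = 200, .pools = 1 << 1 },
 *			},
 *			.rx_mode = ETH_VMDQ_ACCEPT_UNTAG |
 *				ETH_VMDQ_ACCEPT_BROADCAST,
 *		},
 *	};
 */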
/*********************************************************************
 *
 *  Enable receive unit.
 *
 **********************************************************************/
static int
igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
{
	struct igb_rx_entry *rxe = rxq->sw_ring;
	uint64_t dma_addr;
	unsigned i;

	/* Initialize software ring entries. */
	for (i = 0; i < rxq->nb_rx_desc; i++) {
		volatile union e1000_adv_rx_desc *rxd;
		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);

		if (mbuf == NULL) {
			PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
				     "queue_id=%hu", rxq->queue_id);
			return -ENOMEM;
		}
		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
		rxd = &rxq->rx_ring[i];
		rxd->read.hdr_addr = 0;
		rxd->read.pkt_addr = dma_addr;
		rxe[i].mbuf = mbuf;
	}
	return 0;
}
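
/*
 * Sketch of the pool this ring is populated from (sizes assumed): with
 * RTE_MBUF_DEFAULT_BUF_SIZE the data room is 2048 bytes plus headroom,
 * so eth_igb_rx_init() below computes buf_size == 2048 and takes the
 * SRRCTL.BSIZEPACKET path rather than the RCTL.BSIZE fallback.
 *
 *	struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool",
 *		8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 */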
#define E1000_MRQC_DEF_Q_SHIFT	(3)

static int
igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw =
		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
	uint32_t mrqc;

	if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
		/*
		 * SRIOV active scheme
		 * FIXME if support RSS together with VMDq & SRIOV
		 */
		mrqc = E1000_MRQC_ENABLE_VMDQ;
		/* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
		mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
	} else if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
		/*
		 * SRIOV inactive scheme
		 */
		switch (dev->data->dev_conf.rxmode.mq_mode) {
		case ETH_MQ_RX_RSS:
			igb_rss_configure(dev);
			break;
		case ETH_MQ_RX_VMDQ_ONLY:
			/* Configure general VMDq-only RX parameters */
			igb_vmdq_rx_hw_configure(dev);
			break;
		case ETH_MQ_RX_NONE:
			/* if mq_mode is none, disable RSS mode. */
		default:
			igb_rss_disable(dev);
			break;
		}
	}

	return 0;
}
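
/*
 * Sketch of how an application selects the first switch arm above
 * (port_id and queue counts are assumptions):
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *		.rx_adv_conf.rss_conf = {
 *			.rss_key = NULL,	// fall back to rss_intel_key
 *			.rss_hf = ETH_RSS_IP,
 *		},
 *	};
 *	rte_eth_dev_configure(port_id, 4, 4, &conf);
 */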
int
eth_igb_rx_init(struct rte_eth_dev *dev)
{
	struct rte_eth_rxmode *rxmode;
	struct e1000_hw *hw;
	struct igb_rx_queue *rxq;
	uint32_t rctl;
	uint32_t rxcsum;
	uint32_t srrctl;
	uint16_t buf_size;
	uint16_t rctl_bsize;
	uint16_t i;
	int ret;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring.
	 */
	rctl = E1000_READ_REG(hw, E1000_RCTL);
	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	rxmode = &dev->data->dev_conf.rxmode;

	/*
	 * Configure support of jumbo frames, if any.
	 */
	if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
		rctl |= E1000_RCTL_LPE;

		/*
		 * Set maximum packet length by default, and might be updated
		 * together with enabling/disabling dual VLAN.
		 */
		E1000_WRITE_REG(hw, E1000_RLPML,
			dev->data->dev_conf.rxmode.max_rx_pkt_len +
				VLAN_TAG_SIZE);
	} else
		rctl &= ~E1000_RCTL_LPE;

	/* Configure and enable each RX queue. */
	rctl_bsize = 0;
	dev->rx_pkt_burst = eth_igb_recv_pkts;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		uint64_t bus_addr;
		uint32_t rxdctl;

		rxq = dev->data->rx_queues[i];

		rxq->flags = 0;
		/*
		 * i350 and i354 vlan packets have vlan tags byte swapped.
		 */
		if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
			rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
			PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
		} else {
			PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
		}

		/* Allocate buffers for descriptor rings and set up queue */
		ret = igb_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		/*
		 * Reset crc_len in case it was changed after queue setup by a
		 * call to configure.
		 */
		rxq->crc_len = (uint8_t)(dev->data->dev_conf.rxmode.offloads &
				DEV_RX_OFFLOAD_CRC_STRIP ? 0 : ETHER_CRC_LEN);

		bus_addr = rxq->rx_ring_phys_addr;
		E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
				rxq->nb_rx_desc *
				sizeof(union e1000_adv_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);

		srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/*
		 * Configure RX buffer size.
		 */
		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
			RTE_PKTMBUF_HEADROOM);
		if (buf_size >= 1024) {
			/*
			 * Configure the BSIZEPACKET field of the SRRCTL
			 * register of the queue.
			 * Value is in 1 KB resolution, from 1 KB to 127 KB.
			 * If this field is equal to 0b, then RCTL.BSIZE
			 * determines the RX packet buffer size.
			 */
			srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
				   E1000_SRRCTL_BSIZEPKT_MASK);
			buf_size = (uint16_t)((srrctl &
					       E1000_SRRCTL_BSIZEPKT_MASK) <<
					      E1000_SRRCTL_BSIZEPKT_SHIFT);

			/* Add dual VLAN length to the check for dual VLAN support */
			if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
					2 * VLAN_TAG_SIZE) > buf_size) {
				if (!dev->data->scattered_rx)
					PMD_INIT_LOG(DEBUG,
						     "forcing scatter mode");
				dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
				dev->data->scattered_rx = 1;
			}
		} else {
			/*
			 * Use BSIZE field of the device RCTL register.
			 */
			if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
				rctl_bsize = buf_size;
			if (!dev->data->scattered_rx)
				PMD_INIT_LOG(DEBUG, "forcing scatter mode");
			dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
			dev->data->scattered_rx = 1;
		}

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= E1000_SRRCTL_DROP_EN;

		E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);

		/* Enable this RX queue. */
		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		rxdctl &= 0xFFF00000;
		rxdctl |= (rxq->pthresh & 0x1F);
		rxdctl |= ((rxq->hthresh & 0x1F) << 8);
		rxdctl |= ((rxq->wthresh & 0x1F) << 16);
		E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
	}

	if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
		if (!dev->data->scattered_rx)
			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
		dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
		dev->data->scattered_rx = 1;
	}

	/*
	 * Setup BSIZE field of RCTL register, if needed.
	 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
	 * register, since the code above configures the SRRCTL register of
	 * the RX queue in such a case.
	 * All configurable sizes are:
	 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
	 *  8192: rctl |= (E1000_RCTL_SZ_8192  | E1000_RCTL_BSEX);
	 *  4096: rctl |= (E1000_RCTL_SZ_4096  | E1000_RCTL_BSEX);
	 *  2048: rctl |= E1000_RCTL_SZ_2048;
	 *  1024: rctl |= E1000_RCTL_SZ_1024;
	 *   512: rctl |= E1000_RCTL_SZ_512;
	 *   256: rctl |= E1000_RCTL_SZ_256;
	 */
	if (rctl_bsize > 0) {
		if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
			rctl |= E1000_RCTL_SZ_512;
		else /* 256 <= buf_size < 512 - use 256 */
			rctl |= E1000_RCTL_SZ_256;
	}

	/*
	 * Configure RSS if device configured with multiple RX queues.
	 */
	igb_dev_mq_rx_configure(dev);

	/* Update the rctl since igb_dev_mq_rx_configure may change its value */
	rctl |= E1000_READ_REG(hw, E1000_RCTL);

	/*
	 * Setup the Checksum Register.
	 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	/* Enable both L3/L4 rx checksum offload */
	if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
		rxcsum |= E1000_RXCSUM_IPOFL;
	else
		rxcsum &= ~E1000_RXCSUM_IPOFL;
	if (rxmode->offloads &
		(DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM))
		rxcsum |= E1000_RXCSUM_TUOFL;
	else
		rxcsum &= ~E1000_RXCSUM_TUOFL;
	if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
		rxcsum |= E1000_RXCSUM_CRCOFL;
	else
		rxcsum &= ~E1000_RXCSUM_CRCOFL;

	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);

	/* Setup the Receive Control Register. */
	if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) {
		rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */

		/* set STRCRC bit in all queues */
		if (hw->mac.type == e1000_i350 ||
		    hw->mac.type == e1000_i210 ||
		    hw->mac.type == e1000_i211 ||
		    hw->mac.type == e1000_i354) {
			for (i = 0; i < dev->data->nb_rx_queues; i++) {
				uint32_t dvmolr;

				rxq = dev->data->rx_queues[i];
				dvmolr = E1000_READ_REG(hw,
					E1000_DVMOLR(rxq->reg_idx));
				dvmolr |= E1000_DVMOLR_STRCRC;
				E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx),
						dvmolr);
			}
		}
	} else {
		rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */

		/* clear STRCRC bit in all queues */
		if (hw->mac.type == e1000_i350 ||
		    hw->mac.type == e1000_i210 ||
		    hw->mac.type == e1000_i211 ||
		    hw->mac.type == e1000_i354) {
			for (i = 0; i < dev->data->nb_rx_queues; i++) {
				uint32_t dvmolr;

				rxq = dev->data->rx_queues[i];
				dvmolr = E1000_READ_REG(hw,
					E1000_DVMOLR(rxq->reg_idx));
				dvmolr &= ~E1000_DVMOLR_STRCRC;
				E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx),
						dvmolr);
			}
		}
	}

	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		E1000_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Make sure VLAN Filters are off. */
	if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
		rctl &= ~E1000_RCTL_VFE;
	/* Don't store bad packets. */
	rctl &= ~E1000_RCTL_SBP;

	/* Enable Receives. */
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers.
	 * This needs to be done after enable.
	 */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
		E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
	}

	return 0;
}
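
/*
 * Worked example for the SRRCTL.BSIZEPACKET arithmetic above (buffer
 * size assumed): with buf_size == 2048, shifting right by
 * E1000_SRRCTL_BSIZEPKT_SHIFT programs the field to 2 (1 KB
 * granularity), and shifting back yields the 2048-byte effective size
 * used in the dual-VLAN check. Buffers under 1024 bytes instead fall
 * back to RCTL.BSIZE, where only the 512- and 256-byte encodings are
 * used here.
 */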
/*********************************************************************
 *
 *  Enable transmit unit.
 *
 **********************************************************************/
void
eth_igb_tx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_tx_queue *txq;
	uint32_t tctl;
	uint32_t txdctl;
	uint16_t i;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings. */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		uint64_t bus_addr;

		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;

		E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
				txq->nb_tx_desc *
				sizeof(union e1000_adv_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers. */
		E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
		E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);

		/* Setup Transmit threshold registers. */
		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
		txdctl |= txq->pthresh & 0x1F;
		txdctl |= ((txq->hthresh & 0x1F) << 8);
		txdctl |= ((txq->wthresh & 0x1F) << 16);
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
	}

	/* Program the Transmit Control Register. */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	e1000_config_collision_dist(hw);

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}
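
/*
 * Illustrative sketch (threshold values assumed): the pthresh, hthresh
 * and wthresh programmed into TXDCTL above originate from the queue
 * setup call made by the application:
 *
 *	struct rte_eth_txconf txconf = {
 *		.tx_thresh = { .pthresh = 8, .hthresh = 1, .wthresh = 16 },
 *	};
 *	rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 */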
/*********************************************************************
 *
 *  Enable VF receive unit.
 *
 **********************************************************************/
int
eth_igbvf_rx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_rx_queue *rxq;
	uint32_t srrctl;
	uint16_t buf_size;
	uint16_t rctl_bsize;
	uint16_t i;
	int ret;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* setup MTU */
	e1000_rlpml_set_vf(hw,
		(uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
		VLAN_TAG_SIZE));

	/* Configure and enable each RX queue. */
	rctl_bsize = 0;
	dev->rx_pkt_burst = eth_igb_recv_pkts;
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		uint64_t bus_addr;
		uint32_t rxdctl;

		rxq = dev->data->rx_queues[i];

		rxq->flags = 0;
		/*
		 * i350VF LB vlan packets have vlan tags byte swapped.
		 */
		if (hw->mac.type == e1000_vfadapt_i350) {
			rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
			PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
		} else {
			PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
		}

		/* Allocate buffers for descriptor rings and set up queue */
		ret = igb_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		bus_addr = rxq->rx_ring_phys_addr;
		E1000_WRITE_REG(hw, E1000_RDLEN(i),
				rxq->nb_rx_desc *
				sizeof(union e1000_adv_rx_desc));
		E1000_WRITE_REG(hw, E1000_RDBAH(i),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);

		srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/*
		 * Configure RX buffer size.
		 */
		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
			RTE_PKTMBUF_HEADROOM);
		if (buf_size >= 1024) {
			/*
			 * Configure the BSIZEPACKET field of the SRRCTL
			 * register of the queue.
			 * Value is in 1 KB resolution, from 1 KB to 127 KB.
			 * If this field is equal to 0b, then RCTL.BSIZE
			 * determines the RX packet buffer size.
			 */
			srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
				   E1000_SRRCTL_BSIZEPKT_MASK);
			buf_size = (uint16_t)((srrctl &
					       E1000_SRRCTL_BSIZEPKT_MASK) <<
					      E1000_SRRCTL_BSIZEPKT_SHIFT);

			/* Add dual VLAN length to the check for dual VLAN support */
			if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
					2 * VLAN_TAG_SIZE) > buf_size) {
				if (!dev->data->scattered_rx)
					PMD_INIT_LOG(DEBUG,
						     "forcing scatter mode");
				dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
				dev->data->scattered_rx = 1;
			}
		} else {
			/*
			 * Use BSIZE field of the device RCTL register.
			 */
			if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
				rctl_bsize = buf_size;
			if (!dev->data->scattered_rx)
				PMD_INIT_LOG(DEBUG, "forcing scatter mode");
			dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
			dev->data->scattered_rx = 1;
		}

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= E1000_SRRCTL_DROP_EN;

		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);

		/* Enable this RX queue. */
		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
		rxdctl &= 0xFFF00000;
		rxdctl |= (rxq->pthresh & 0x1F);
		rxdctl |= ((rxq->hthresh & 0x1F) << 8);
		if (hw->mac.type == e1000_vfadapt) {
			/*
			 * Workaround for 82576 VF erratum:
			 * force WTHRESH to 1 to avoid write-back
			 * sometimes not being triggered.
			 */
			rxdctl |= 0x10000;
			PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !");
		} else
			rxdctl |= ((rxq->wthresh & 0x1F) << 16);
		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
	}

	if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
		if (!dev->data->scattered_rx)
			PMD_INIT_LOG(DEBUG, "forcing scatter mode");
		dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
		dev->data->scattered_rx = 1;
	}

	/*
	 * Setup the HW Rx Head and Tail Descriptor Pointers.
	 * This needs to be done after enable.
	 */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];
		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
		E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
	}

	return 0;
}
/*********************************************************************
 *
 *  Enable VF transmit unit.
 *
 **********************************************************************/
void
eth_igbvf_tx_init(struct rte_eth_dev *dev)
{
	struct e1000_hw *hw;
	struct igb_tx_queue *txq;
	uint32_t txdctl;
	uint16_t i;

	hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings. */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		uint64_t bus_addr;

		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;
		E1000_WRITE_REG(hw, E1000_TDLEN(i),
				txq->nb_tx_desc *
				sizeof(union e1000_adv_tx_desc));
		E1000_WRITE_REG(hw, E1000_TDBAH(i),
				(uint32_t)(bus_addr >> 32));
		E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);

		/* Setup the HW Tx Head and Tail descriptor pointers. */
		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
		E1000_WRITE_REG(hw, E1000_TDH(i), 0);

		/* Setup Transmit threshold registers. */
		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
		txdctl |= txq->pthresh & 0x1F;
		txdctl |= ((txq->hthresh & 0x1F) << 8);
		if (hw->mac.type == e1000_82576) {
			/*
			 * Workaround for 82576 VF erratum:
			 * force WTHRESH to 1 to avoid write-back
			 * sometimes not being triggered.
			 */
			txdctl |= 0x10000;
			PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !");
		} else
			txdctl |= ((txq->wthresh & 0x1F) << 16);
		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
	}
}
void
igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
	struct rte_eth_rxq_info *qinfo)
{
	struct igb_rx_queue *rxq;

	rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = rxq->nb_rx_desc;

	qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
	qinfo->conf.rx_drop_en = rxq->drop_en;
	qinfo->conf.offloads = rxq->offloads;
}

void
igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
	struct rte_eth_txq_info *qinfo)
{
	struct igb_tx_queue *txq;

	txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->nb_tx_desc;

	qinfo->conf.tx_thresh.pthresh = txq->pthresh;
	qinfo->conf.tx_thresh.hthresh = txq->hthresh;
	qinfo->conf.tx_thresh.wthresh = txq->wthresh;
}
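
/*
 * Sketch of the consumer side (port_id assumed): the two callbacks
 * above back rte_eth_rx_queue_info_get() and rte_eth_tx_queue_info_get().
 *
 *	struct rte_eth_rxq_info qinfo;
 *	if (rte_eth_rx_queue_info_get(port_id, 0, &qinfo) == 0)
 *		printf("rxq 0: %u descriptors, scattered=%u\n",
 *			qinfo.nb_desc, qinfo.scattered_rx);
 */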
int
igb_config_rss_filter(struct rte_eth_dev *dev,
		struct igb_rte_flow_rss_conf *conf, bool add)
{
	uint32_t shift;
	uint16_t i, j;
	struct rte_eth_rss_conf rss_conf = conf->rss_conf;
	struct e1000_filter_info *filter_info =
		E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	if (!add) {
		if (memcmp(conf, &filter_info->rss_info,
			sizeof(struct igb_rte_flow_rss_conf)) == 0) {
			igb_rss_disable(dev);
			memset(&filter_info->rss_info, 0,
				sizeof(struct igb_rte_flow_rss_conf));
			return 0;
		}
		return -EINVAL;
	}

	if (filter_info->rss_info.num)
		return -EINVAL;

	/* Fill in redirection table. */
	shift = (hw->mac.type == e1000_82575) ? 6 : 0;
	for (i = 0, j = 0; i < 128; i++, j++) {
		union e1000_reta {
			uint32_t dword;
			uint8_t  bytes[4];
		} reta;
		uint8_t q_idx;

		if (j == conf->num)
			j = 0;
		q_idx = conf->queue[j];
		reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
		if ((i & 3) == 3)
			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
	}

	/* Configure the RSS key and the RSS protocols used to compute
	 * the RSS hash of input packets.
	 */
	if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
		igb_rss_disable(dev);
		return 0;
	}
	if (rss_conf.rss_key == NULL)
		rss_conf.rss_key = rss_intel_key; /* Default hash key */
	igb_hw_rss_hash_set(hw, &rss_conf);
	rte_memcpy(&filter_info->rss_info,
		conf, sizeof(struct igb_rte_flow_rss_conf));