1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
19 #include <rte_debug.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev.h>
35 #include <rte_prefetch.h>
40 #include <rte_string_fns.h>
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
46 #ifdef RTE_LIBRTE_IEEE1588
47 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
49 #define IGB_TX_IEEE1588_TMST 0
51 /* Bit mask to indicate which bits are required for building the TX context */
52 #define IGB_TX_OFFLOAD_MASK ( \
59 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
60 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
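/*
 * Illustrative note, not part of the driver logic: since the NOTSUP mask is
 * simply the complement of the supported set within PKT_TX_OFFLOAD_MASK, a
 * single AND is enough to reject an unsupported request, e.g.:
 *
 *	if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK)
 *		... reject the packet, as eth_igb_prep_pkts() does below ...
 */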
63 * Structure associated with each descriptor of the RX ring of a RX queue.
66 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
70 * Structure associated with each descriptor of the TX ring of a TX queue.
73 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
74 uint16_t next_id; /**< Index of next descriptor in ring. */
75 uint16_t last_id; /**< Index of last scattered descriptor. */
82 IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
86 * Structure associated with each RX queue.
89 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
90 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
91 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
92 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
93 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
94 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
95 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
96 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
97 uint16_t nb_rx_desc; /**< number of RX descriptors. */
98 uint16_t rx_tail; /**< current value of RDT register. */
99 uint16_t nb_rx_hold; /**< number of held free RX desc. */
100 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
101 uint16_t queue_id; /**< RX queue index. */
102 uint16_t reg_idx; /**< RX queue register index. */
103 uint16_t port_id; /**< Device port identifier. */
104 uint8_t pthresh; /**< Prefetch threshold register. */
105 uint8_t hthresh; /**< Host threshold register. */
106 uint8_t wthresh; /**< Write-back threshold register. */
107 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
108 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
109 uint32_t flags; /**< RX flags. */
113 * Hardware context number
115 enum igb_advctx_num {
116 IGB_CTX_0 = 0, /**< CTX0 */
117 IGB_CTX_1 = 1, /**< CTX1 */
118 IGB_CTX_NUM = 2, /**< CTX_NUM */
121 /** Offload features */
122 union igb_tx_offload {
125 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
126 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
127 uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier (CPU order). */
128 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
129 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
131 /* uint64_t unused:8; */
136 * Compare mask for igb_tx_offload.data,
137 * should be in sync with igb_tx_offload layout.
139 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
140 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
141 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
142 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
143 /** Mac + IP + TCP + Mss mask. */
144 #define TX_TSO_CMP_MASK \
145 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
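/*
 * Illustrative sketch, assuming the usual little-endian bit-field layout of
 * union igb_tx_offload above: l3_len(9) + l2_len(7) fill bits 0-15, hence
 * TX_MACIP_LEN_CMP_MASK = 0xFFFF; vlan_tci occupies bits 16-31 (0xFFFF0000);
 * l4_len bits 32-39 (0xFF00000000); tso_segsz bits 40-55 (0xFFFF0000000000).
 */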
148 * Structure to check if a new context needs to be built
150 struct igb_advctx_info {
151 uint64_t flags; /**< ol_flags related to context build. */
152 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
153 union igb_tx_offload tx_offload;
154 /** compare mask for tx offload. */
155 union igb_tx_offload tx_offload_mask;
159 * Structure associated with each TX queue.
161 struct igb_tx_queue {
162 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
163 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
164 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
165 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
166 uint32_t txd_type; /**< Device-specific TXD type */
167 uint16_t nb_tx_desc; /**< number of TX descriptors. */
168 uint16_t tx_tail; /**< Current value of TDT register. */
170 /**< Index of first used TX descriptor. */
171 uint16_t queue_id; /**< TX queue index. */
172 uint16_t reg_idx; /**< TX queue register index. */
173 uint16_t port_id; /**< Device port identifier. */
174 uint8_t pthresh; /**< Prefetch threshold register. */
175 uint8_t hthresh; /**< Host threshold register. */
176 uint8_t wthresh; /**< Write-back threshold register. */
178 /**< Currently used hardware context. */
180 /**< Start context position for transmit queue. */
181 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
182 /**< Hardware context history.*/
186 #define RTE_PMD_USE_PREFETCH
189 #ifdef RTE_PMD_USE_PREFETCH
190 #define rte_igb_prefetch(p) rte_prefetch0(p)
192 #define rte_igb_prefetch(p) do {} while(0)
195 #ifdef RTE_PMD_PACKET_PREFETCH
196 #define rte_packet_prefetch(p) rte_prefetch1(p)
198 #define rte_packet_prefetch(p) do {} while(0)
202 * Macro for VMDq feature for 1 GbE NIC.
204 #define E1000_VMOLR_SIZE (8)
205 #define IGB_TSO_MAX_HDRLEN (512)
206 #define IGB_TSO_MAX_MSS (9216)
208 /*********************************************************************
212 **********************************************************************/
215 * There are some limitations in hardware for TCP segmentation offload. We
216 * should check whether the parameters are valid.
218 static inline uint64_t
219 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
221 if (!(ol_req & PKT_TX_TCP_SEG))
223 if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
224 ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
225 ol_req &= ~PKT_TX_TCP_SEG;
226 ol_req |= PKT_TX_TCP_CKSUM;
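/*
 * For illustration: a request with tso_segsz = 16384 (> IGB_TSO_MAX_MSS)
 * is downgraded here - PKT_TX_TCP_SEG is cleared and PKT_TX_TCP_CKSUM is
 * set instead, so the mbuf goes out unsegmented with only the L4 checksum
 * offloaded.
 */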
232 * Advanced context descriptors are almost the same between igb and ixgbe.
233 * This is a separate function; look for optimization opportunities here.
234 * Rework is required to go with the pre-defined values.
238 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
239 volatile struct e1000_adv_tx_context_desc *ctx_txd,
240 uint64_t ol_flags, union igb_tx_offload tx_offload)
242 uint32_t type_tucmd_mlhl;
243 uint32_t mss_l4len_idx;
244 uint32_t ctx_idx, ctx_curr;
245 uint32_t vlan_macip_lens;
246 union igb_tx_offload tx_offload_mask;
248 ctx_curr = txq->ctx_curr;
249 ctx_idx = ctx_curr + txq->ctx_start;
251 tx_offload_mask.data = 0;
254 /* Specify which HW CTX to upload. */
255 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
257 if (ol_flags & PKT_TX_VLAN_PKT)
258 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
260 /* check if TCP segmentation is required for this packet */
261 if (ol_flags & PKT_TX_TCP_SEG) {
262 /* implies IP cksum in IPv4 */
263 if (ol_flags & PKT_TX_IP_CKSUM)
264 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
265 E1000_ADVTXD_TUCMD_L4T_TCP |
266 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
268 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
269 E1000_ADVTXD_TUCMD_L4T_TCP |
270 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
272 tx_offload_mask.data |= TX_TSO_CMP_MASK;
273 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
274 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
275 } else { /* no TSO, check if hardware checksum is needed */
276 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
277 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
279 if (ol_flags & PKT_TX_IP_CKSUM)
280 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
282 switch (ol_flags & PKT_TX_L4_MASK) {
283 case PKT_TX_UDP_CKSUM:
284 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
285 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
286 mss_l4len_idx |= sizeof(struct udp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
288 case PKT_TX_TCP_CKSUM:
289 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
290 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
291 mss_l4len_idx |= sizeof(struct tcp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
293 case PKT_TX_SCTP_CKSUM:
294 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
295 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
296 mss_l4len_idx |= sizeof(struct sctp_hdr) << E1000_ADVTXD_L4LEN_SHIFT;
299 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
300 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
305 txq->ctx_cache[ctx_curr].flags = ol_flags;
306 txq->ctx_cache[ctx_curr].tx_offload.data =
307 tx_offload_mask.data & tx_offload.data;
308 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
310 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
311 vlan_macip_lens = (uint32_t)tx_offload.data;
312 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
313 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
314 ctx_txd->seqnum_seed = 0;
318 * Check which hardware context can be used. Use the existing match
319 * or create a new context descriptor.
321 static inline uint32_t
322 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
323 union igb_tx_offload tx_offload)
325 /* If match with the current context */
326 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
327 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
328 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
329 return txq->ctx_curr;
332 /* If match with the second context */
334 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
335 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
336 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
337 return txq->ctx_curr;
340 /* Mismatch, a new context descriptor will have to be built */
344 static inline uint32_t
345 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
347 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
348 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
351 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
352 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
353 tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
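/*
 * Worked example, for illustration only: ol_flags = PKT_TX_IP_CKSUM |
 * PKT_TX_TCP_CKSUM yields E1000_ADVTXD_POPTS_IXSM | E1000_ADVTXD_POPTS_TXSM,
 * while a TSO request (PKT_TX_TCP_SEG) also picks up TXSM via the line just
 * above.
 */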
357 static inline uint32_t
358 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
361 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
362 static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
363 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
364 cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
369 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
372 struct igb_tx_queue *txq;
373 struct igb_tx_entry *sw_ring;
374 struct igb_tx_entry *txe, *txn;
375 volatile union e1000_adv_tx_desc *txr;
376 volatile union e1000_adv_tx_desc *txd;
377 struct rte_mbuf *tx_pkt;
378 struct rte_mbuf *m_seg;
379 uint64_t buf_dma_addr;
380 uint32_t olinfo_status;
381 uint32_t cmd_type_len;
390 uint32_t new_ctx = 0;
392 union igb_tx_offload tx_offload = {0};
395 sw_ring = txq->sw_ring;
397 tx_id = txq->tx_tail;
398 txe = &sw_ring[tx_id];
400 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
402 pkt_len = tx_pkt->pkt_len;
404 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
407 * The number of descriptors that must be allocated for a
408 * packet is the number of segments of that packet, plus 1
409 * Context Descriptor for the VLAN Tag Identifier, if any.
410 * Determine the last TX descriptor to allocate in the TX ring
411 * for the packet, starting from the current position (tx_id)
414 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
416 ol_flags = tx_pkt->ol_flags;
417 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
419 /* If a Context Descriptor needs to be built. */
421 tx_offload.l2_len = tx_pkt->l2_len;
422 tx_offload.l3_len = tx_pkt->l3_len;
423 tx_offload.l4_len = tx_pkt->l4_len;
424 tx_offload.vlan_tci = tx_pkt->vlan_tci;
425 tx_offload.tso_segsz = tx_pkt->tso_segsz;
426 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
428 ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
429 /* Only allocate a context descriptor if required. */
430 new_ctx = (ctx == IGB_CTX_NUM);
431 ctx = txq->ctx_curr + txq->ctx_start;
432 tx_last = (uint16_t) (tx_last + new_ctx);
434 if (tx_last >= txq->nb_tx_desc)
435 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
437 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
438 " tx_first=%u tx_last=%u",
439 (unsigned) txq->port_id,
440 (unsigned) txq->queue_id,
446 * Check if there are enough free descriptors in the TX ring
447 * to transmit the next packet.
448 * This operation is based on the two following rules:
450 * 1- Only check that the last needed TX descriptor can be
451 * allocated (by construction, if that descriptor is free,
452 * all intermediate ones are also free).
454 * For this purpose, the index of the last TX descriptor
455 * used for a packet (the "last descriptor" of a packet)
456 * is recorded in the TX entries (the last one included)
457 * that are associated with all TX descriptors allocated
460 * 2- Avoid allocating the last free TX descriptor of the
461 * ring, in order to never set the TDT register with the
462 * same value stored in parallel by the NIC in the TDH
463 * register, which makes the TX engine of the NIC enter
464 * a deadlock situation.
466 * By extension, avoid allocating a free descriptor that
467 * belongs to the last set of free descriptors allocated
468 * to the same packet previously transmitted.
472 * The "last descriptor" of the previously sent packet, if any,
473 * which used the last descriptor to allocate.
475 tx_end = sw_ring[tx_last].last_id;
478 * The next descriptor following that "last descriptor" in the
481 tx_end = sw_ring[tx_end].next_id;
484 * The "last descriptor" associated with that next descriptor.
486 tx_end = sw_ring[tx_end].last_id;
489 * Check that this descriptor is free.
491 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
498 * Set common flags of all TX Data Descriptors.
500 * The following bits must be set in all Data Descriptors:
501 * - E1000_ADVTXD_DTYP_DATA
502 * - E1000_ADVTXD_DCMD_DEXT
504 * The following bits must be set in the first Data Descriptor
505 * and are ignored in the other ones:
506 * - E1000_ADVTXD_DCMD_IFCS
507 * - E1000_ADVTXD_MAC_1588
508 * - E1000_ADVTXD_DCMD_VLE
510 * The following bits must only be set in the last Data
512 * - E1000_TXD_CMD_EOP
514 * The following bits can be set in any Data Descriptor, but
515 * are only set in the last Data Descriptor:
518 cmd_type_len = txq->txd_type |
519 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
520 if (tx_ol_req & PKT_TX_TCP_SEG)
521 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
522 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
523 #if defined(RTE_LIBRTE_IEEE1588)
524 if (ol_flags & PKT_TX_IEEE1588_TMST)
525 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
528 /* Setup TX Advanced context descriptor if required */
530 volatile struct e1000_adv_tx_context_desc *
533 ctx_txd = (volatile struct
534 e1000_adv_tx_context_desc *)
537 txn = &sw_ring[txe->next_id];
538 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
540 if (txe->mbuf != NULL) {
541 rte_pktmbuf_free_seg(txe->mbuf);
545 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
547 txe->last_id = tx_last;
548 tx_id = txe->next_id;
552 /* Setup the TX Advanced Data Descriptor */
553 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
554 olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
555 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
560 txn = &sw_ring[txe->next_id];
563 if (txe->mbuf != NULL)
564 rte_pktmbuf_free_seg(txe->mbuf);
568 * Set up transmit descriptor.
570 slen = (uint16_t) m_seg->data_len;
571 buf_dma_addr = rte_mbuf_data_iova(m_seg);
572 txd->read.buffer_addr =
573 rte_cpu_to_le_64(buf_dma_addr);
574 txd->read.cmd_type_len =
575 rte_cpu_to_le_32(cmd_type_len | slen);
576 txd->read.olinfo_status =
577 rte_cpu_to_le_32(olinfo_status);
578 txe->last_id = tx_last;
579 tx_id = txe->next_id;
582 } while (m_seg != NULL);
585 * The last packet data descriptor needs End Of Packet (EOP)
586 * and Report Status (RS).
588 txd->read.cmd_type_len |=
589 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
595 * Set the Transmit Descriptor Tail (TDT).
597 E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
598 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
599 (unsigned) txq->port_id, (unsigned) txq->queue_id,
600 (unsigned) tx_id, (unsigned) nb_tx);
601 txq->tx_tail = tx_id;
606 /*********************************************************************
610 **********************************************************************/
612 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
618 for (i = 0; i < nb_pkts; i++) {
621 /* Check some limitations for TSO in hardware */
622 if (m->ol_flags & PKT_TX_TCP_SEG)
623 if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
624 (m->l2_len + m->l3_len + m->l4_len >
625 IGB_TSO_MAX_HDRLEN)) {
630 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
631 rte_errno = ENOTSUP;
635 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
636 ret = rte_validate_tx_offload(m);
642 ret = rte_net_intel_cksum_prepare(m);
652 /*********************************************************************
656 **********************************************************************/
657 #define IGB_PACKET_TYPE_IPV4 0X01
658 #define IGB_PACKET_TYPE_IPV4_TCP 0X11
659 #define IGB_PACKET_TYPE_IPV4_UDP 0X21
660 #define IGB_PACKET_TYPE_IPV4_SCTP 0X41
661 #define IGB_PACKET_TYPE_IPV4_EXT 0X03
662 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP 0X43
663 #define IGB_PACKET_TYPE_IPV6 0X04
664 #define IGB_PACKET_TYPE_IPV6_TCP 0X14
665 #define IGB_PACKET_TYPE_IPV6_UDP 0X24
666 #define IGB_PACKET_TYPE_IPV6_EXT 0X0C
667 #define IGB_PACKET_TYPE_IPV6_EXT_TCP 0X1C
668 #define IGB_PACKET_TYPE_IPV6_EXT_UDP 0X2C
669 #define IGB_PACKET_TYPE_IPV4_IPV6 0X05
670 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP 0X15
671 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP 0X25
672 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
673 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
674 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
675 #define IGB_PACKET_TYPE_MAX 0X80
676 #define IGB_PACKET_TYPE_MASK 0X7F
677 #define IGB_PACKET_TYPE_SHIFT 0X04
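/*
 * Decode example, for illustration only: a descriptor whose packet type
 * field is 0x11 after the shift/mask applied in
 * igb_rxd_pkt_info_to_pkt_type() below indexes
 * ptype_table[IGB_PACKET_TYPE_IPV4_TCP], i.e. the packet is reported as
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP.
 */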
678 static inline uint32_t
679 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
681 static const uint32_t
682 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
683 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
685 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
686 RTE_PTYPE_L3_IPV4_EXT,
687 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
689 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
690 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
691 RTE_PTYPE_INNER_L3_IPV6,
692 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
693 RTE_PTYPE_L3_IPV6_EXT,
694 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
695 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
696 RTE_PTYPE_INNER_L3_IPV6_EXT,
697 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
698 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
699 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
700 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
701 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
702 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
703 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
704 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
705 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
706 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
707 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
708 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
709 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
710 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
711 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
712 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
713 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
714 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
715 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
716 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
717 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
718 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
719 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
720 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
721 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
722 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
723 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
724 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
726 if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
727 return RTE_PTYPE_UNKNOWN;
729 pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
731 return ptype_table[pkt_info];
734 static inline uint64_t
735 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
737 uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : PKT_RX_RSS_HASH;
739 #if defined(RTE_LIBRTE_IEEE1588)
740 static uint32_t ip_pkt_etqf_map[8] = {
741 0, 0, 0, PKT_RX_IEEE1588_PTP,
745 struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
746 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
748 /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
749 if (hw->mac.type == e1000_i210)
750 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
752 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
760 static inline uint64_t
761 rx_desc_status_to_pkt_flags(uint32_t rx_status)
765 /* Check if VLAN present */
766 pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
767 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
769 #if defined(RTE_LIBRTE_IEEE1588)
770 if (rx_status & E1000_RXD_STAT_TMST)
771 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
776 static inline uint64_t
777 rx_desc_error_to_pkt_flags(uint32_t rx_status)
780 * Bit 30: IPE, IPv4 checksum error
781 * Bit 29: L4I, L4 integrity error
784 static uint64_t error_to_pkt_flags_map[4] = {
785 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
786 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
787 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
788 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
790 return error_to_pkt_flags_map[(rx_status >>
791 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
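/*
 * For illustration, assuming E1000_RXD_ERR_CKSUM_BIT/MSK select the two
 * error bits described above: with the IP error bit (30) set and the L4
 * error bit (29) clear, the index computed above is 2 and the packet is
 * flagged PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
 */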
795 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
798 struct igb_rx_queue *rxq;
799 volatile union e1000_adv_rx_desc *rx_ring;
800 volatile union e1000_adv_rx_desc *rxdp;
801 struct igb_rx_entry *sw_ring;
802 struct igb_rx_entry *rxe;
803 struct rte_mbuf *rxm;
804 struct rte_mbuf *nmb;
805 union e1000_adv_rx_desc rxd;
808 uint32_t hlen_type_rss;
818 rx_id = rxq->rx_tail;
819 rx_ring = rxq->rx_ring;
820 sw_ring = rxq->sw_ring;
821 while (nb_rx < nb_pkts) {
823 * The order of operations here is important as the DD status
824 * bit must not be read after any other descriptor fields.
825 * rx_ring and rxdp are pointing to volatile data so the order
826 * of accesses cannot be reordered by the compiler. If they were
827 * not volatile, they could be reordered which could lead to
828 * using invalid descriptor fields when read from rxd.
830 rxdp = &rx_ring[rx_id];
831 staterr = rxdp->wb.upper.status_error;
832 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
839 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
840 * likely to be invalid and to be dropped by the various
841 * validation checks performed by the network stack.
843 * Allocate a new mbuf to replenish the RX ring descriptor.
844 * If the allocation fails:
845 * - arrange for that RX descriptor to be the first one
846 * being parsed the next time the receive function is
847 * invoked [on the same queue].
849 * - Stop parsing the RX ring and return immediately.
851 * This policy does not drop the packet received in the RX
852 * descriptor for which the allocation of a new mbuf failed.
853 * Thus, it allows that packet to be retrieved later if
854 * mbufs have been freed in the meantime.
855 * As a side effect, holding RX descriptors instead of
856 * systematically giving them back to the NIC may lead to
857 * RX ring exhaustion situations.
858 * However, the NIC can gracefully prevent such situations
859 * from happening by sending specific "back-pressure" flow control
860 * frames to its peer(s).
862 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
863 "staterr=0x%x pkt_len=%u",
864 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
865 (unsigned) rx_id, (unsigned) staterr,
866 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
868 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
870 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
871 "queue_id=%u", (unsigned) rxq->port_id,
872 (unsigned) rxq->queue_id);
873 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
878 rxe = &sw_ring[rx_id];
880 if (rx_id == rxq->nb_rx_desc)
883 /* Prefetch next mbuf while processing current one. */
884 rte_igb_prefetch(sw_ring[rx_id].mbuf);
887 * When next RX descriptor is on a cache-line boundary,
888 * prefetch the next 4 RX descriptors and the next 8 pointers
891 if ((rx_id & 0x3) == 0) {
892 rte_igb_prefetch(&rx_ring[rx_id]);
893 rte_igb_prefetch(&sw_ring[rx_id]);
899 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
900 rxdp->read.hdr_addr = 0;
901 rxdp->read.pkt_addr = dma_addr;
904 * Initialize the returned mbuf.
905 * 1) setup generic mbuf fields:
906 * - number of segments,
909 * - RX port identifier.
910 * 2) integrate hardware offload data, if any:
912 * - IP checksum flag,
913 * - VLAN TCI, if any,
916 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
918 rxm->data_off = RTE_PKTMBUF_HEADROOM;
919 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
922 rxm->pkt_len = pkt_len;
923 rxm->data_len = pkt_len;
924 rxm->port = rxq->port_id;
926 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
927 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
930 * The vlan_tci field is only valid when PKT_RX_VLAN is
931 * set in the pkt_flags field and must be in CPU byte order.
933 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
934 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
935 rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
937 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
939 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
940 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
941 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
942 rxm->ol_flags = pkt_flags;
943 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
944 lo_dword.hs_rss.pkt_info);
947 * Store the mbuf address into the next entry of the array
948 * of returned packets.
950 rx_pkts[nb_rx++] = rxm;
952 rxq->rx_tail = rx_id;
955 * If the number of free RX descriptors is greater than the RX free
956 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
958 * Update the RDT with the value of the last processed RX descriptor
959 * minus 1, to guarantee that the RDT register is never equal to the
960 * RDH register, which creates a "full" ring situation from the
961 * hardware point of view...
963 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
964 if (nb_hold > rxq->rx_free_thresh) {
965 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
966 "nb_hold=%u nb_rx=%u",
967 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
968 (unsigned) rx_id, (unsigned) nb_hold,
970 rx_id = (uint16_t) ((rx_id == 0) ?
971 (rxq->nb_rx_desc - 1) : (rx_id - 1));
972 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
975 rxq->nb_rx_hold = nb_hold;
980 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
983 struct igb_rx_queue *rxq;
984 volatile union e1000_adv_rx_desc *rx_ring;
985 volatile union e1000_adv_rx_desc *rxdp;
986 struct igb_rx_entry *sw_ring;
987 struct igb_rx_entry *rxe;
988 struct rte_mbuf *first_seg;
989 struct rte_mbuf *last_seg;
990 struct rte_mbuf *rxm;
991 struct rte_mbuf *nmb;
992 union e1000_adv_rx_desc rxd;
993 uint64_t dma; /* Physical address of mbuf data buffer */
995 uint32_t hlen_type_rss;
1005 rx_id = rxq->rx_tail;
1006 rx_ring = rxq->rx_ring;
1007 sw_ring = rxq->sw_ring;
1010 * Retrieve RX context of current packet, if any.
1012 first_seg = rxq->pkt_first_seg;
1013 last_seg = rxq->pkt_last_seg;
1015 while (nb_rx < nb_pkts) {
1018 * The order of operations here is important as the DD status
1019 * bit must not be read after any other descriptor fields.
1020 * rx_ring and rxdp are pointing to volatile data so the order
1021 * of accesses cannot be reordered by the compiler. If they were
1022 * not volatile, they could be reordered which could lead to
1023 * using invalid descriptor fields when read from rxd.
1025 rxdp = &rx_ring[rx_id];
1026 staterr = rxdp->wb.upper.status_error;
1027 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1034 * Allocate a new mbuf to replenish the RX ring descriptor.
1035 * If the allocation fails:
1036 * - arrange for that RX descriptor to be the first one
1037 * being parsed the next time the receive function is
1038 * invoked [on the same queue].
1040 * - Stop parsing the RX ring and return immediately.
1042 * This policy does not drop the packet received in the RX
1043 * descriptor for which the allocation of a new mbuf failed.
1044 * Thus, it allows that packet to be later retrieved if
1045 * mbufs have been freed in the meantime.
1046 * As a side effect, holding RX descriptors instead of
1047 * systematically giving them back to the NIC may lead to
1048 * RX ring exhaustion situations.
1049 * However, the NIC can gracefully prevent such situations
1050 * from happening by sending specific "back-pressure" flow control
1051 * frames to its peer(s).
1053 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1054 "staterr=0x%x data_len=%u",
1055 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1056 (unsigned) rx_id, (unsigned) staterr,
1057 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1059 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1061 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1062 "queue_id=%u", (unsigned) rxq->port_id,
1063 (unsigned) rxq->queue_id);
1064 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1069 rxe = &sw_ring[rx_id];
1071 if (rx_id == rxq->nb_rx_desc)
1074 /* Prefetch next mbuf while processing current one. */
1075 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1078 * When next RX descriptor is on a cache-line boundary,
1079 * prefetch the next 4 RX descriptors and the next 8 pointers
1082 if ((rx_id & 0x3) == 0) {
1083 rte_igb_prefetch(&rx_ring[rx_id]);
1084 rte_igb_prefetch(&sw_ring[rx_id]);
1088 * Update RX descriptor with the physical address of the new
1089 * data buffer of the newly allocated mbuf.
1093 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1094 rxdp->read.pkt_addr = dma;
1095 rxdp->read.hdr_addr = 0;
1098 * Set data length & data buffer address of mbuf.
1100 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1101 rxm->data_len = data_len;
1102 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1105 * If this is the first buffer of the received packet,
1106 * set the pointer to the first mbuf of the packet and
1107 * initialize its context.
1108 * Otherwise, update the total length and the number of segments
1109 * of the current scattered packet, and update the pointer to
1110 * the last mbuf of the current packet.
1112 if (first_seg == NULL) {
1114 first_seg->pkt_len = data_len;
1115 first_seg->nb_segs = 1;
1117 first_seg->pkt_len += data_len;
1118 first_seg->nb_segs++;
1119 last_seg->next = rxm;
1123 * If this is not the last buffer of the received packet,
1124 * update the pointer to the last mbuf of the current scattered
1125 * packet and continue to parse the RX ring.
1127 if (! (staterr & E1000_RXD_STAT_EOP)) {
1133 * This is the last buffer of the received packet.
1134 * If the CRC is not stripped by the hardware:
1135 * - Subtract the CRC length from the total packet length.
1136 * - If the last buffer only contains the whole CRC or a part
1137 * of it, free the mbuf associated to the last buffer.
1138 * If part of the CRC is also contained in the previous
1139 * mbuf, subtract the length of that CRC part from the
1140 * data length of the previous mbuf.
1143 if (unlikely(rxq->crc_len > 0)) {
1144 first_seg->pkt_len -= ETHER_CRC_LEN;
1145 if (data_len <= ETHER_CRC_LEN) {
1146 rte_pktmbuf_free_seg(rxm);
1147 first_seg->nb_segs--;
1148 last_seg->data_len = (uint16_t)
1149 (last_seg->data_len -
1150 (ETHER_CRC_LEN - data_len));
1151 last_seg->next = NULL;
1154 (uint16_t) (data_len - ETHER_CRC_LEN);
1158 * Initialize the first mbuf of the returned packet:
1159 * - RX port identifier,
1160 * - hardware offload data, if any:
1161 * - RSS flag & hash,
1162 * - IP checksum flag,
1163 * - VLAN TCI, if any,
1166 first_seg->port = rxq->port_id;
1167 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1170 * The vlan_tci field is only valid when PKT_RX_VLAN is
1171 * set in the pkt_flags field and must be in CPU byte order.
1173 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1174 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1175 first_seg->vlan_tci =
1176 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1178 first_seg->vlan_tci =
1179 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1181 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1182 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1183 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1184 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1185 first_seg->ol_flags = pkt_flags;
1186 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1187 lower.lo_dword.hs_rss.pkt_info);
1189 /* Prefetch data of first segment, if configured to do so. */
1190 rte_packet_prefetch((char *)first_seg->buf_addr +
1191 first_seg->data_off);
1194 * Store the mbuf address into the next entry of the array
1195 * of returned packets.
1197 rx_pkts[nb_rx++] = first_seg;
1200 * Setup receipt context for a new packet.
1206 * Record index of the next RX descriptor to probe.
1208 rxq->rx_tail = rx_id;
1211 * Save receive context.
1213 rxq->pkt_first_seg = first_seg;
1214 rxq->pkt_last_seg = last_seg;
1217 * If the number of free RX descriptors is greater than the RX free
1218 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1220 * Update the RDT with the value of the last processed RX descriptor
1221 * minus 1, to guarantee that the RDT register is never equal to the
1222 * RDH register, which creates a "full" ring situation from the
1223 * hardware point of view...
1225 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1226 if (nb_hold > rxq->rx_free_thresh) {
1227 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1228 "nb_hold=%u nb_rx=%u",
1229 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1230 (unsigned) rx_id, (unsigned) nb_hold,
1232 rx_id = (uint16_t) ((rx_id == 0) ?
1233 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1234 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1237 rxq->nb_rx_hold = nb_hold;
1242 * Maximum number of Ring Descriptors.
1244 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1245 * descriptors should meet the following condition:
1246 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
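 *
 * For illustration: both advanced RX and TX descriptors are 16 bytes, so the
 * condition above reduces to num_ring_desc being a multiple of 128 / 16 = 8,
 * which is what the IGB_RXD_ALIGN / IGB_TXD_ALIGN checks in the queue setup
 * functions below enforce.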
1250 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1254 if (txq->sw_ring != NULL) {
1255 for (i = 0; i < txq->nb_tx_desc; i++) {
1256 if (txq->sw_ring[i].mbuf != NULL) {
1257 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1258 txq->sw_ring[i].mbuf = NULL;
1265 igb_tx_queue_release(struct igb_tx_queue *txq)
1268 igb_tx_queue_release_mbufs(txq);
1269 rte_free(txq->sw_ring);
1275 eth_igb_tx_queue_release(void *txq)
1277 igb_tx_queue_release(txq);
1281 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1283 struct igb_tx_entry *sw_ring;
1284 volatile union e1000_adv_tx_desc *txr;
1285 uint16_t tx_first; /* First segment analyzed. */
1286 uint16_t tx_id; /* Current segment being processed. */
1287 uint16_t tx_last; /* Last segment in the current packet. */
1288 uint16_t tx_next; /* First segment of the next packet. */
1293 sw_ring = txq->sw_ring;
1297 * tx_tail is the last sent packet on the sw_ring. Go to the end
1298 * of that packet (the last segment in the packet chain) and
1299 * then the next segment will be the start of the oldest packet
1300 * in the sw_ring. This is the first packet that will be
1301 * attempted to be freed.
1304 /* Get last segment in most recently added packet. */
1305 tx_first = sw_ring[txq->tx_tail].last_id;
1307 /* Get the next segment, which is the oldest segment in ring. */
1308 tx_first = sw_ring[tx_first].next_id;
1310 /* Set the current index to the first. */
1314 * Loop through each packet. For each packet, verify that an
1315 * mbuf exists and that the last segment is free. If so, free
1319 tx_last = sw_ring[tx_id].last_id;
1321 if (sw_ring[tx_last].mbuf) {
1322 if (txr[tx_last].wb.status &
1323 E1000_TXD_STAT_DD) {
1325 * Increment the number of packets
1330 /* Get the start of the next packet. */
1331 tx_next = sw_ring[tx_last].next_id;
1334 * Loop through all segments in a
1338 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1339 sw_ring[tx_id].mbuf = NULL;
1340 sw_ring[tx_id].last_id = tx_id;
1342 /* Move to the next segment. */
1343 tx_id = sw_ring[tx_id].next_id;
1345 } while (tx_id != tx_next);
1347 if (unlikely(count == (int)free_cnt))
1351 * mbuf still in use, nothing left to
1357 * There are multiple reasons to be here:
1358 * 1) All the packets on the ring have been
1359 * freed - tx_id is equal to tx_first
1360 * and some packets have been freed.
1362 * 2) The interface has not sent a ring's worth of
1363 * packets yet, so the segment after tail is
1364 * still empty. Or a previous call to this
1365 * function freed some of the segments but
1366 * not all, so there is a hole in the list.
1367 * Hopefully this is a rare case.
1368 * - Walk the list and find the next mbuf. If
1369 * there isn't one, then done.
1371 if (likely((tx_id == tx_first) && (count != 0)))
1375 * Walk the list and find the next mbuf, if any.
1378 /* Move to the next segment. */
1379 tx_id = sw_ring[tx_id].next_id;
1381 if (sw_ring[tx_id].mbuf)
1384 } while (tx_id != tx_first);
1387 * Determine why previous loop bailed. If there
1388 * is not an mbuf, done.
1390 if (sw_ring[tx_id].mbuf == NULL)
1401 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1403 return igb_tx_done_cleanup(txq, free_cnt);
1407 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1412 memset((void*)&txq->ctx_cache, 0,
1413 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1417 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1419 static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1420 struct igb_tx_entry *txe = txq->sw_ring;
1422 struct e1000_hw *hw;
1424 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1425 /* Zero out HW ring memory */
1426 for (i = 0; i < txq->nb_tx_desc; i++) {
1427 txq->tx_ring[i] = zeroed_desc;
1430 /* Initialize ring entries */
1431 prev = (uint16_t)(txq->nb_tx_desc - 1);
1432 for (i = 0; i < txq->nb_tx_desc; i++) {
1433 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1435 txd->wb.status = E1000_TXD_STAT_DD;
1438 txe[prev].next_id = i;
1442 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1443 /* 82575 specific, each tx queue will use 2 hw contexts */
1444 if (hw->mac.type == e1000_82575)
1445 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1447 igb_reset_tx_queue_stat(txq);
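/*
 * For illustration: on an 82575, TX queue 1 gets ctx_start = 1 * IGB_CTX_NUM
 * = 2, so its two cached contexts map to hardware context slots 2 and 3; on
 * other mac types ctx_start keeps the zeroed default of 0.
 */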
1451 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1454 unsigned int socket_id,
1455 const struct rte_eth_txconf *tx_conf)
1457 const struct rte_memzone *tz;
1458 struct igb_tx_queue *txq;
1459 struct e1000_hw *hw;
1462 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1465 * Validate number of transmit descriptors.
1466 * It must not exceed the hardware maximum, and must be a multiple
1469 if (nb_desc % IGB_TXD_ALIGN != 0 ||
1470 (nb_desc > E1000_MAX_RING_DESC) ||
1471 (nb_desc < E1000_MIN_RING_DESC)) {
1476 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1479 if (tx_conf->tx_free_thresh != 0)
1480 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1481 "used for the 1G driver.");
1482 if (tx_conf->tx_rs_thresh != 0)
1483 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1484 "used for the 1G driver.");
1485 if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1486 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1487 "consider setting the TX WTHRESH value to 4, 8, "
1490 /* Free memory prior to re-allocation if needed */
1491 if (dev->data->tx_queues[queue_idx] != NULL) {
1492 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1493 dev->data->tx_queues[queue_idx] = NULL;
1496 /* First allocate the tx queue data structure */
1497 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1498 RTE_CACHE_LINE_SIZE);
1503 * Allocate TX ring hardware descriptors. A memzone large enough to
1504 * handle the maximum ring size is allocated in order to allow for
1505 * resizing in later calls to the queue setup function.
1507 size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1508 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1509 E1000_ALIGN, socket_id);
1511 igb_tx_queue_release(txq);
1515 txq->nb_tx_desc = nb_desc;
1516 txq->pthresh = tx_conf->tx_thresh.pthresh;
1517 txq->hthresh = tx_conf->tx_thresh.hthresh;
1518 txq->wthresh = tx_conf->tx_thresh.wthresh;
1519 if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1521 txq->queue_id = queue_idx;
1522 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1523 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1524 txq->port_id = dev->data->port_id;
1526 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1527 txq->tx_ring_phys_addr = tz->iova;
1529 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1530 /* Allocate software ring */
1531 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1532 sizeof(struct igb_tx_entry) * nb_desc,
1533 RTE_CACHE_LINE_SIZE);
1534 if (txq->sw_ring == NULL) {
1535 igb_tx_queue_release(txq);
1538 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1539 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1541 igb_reset_tx_queue(txq, dev);
1542 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1543 dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1544 dev->data->tx_queues[queue_idx] = txq;
1550 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1554 if (rxq->sw_ring != NULL) {
1555 for (i = 0; i < rxq->nb_rx_desc; i++) {
1556 if (rxq->sw_ring[i].mbuf != NULL) {
1557 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1558 rxq->sw_ring[i].mbuf = NULL;
1565 igb_rx_queue_release(struct igb_rx_queue *rxq)
1568 igb_rx_queue_release_mbufs(rxq);
1569 rte_free(rxq->sw_ring);
1575 eth_igb_rx_queue_release(void *rxq)
1577 igb_rx_queue_release(rxq);
1581 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1583 static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1586 /* Zero out HW ring memory */
1587 for (i = 0; i < rxq->nb_rx_desc; i++) {
1588 rxq->rx_ring[i] = zeroed_desc;
1592 rxq->pkt_first_seg = NULL;
1593 rxq->pkt_last_seg = NULL;
1597 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1600 unsigned int socket_id,
1601 const struct rte_eth_rxconf *rx_conf,
1602 struct rte_mempool *mp)
1604 const struct rte_memzone *rz;
1605 struct igb_rx_queue *rxq;
1606 struct e1000_hw *hw;
1609 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1612 * Validate number of receive descriptors.
1613 * It must not exceed the hardware maximum, and must be a multiple
1616 if (nb_desc % IGB_RXD_ALIGN != 0 ||
1617 (nb_desc > E1000_MAX_RING_DESC) ||
1618 (nb_desc < E1000_MIN_RING_DESC)) {
1622 /* Free memory prior to re-allocation if needed */
1623 if (dev->data->rx_queues[queue_idx] != NULL) {
1624 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1625 dev->data->rx_queues[queue_idx] = NULL;
1628 /* First allocate the RX queue data structure. */
1629 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1630 RTE_CACHE_LINE_SIZE);
1634 rxq->nb_rx_desc = nb_desc;
1635 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1636 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1637 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1638 if (rxq->wthresh > 0 &&
1639 (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1641 rxq->drop_en = rx_conf->rx_drop_en;
1642 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1643 rxq->queue_id = queue_idx;
1644 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1645 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1646 rxq->port_id = dev->data->port_id;
1647 rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ? 0 :
1651 * Allocate RX ring hardware descriptors. A memzone large enough to
1652 * handle the maximum ring size is allocated in order to allow for
1653 * resizing in later calls to the queue setup function.
1655 size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1656 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1657 E1000_ALIGN, socket_id);
1659 igb_rx_queue_release(rxq);
1662 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1663 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1664 rxq->rx_ring_phys_addr = rz->iova;
1665 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1667 /* Allocate software ring. */
1668 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1669 sizeof(struct igb_rx_entry) * nb_desc,
1670 RTE_CACHE_LINE_SIZE);
1671 if (rxq->sw_ring == NULL) {
1672 igb_rx_queue_release(rxq);
1675 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1676 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1678 dev->data->rx_queues[queue_idx] = rxq;
1679 igb_reset_rx_queue(rxq);
1685 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1687 #define IGB_RXQ_SCAN_INTERVAL 4
1688 volatile union e1000_adv_rx_desc *rxdp;
1689 struct igb_rx_queue *rxq;
1692 rxq = dev->data->rx_queues[rx_queue_id];
1693 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1695 while ((desc < rxq->nb_rx_desc) &&
1696 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1697 desc += IGB_RXQ_SCAN_INTERVAL;
1698 rxdp += IGB_RXQ_SCAN_INTERVAL;
1699 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1700 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1701 desc - rxq->nb_rx_desc]);
1708 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1710 volatile union e1000_adv_rx_desc *rxdp;
1711 struct igb_rx_queue *rxq = rx_queue;
1714 if (unlikely(offset >= rxq->nb_rx_desc))
1716 desc = rxq->rx_tail + offset;
1717 if (desc >= rxq->nb_rx_desc)
1718 desc -= rxq->nb_rx_desc;
1720 rxdp = &rxq->rx_ring[desc];
1721 return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1725 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1727 struct igb_rx_queue *rxq = rx_queue;
1728 volatile uint32_t *status;
1731 if (unlikely(offset >= rxq->nb_rx_desc))
1734 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1735 return RTE_ETH_RX_DESC_UNAVAIL;
1737 desc = rxq->rx_tail + offset;
1738 if (desc >= rxq->nb_rx_desc)
1739 desc -= rxq->nb_rx_desc;
1741 status = &rxq->rx_ring[desc].wb.upper.status_error;
1742 if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1743 return RTE_ETH_RX_DESC_DONE;
1745 return RTE_ETH_RX_DESC_AVAIL;
1749 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1751 struct igb_tx_queue *txq = tx_queue;
1752 volatile uint32_t *status;
1755 if (unlikely(offset >= txq->nb_tx_desc))
1758 desc = txq->tx_tail + offset;
1759 if (desc >= txq->nb_tx_desc)
1760 desc -= txq->nb_tx_desc;
1762 status = &txq->tx_ring[desc].wb.status;
1763 if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1764 return RTE_ETH_TX_DESC_DONE;
1766 return RTE_ETH_TX_DESC_FULL;
1770 igb_dev_clear_queues(struct rte_eth_dev *dev)
1773 struct igb_tx_queue *txq;
1774 struct igb_rx_queue *rxq;
1776 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1777 txq = dev->data->tx_queues[i];
1779 igb_tx_queue_release_mbufs(txq);
1780 igb_reset_tx_queue(txq, dev);
1784 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1785 rxq = dev->data->rx_queues[i];
1787 igb_rx_queue_release_mbufs(rxq);
1788 igb_reset_rx_queue(rxq);
1794 igb_dev_free_queues(struct rte_eth_dev *dev)
1798 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1799 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1800 dev->data->rx_queues[i] = NULL;
1802 dev->data->nb_rx_queues = 0;
1804 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1805 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1806 dev->data->tx_queues[i] = NULL;
1808 dev->data->nb_tx_queues = 0;
1812 * Receive Side Scaling (RSS).
1813 * See section 7.1.1.7 in the following document:
1814 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1817 * The source and destination IP addresses of the IP header and the source and
1818 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1819 * against a configurable random key to compute a 32-bit RSS hash result.
1820 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1821 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1822 * RSS output index, which is used as the RX queue index in which to store the
1824 * The following output is supplied in the RX write-back descriptor:
1825 * - 32-bit result of the Microsoft RSS hash function,
1826 * - 4-bit RSS type field.
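 *
 * For illustration: a hash result of 0x12345678 selects RETA entry
 * 0x12345678 & 0x7F = 120, and the 3-bit value stored there picks one of up
 * to eight RX queues.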
1830 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1831 * Used as the default key.
1833 static uint8_t rss_intel_key[40] = {
1834 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1835 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1836 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1837 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1838 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1842 igb_rss_disable(struct rte_eth_dev *dev)
1844 struct e1000_hw *hw;
1847 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1848 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1849 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1850 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1854 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1862 hash_key = rss_conf->rss_key;
1863 if (hash_key != NULL) {
1864 /* Fill in RSS hash key */
1865 for (i = 0; i < 10; i++) {
1866 rss_key = hash_key[(i * 4)];
1867 rss_key |= hash_key[(i * 4) + 1] << 8;
1868 rss_key |= hash_key[(i * 4) + 2] << 16;
1869 rss_key |= hash_key[(i * 4) + 3] << 24;
1870 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
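/*
 * For illustration: with the default key above, the first four bytes
 * 0x6D, 0x5A, 0x56, 0xDA are packed least-significant byte first, so the
 * first RSSRK word is written with 0xDA565A6D.
 */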
1874 /* Set configured hashing protocols in MRQC register */
1875 rss_hf = rss_conf->rss_hf;
1876 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1877 if (rss_hf & ETH_RSS_IPV4)
1878 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1879 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1880 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1881 if (rss_hf & ETH_RSS_IPV6)
1882 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1883 if (rss_hf & ETH_RSS_IPV6_EX)
1884 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1885 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1886 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1887 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1888 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1889 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1890 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1891 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1892 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1893 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1894 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1895 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1899 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1900 struct rte_eth_rss_conf *rss_conf)
1902 struct e1000_hw *hw;
1906 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1909 * Before changing anything, first check that the RSS update operation
1910 * does not attempt to disable RSS if RSS was enabled at
1911 * initialization time, or to enable RSS if RSS was
1912 * disabled at initialization time.
1914 rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1915 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1916 if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
1917 if (rss_hf != 0) /* Enable RSS */
1919 return 0; /* Nothing to do */
1922 if (rss_hf == 0) /* Disable RSS */
1924 igb_hw_rss_hash_set(hw, rss_conf);
1928 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
1929 struct rte_eth_rss_conf *rss_conf)
1931 struct e1000_hw *hw;
1938 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1939 hash_key = rss_conf->rss_key;
1940 if (hash_key != NULL) {
1941 /* Return RSS hash key */
1942 for (i = 0; i < 10; i++) {
1943 rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
1944 hash_key[(i * 4)] = rss_key & 0x000000FF;
1945 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
1946 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
1947 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
1951 /* Get RSS functions configured in MRQC register */
1952 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1953 if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
1954 rss_conf->rss_hf = 0;
1958 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
1959 rss_hf |= ETH_RSS_IPV4;
1960 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
1961 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
1962 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
1963 rss_hf |= ETH_RSS_IPV6;
1964 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
1965 rss_hf |= ETH_RSS_IPV6_EX;
1966 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
1967 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
1968 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
1969 rss_hf |= ETH_RSS_IPV6_TCP_EX;
1970 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
1971 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
1972 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
1973 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
1974 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
1975 rss_hf |= ETH_RSS_IPV6_UDP_EX;
1976 rss_conf->rss_hf = rss_hf;
1981 igb_rss_configure(struct rte_eth_dev *dev)
1983 struct rte_eth_rss_conf rss_conf;
1984 struct e1000_hw *hw;
1988 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1990 /* Fill in redirection table. */
1991 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
1992 for (i = 0; i < 128; i++) {
1999 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2000 i % dev->data->nb_rx_queues : 0);
2001 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2003 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
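/*
 * For illustration, assuming the byte/dword union used for reta here: with
 * four RX queues and shift = 0 (non-82575), entries 0..3 hold queues
 * 0, 1, 2, 3 and the first write above stores 0x03020100 into E1000_RETA(0).
 */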
2007 * Configure the RSS key and the RSS protocols used to compute
2008 * the RSS hash of input packets.
2010 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2011 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2012 igb_rss_disable(dev);
2015 if (rss_conf.rss_key == NULL)
2016 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2017 igb_hw_rss_hash_set(hw, &rss_conf);
2021 * Check whether the mac type supports VMDq or not.
2022 * Return 1 if it does, otherwise return 0.
2025 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2027 const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2029 switch (hw->mac.type) {
2050 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2056 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2058 struct rte_eth_vmdq_rx_conf *cfg;
2059 struct e1000_hw *hw;
2060 uint32_t mrqc, vt_ctl, vmolr, rctl;
2063 PMD_INIT_FUNC_TRACE();
2065 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2066 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2068 /* Check if the mac type can support VMDq; a return value of 0 means no support. */
2069 if (igb_is_vmdq_supported(dev) == 0)
2072 igb_rss_disable(dev);
2074 /* RCTL: enable VLAN filter */
2075 rctl = E1000_READ_REG(hw, E1000_RCTL);
2076 rctl |= E1000_RCTL_VFE;
2077 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2079 /* MRQC: enable vmdq */
2080 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2081 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2082 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2084 /* VTCTL: pool selection according to VLAN tag */
2085 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2086 if (cfg->enable_default_pool)
2087 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2088 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2089 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2091 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2092 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2093 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2094 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2097 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2098 vmolr |= E1000_VMOLR_AUPE;
2099 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2100 vmolr |= E1000_VMOLR_ROMPE;
2101 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2102 vmolr |= E1000_VMOLR_ROPE;
2103 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2104 vmolr |= E1000_VMOLR_BAM;
2105 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2106 vmolr |= E1000_VMOLR_MPME;
2108 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2112 * VMOLR: set STRVLAN to 1 if IGMAC in VTCTL is set to 1.
2113 * Both 82576 and 82580 support it.
2115 if (hw->mac.type != e1000_i350) {
2116 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2117 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2118 vmolr |= E1000_VMOLR_STRVLAN;
2119 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2123 /* VFTA - enable all vlan filters */
2124 for (i = 0; i < IGB_VFTA_SIZE; i++)
2125 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2127 /* VFRE: enable RX for all 8 pools; both 82576 and i350 support this. */
2128 if (hw->mac.type != e1000_82580)
2129 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2132 * RAH/RAL - allow pools to read specific MAC addresses.
2133 * In this case, all pools should be able to read from MAC address 0.
2135 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2136 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2138 /* VLVF: set up filters for vlan tags as configured */
2139 for (i = 0; i < cfg->nb_pool_maps; i++) {
2140 /* Set the VLAN ID in the VLVF register and set the enable bit. */
2141 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2142 (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2143 ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2144 E1000_VLVF_POOLSEL_MASK)));
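/*
 * Editor's sketch (hypothetical values, not from the original code):
 * a pool map entry with vlan_id = 100 and pools = 0x3 (pools 0 and 1)
 * would be programmed as
 *
 *   E1000_WRITE_REG(hw, E1000_VLVF(i),
 *           E1000_VLVF_VLANID_ENABLE | 100 |
 *           ((0x3 << E1000_VLVF_POOLSEL_SHIFT) &
 *            E1000_VLVF_POOLSEL_MASK));
 *
 * i.e. the VLAN ID in the low bits plus a bitmap of member pools.
 */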
2147 E1000_WRITE_FLUSH(hw);
2153 /*********************************************************************
2155 * Enable receive unit.
2157 **********************************************************************/
2160 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2162 struct igb_rx_entry *rxe = rxq->sw_ring;
2166 /* Initialize software ring entries. */
2167 for (i = 0; i < rxq->nb_rx_desc; i++) {
2168 volatile union e1000_adv_rx_desc *rxd;
2169 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2172 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2173 "queue_id=%hu", rxq->queue_id);
2177 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2178 rxd = &rxq->rx_ring[i];
2179 rxd->read.hdr_addr = 0;
2180 rxd->read.pkt_addr = dma_addr;
2187 #define E1000_MRQC_DEF_Q_SHIFT (3)
2189 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2191 struct e1000_hw *hw =
2192 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2195 if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2197 * SRIOV active scheme
2198 * FIXME: RSS together with VMDq & SRIOV is not supported yet
2200 mrqc = E1000_MRQC_ENABLE_VMDQ;
2201 /* 011b: ignore Def_Q; the default pool is taken from VT_CTL.DEF_PL */
2202 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2203 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2204 } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2206 * SRIOV inactive scheme
2208 switch (dev->data->dev_conf.rxmode.mq_mode) {
2210 igb_rss_configure(dev);
2212 case ETH_MQ_RX_VMDQ_ONLY:
2213 /* Configure general VMDq-only RX parameters */
2214 igb_vmdq_rx_hw_configure(dev);
2216 case ETH_MQ_RX_NONE:
2217 /* If mq_mode is none, disable RSS. */
2219 igb_rss_disable(dev);
2228 eth_igb_rx_init(struct rte_eth_dev *dev)
2230 struct e1000_hw *hw;
2231 struct igb_rx_queue *rxq;
2236 uint16_t rctl_bsize;
2240 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2244 * Make sure receives are disabled while setting
2245 * up the descriptor ring.
2247 rctl = E1000_READ_REG(hw, E1000_RCTL);
2248 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2251 * Configure support of jumbo frames, if any.
2253 if (dev->data->dev_conf.rxmode.jumbo_frame == 1) {
2254 rctl |= E1000_RCTL_LPE;
2257 * Set the maximum packet length by default; it might be updated
2258 * later when dual VLAN is enabled or disabled.
2260 E1000_WRITE_REG(hw, E1000_RLPML,
2261 dev->data->dev_conf.rxmode.max_rx_pkt_len +
2264 rctl &= ~E1000_RCTL_LPE;
2266 /* Configure and enable each RX queue. */
2268 dev->rx_pkt_burst = eth_igb_recv_pkts;
2269 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2273 rxq = dev->data->rx_queues[i];
2277 * i350 and i354 VLAN packets have their VLAN tags byte-swapped.
2279 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2280 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2281 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2283 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2286 /* Allocate buffers for descriptor rings and set up queue */
2287 ret = igb_alloc_rx_queue_mbufs(rxq);
2292 * Reset crc_len in case it was changed after queue setup by a
2296 (uint8_t)(dev->data->dev_conf.rxmode.hw_strip_crc ?
2299 bus_addr = rxq->rx_ring_phys_addr;
2300 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2302 sizeof(union e1000_adv_rx_desc));
2303 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2304 (uint32_t)(bus_addr >> 32));
2305 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2307 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2310 * Configure RX buffer size.
2312 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2313 RTE_PKTMBUF_HEADROOM);
2314 if (buf_size >= 1024) {
2316 * Configure the BSIZEPACKET field of the SRRCTL
2317 * register of the queue.
2318 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2319 * If this field is equal to 0b, then RCTL.BSIZE
2320 * determines the RX packet buffer size.
2322 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2323 E1000_SRRCTL_BSIZEPKT_MASK);
2324 buf_size = (uint16_t) ((srrctl &
2325 E1000_SRRCTL_BSIZEPKT_MASK) <<
2326 E1000_SRRCTL_BSIZEPKT_SHIFT);
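/*
 * Editor's worked example (assumed mempool geometry): with a mempool
 * whose data room is 2048 + RTE_PKTMBUF_HEADROOM bytes, buf_size is
 * 2048, BSIZEPKT becomes 2048 >> 10 = 2 (1 KB units), and the value
 * read back here is 2 << 10 = 2048, i.e. the effective per-descriptor
 * packet buffer size used for the scatter decision below.
 */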
2328 /* Account for dual VLAN tag length to support dual VLAN. */
2329 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2330 2 * VLAN_TAG_SIZE) > buf_size){
2331 if (!dev->data->scattered_rx)
2333 "forcing scatter mode");
2334 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2335 dev->data->scattered_rx = 1;
2339 * Use BSIZE field of the device RCTL register.
2341 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2342 rctl_bsize = buf_size;
2343 if (!dev->data->scattered_rx)
2344 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2345 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2346 dev->data->scattered_rx = 1;
2349 /* Set if packets are dropped when no descriptors available */
2351 srrctl |= E1000_SRRCTL_DROP_EN;
2353 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2355 /* Enable this RX queue. */
2356 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2357 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2358 rxdctl &= 0xFFF00000;
2359 rxdctl |= (rxq->pthresh & 0x1F);
2360 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2361 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2362 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
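/*
 * Editor's note: RXDCTL packs three 5-bit fields, PTHRESH in bits 4:0,
 * HTHRESH in bits 13:8 and WTHRESH in bits 20:16, which is why the low
 * 20 bits are cleared with the 0xFFF00000 mask above and rebuilt from
 * the per-queue thresholds, while the queue-enable bit in the upper
 * part of the register is preserved.
 */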
2365 if (dev->data->dev_conf.rxmode.enable_scatter) {
2366 if (!dev->data->scattered_rx)
2367 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2368 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2369 dev->data->scattered_rx = 1;
2373 * Setup BSIZE field of RCTL register, if needed.
2374 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
2375 * register, since the code above configures the SRRCTL register of
2376 * the RX queue in such a case.
2377 * All configurable sizes are:
2378 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2379 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
2380 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
2381 * 2048: rctl |= E1000_RCTL_SZ_2048;
2382 * 1024: rctl |= E1000_RCTL_SZ_1024;
2383 * 512: rctl |= E1000_RCTL_SZ_512;
2384 * 256: rctl |= E1000_RCTL_SZ_256;
2386 if (rctl_bsize > 0) {
2387 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2388 rctl |= E1000_RCTL_SZ_512;
2389 else /* 256 <= buf_size < 512 - use 256 */
2390 rctl |= E1000_RCTL_SZ_256;
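/*
 * Editor's worked example (assumed buffer size): if the smallest queue
 * buffer is, say, 800 bytes, BSIZEPKT is never programmed (buf_size <
 * 1024), rctl_bsize ends up as 800, and the branch above selects
 * E1000_RCTL_SZ_512 so the hardware never writes past the 800-byte
 * mbuf data area.
 */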
2394 * Configure RSS if the device is configured with multiple RX queues.
2396 igb_dev_mq_rx_configure(dev);
2398 /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2399 rctl |= E1000_READ_REG(hw, E1000_RCTL);
2402 * Setup the Checksum Register.
2403 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2405 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2406 rxcsum |= E1000_RXCSUM_PCSD;
2408 /* Enable both L3/L4 rx checksum offload */
2409 if (dev->data->dev_conf.rxmode.hw_ip_checksum)
2410 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2411 E1000_RXCSUM_CRCOFL);
2413 rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL |
2414 E1000_RXCSUM_CRCOFL);
2415 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2417 /* Setup the Receive Control Register. */
2418 if (dev->data->dev_conf.rxmode.hw_strip_crc) {
2419 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2421 /* set STRCRC bit in all queues */
2422 if (hw->mac.type == e1000_i350 ||
2423 hw->mac.type == e1000_i210 ||
2424 hw->mac.type == e1000_i211 ||
2425 hw->mac.type == e1000_i354) {
2426 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2427 rxq = dev->data->rx_queues[i];
2428 uint32_t dvmolr = E1000_READ_REG(hw,
2429 E1000_DVMOLR(rxq->reg_idx));
2430 dvmolr |= E1000_DVMOLR_STRCRC;
2431 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2435 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2437 /* clear STRCRC bit in all queues */
2438 if (hw->mac.type == e1000_i350 ||
2439 hw->mac.type == e1000_i210 ||
2440 hw->mac.type == e1000_i211 ||
2441 hw->mac.type == e1000_i354) {
2442 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2443 rxq = dev->data->rx_queues[i];
2444 uint32_t dvmolr = E1000_READ_REG(hw,
2445 E1000_DVMOLR(rxq->reg_idx));
2446 dvmolr &= ~E1000_DVMOLR_STRCRC;
2447 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2452 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2453 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2454 E1000_RCTL_RDMTS_HALF |
2455 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2457 /* Make sure VLAN Filters are off. */
2458 if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2459 rctl &= ~E1000_RCTL_VFE;
2460 /* Don't store bad packets. */
2461 rctl &= ~E1000_RCTL_SBP;
2463 /* Enable Receives. */
2464 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2467 * Setup the HW Rx Head and Tail Descriptor Pointers.
2468 * This needs to be done after enable.
2470 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2471 rxq = dev->data->rx_queues[i];
2472 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2473 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
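/*
 * Editor's note: RDH = 0 and RDT = nb_rx_desc - 1 hand every descriptor
 * but one to the hardware; the receive path then advances RDT as it
 * refills descriptors with fresh mbufs.
 */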
2479 /*********************************************************************
2481 * Enable transmit unit.
2483 **********************************************************************/
2485 eth_igb_tx_init(struct rte_eth_dev *dev)
2487 struct e1000_hw *hw;
2488 struct igb_tx_queue *txq;
2493 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2495 /* Setup the Base and Length of the Tx Descriptor Rings. */
2496 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2498 txq = dev->data->tx_queues[i];
2499 bus_addr = txq->tx_ring_phys_addr;
2501 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2503 sizeof(union e1000_adv_tx_desc));
2504 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2505 (uint32_t)(bus_addr >> 32));
2506 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2508 /* Setup the HW Tx Head and Tail descriptor pointers. */
2509 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2510 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2512 /* Setup Transmit threshold registers. */
2513 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2514 txdctl |= txq->pthresh & 0x1F;
2515 txdctl |= ((txq->hthresh & 0x1F) << 8);
2516 txdctl |= ((txq->wthresh & 0x1F) << 16);
2517 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2518 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
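/*
 * Editor's note: TXDCTL mirrors RXDCTL's layout, PTHRESH in bits 4:0,
 * HTHRESH in bits 13:8 and WTHRESH in bits 20:16, with the queue-enable
 * bit OR-ed in before the register is written back.
 */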
2521 /* Program the Transmit Control Register. */
2522 tctl = E1000_READ_REG(hw, E1000_TCTL);
2523 tctl &= ~E1000_TCTL_CT;
2524 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2525 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2527 e1000_config_collision_dist(hw);
2529 /* This write will effectively turn on the transmit unit. */
2530 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2533 /*********************************************************************
2535 * Enable VF receive unit.
2537 **********************************************************************/
2539 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2541 struct e1000_hw *hw;
2542 struct igb_rx_queue *rxq;
2545 uint16_t rctl_bsize;
2549 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2552 e1000_rlpml_set_vf(hw,
2553 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2556 /* Configure and enable each RX queue. */
2558 dev->rx_pkt_burst = eth_igb_recv_pkts;
2559 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2563 rxq = dev->data->rx_queues[i];
2567 * i350 VF loopback (LB) VLAN packets have their VLAN tags byte-swapped.
2569 if (hw->mac.type == e1000_vfadapt_i350) {
2570 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2571 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2573 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2576 /* Allocate buffers for descriptor rings and set up queue */
2577 ret = igb_alloc_rx_queue_mbufs(rxq);
2581 bus_addr = rxq->rx_ring_phys_addr;
2582 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2584 sizeof(union e1000_adv_rx_desc));
2585 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2586 (uint32_t)(bus_addr >> 32));
2587 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2589 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2592 * Configure RX buffer size.
2594 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2595 RTE_PKTMBUF_HEADROOM);
2596 if (buf_size >= 1024) {
2598 * Configure the BSIZEPACKET field of the SRRCTL
2599 * register of the queue.
2600 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2601 * If this field is equal to 0b, then RCTL.BSIZE
2602 * determines the RX packet buffer size.
2604 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2605 E1000_SRRCTL_BSIZEPKT_MASK);
2606 buf_size = (uint16_t) ((srrctl &
2607 E1000_SRRCTL_BSIZEPKT_MASK) <<
2608 E1000_SRRCTL_BSIZEPKT_SHIFT);
2610 /* Account for dual VLAN tag length to support dual VLAN. */
2611 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2612 2 * VLAN_TAG_SIZE) > buf_size){
2613 if (!dev->data->scattered_rx)
2615 "forcing scatter mode");
2616 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2617 dev->data->scattered_rx = 1;
2621 * Use BSIZE field of the device RCTL register.
2623 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2624 rctl_bsize = buf_size;
2625 if (!dev->data->scattered_rx)
2626 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2627 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2628 dev->data->scattered_rx = 1;
2631 /* Set if packets are dropped when no descriptors available */
2633 srrctl |= E1000_SRRCTL_DROP_EN;
2635 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2637 /* Enable this RX queue. */
2638 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2639 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2640 rxdctl &= 0xFFF00000;
2641 rxdctl |= (rxq->pthresh & 0x1F);
2642 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2643 if (hw->mac.type == e1000_vfadapt) {
2645 * Workaround for the 82576 VF erratum:
2646 * force WTHRESH to 1 to avoid descriptor
2647 * write-back not being triggered in some cases.
2650 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !");
2653 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2654 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2657 if (dev->data->dev_conf.rxmode.enable_scatter) {
2658 if (!dev->data->scattered_rx)
2659 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2660 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2661 dev->data->scattered_rx = 1;
2665 * Setup the HW Rx Head and Tail Descriptor Pointers.
2666 * This needs to be done after enable.
2668 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2669 rxq = dev->data->rx_queues[i];
2670 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2671 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2677 /*********************************************************************
2679 * Enable VF transmit unit.
2681 **********************************************************************/
2683 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2685 struct e1000_hw *hw;
2686 struct igb_tx_queue *txq;
2690 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2692 /* Setup the Base and Length of the Tx Descriptor Rings. */
2693 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2696 txq = dev->data->tx_queues[i];
2697 bus_addr = txq->tx_ring_phys_addr;
2698 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2700 sizeof(union e1000_adv_tx_desc));
2701 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2702 (uint32_t)(bus_addr >> 32));
2703 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2705 /* Setup the HW Tx Head and Tail descriptor pointers. */
2706 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2707 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2709 /* Setup Transmit threshold registers. */
2710 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2711 txdctl |= txq->pthresh & 0x1F;
2712 txdctl |= ((txq->hthresh & 0x1F) << 8);
2713 if (hw->mac.type == e1000_82576) {
2715 * Workaround for the 82576 VF erratum:
2716 * force WTHRESH to 1 to avoid descriptor
2717 * write-back not being triggered in some cases.
2720 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !");
2723 txdctl |= ((txq->wthresh & 0x1F) << 16);
2724 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2725 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2731 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2732 struct rte_eth_rxq_info *qinfo)
2734 struct igb_rx_queue *rxq;
2736 rxq = dev->data->rx_queues[queue_id];
2738 qinfo->mp = rxq->mb_pool;
2739 qinfo->scattered_rx = dev->data->scattered_rx;
2740 qinfo->nb_desc = rxq->nb_rx_desc;
2742 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2743 qinfo->conf.rx_drop_en = rxq->drop_en;
2747 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2748 struct rte_eth_txq_info *qinfo)
2750 struct igb_tx_queue *txq;
2752 txq = dev->data->tx_queues[queue_id];
2754 qinfo->nb_desc = txq->nb_tx_desc;
2756 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2757 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2758 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2762 igb_config_rss_filter(struct rte_eth_dev *dev,
2763 struct igb_rte_flow_rss_conf *conf, bool add)
2767 struct rte_eth_rss_conf rss_conf = conf->rss_conf;
2768 struct e1000_filter_info *filter_info =
2769 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2770 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2775 if (memcmp(conf, &filter_info->rss_info,
2776 sizeof(struct igb_rte_flow_rss_conf)) == 0) {
2777 igb_rss_disable(dev);
2778 memset(&filter_info->rss_info, 0,
2779 sizeof(struct igb_rte_flow_rss_conf));
2785 if (filter_info->rss_info.num)
2788 /* Fill in redirection table. */
2789 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2790 for (i = 0, j = 0; i < 128; i++, j++) {
2797 q_idx = conf->queue[j];
2800 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2802 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2805 /* Configure the RSS key and the RSS protocols used to compute
2806 * the RSS hash of input packets.
2808 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2809 igb_rss_disable(dev);
2812 if (rss_conf.rss_key == NULL)
2813 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2814 igb_hw_rss_hash_set(hw, &rss_conf);
2816 rte_memcpy(&filter_info->rss_info,
2817 conf, sizeof(struct igb_rte_flow_rss_conf));