1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
5 #include <rte_config.h>
7 #include <rte_malloc.h>
8 #include <ethdev_driver.h>
/*
 * Prefetch helpers and hardware register / descriptor bit-field macros.
 * NOTE(review): this view is an elided dump (the leading per-line numbers
 * show gaps) — the #else/#endif lines of the conditional blocks and the
 * multi-line body of IGC_TX_OFFLOAD_MASK are missing here. Code below is
 * kept byte-identical to what is visible.
 */
14 #ifdef RTE_PMD_USE_PREFETCH
15 #define rte_igc_prefetch(p) rte_prefetch0(p)
/* no-op fallback when prefetch support is compiled out */
17 #define rte_igc_prefetch(p) do {} while (0)
20 #ifdef RTE_PMD_PACKET_PREFETCH
21 #define rte_packet_prefetch(p) rte_prefetch1(p)
/* no-op fallback when packet prefetch is compiled out */
23 #define rte_packet_prefetch(p) do {} while (0)
26 /* Multicast / Unicast table offset mask. */
27 #define IGC_RCTL_MO_MSK (3u << IGC_RCTL_MO_SHIFT)
/* Loopback mode field of RCTL (2 bits). */
30 #define IGC_RCTL_LBM_SHIFT 6
31 #define IGC_RCTL_LBM_MSK (3u << IGC_RCTL_LBM_SHIFT)
33 /* Hash select for MTA */
34 #define IGC_RCTL_HSEL_SHIFT 8
35 #define IGC_RCTL_HSEL_MSK (3u << IGC_RCTL_HSEL_SHIFT)
36 #define IGC_RCTL_PSP (1u << 21)
38 /* Receive buffer size for header buffer */
39 #define IGC_SRRCTL_BSIZEHEADER_SHIFT 8
41 /* RX descriptor status and error flags */
42 #define IGC_RXD_STAT_L4CS (1u << 5)
43 #define IGC_RXD_STAT_VEXT (1u << 9)
44 #define IGC_RXD_STAT_LLINT (1u << 11)
45 #define IGC_RXD_STAT_SCRC (1u << 12)
46 #define IGC_RXD_STAT_SMDT_MASK (3u << 13)
47 #define IGC_RXD_STAT_MC (1u << 19)
48 #define IGC_RXD_EXT_ERR_L4E (1u << 29)
49 #define IGC_RXD_EXT_ERR_IPE (1u << 30)
50 #define IGC_RXD_EXT_ERR_RXE (1u << 31)
51 #define IGC_RXD_RSS_TYPE_MASK 0xfu
52 #define IGC_RXD_PCTYPE_MASK (0x7fu << 4)
53 #define IGC_RXD_ETQF_SHIFT 12
54 #define IGC_RXD_ETQF_MSK (0xfu << IGC_RXD_ETQF_SHIFT)
55 #define IGC_RXD_VPKT (1u << 16)
57 /* TXD control bits */
58 #define IGC_TXDCTL_PTHRESH_SHIFT 0
59 #define IGC_TXDCTL_HTHRESH_SHIFT 8
60 #define IGC_TXDCTL_WTHRESH_SHIFT 16
61 #define IGC_TXDCTL_PTHRESH_MSK (0x1fu << IGC_TXDCTL_PTHRESH_SHIFT)
62 #define IGC_TXDCTL_HTHRESH_MSK (0x1fu << IGC_TXDCTL_HTHRESH_SHIFT)
63 #define IGC_TXDCTL_WTHRESH_MSK (0x1fu << IGC_TXDCTL_WTHRESH_SHIFT)
65 /* RXD control bits */
66 #define IGC_RXDCTL_PTHRESH_SHIFT 0
67 #define IGC_RXDCTL_HTHRESH_SHIFT 8
68 #define IGC_RXDCTL_WTHRESH_SHIFT 16
69 #define IGC_RXDCTL_PTHRESH_MSK (0x1fu << IGC_RXDCTL_PTHRESH_SHIFT)
70 #define IGC_RXDCTL_HTHRESH_MSK (0x1fu << IGC_RXDCTL_HTHRESH_SHIFT)
71 #define IGC_RXDCTL_WTHRESH_MSK (0x1fu << IGC_RXDCTL_WTHRESH_SHIFT)
/* TSO limits: max header length and max segment size, in bytes. */
73 #define IGC_TSO_MAX_HDRLEN 512
74 #define IGC_TSO_MAX_MSS 9216
76 /* Bit Mask to indicate what bits required for building TX context */
/* NOTE(review): the continuation lines of this mask expression are elided. */
77 #define IGC_TX_OFFLOAD_MASK ( \
87 #define IGC_TX_OFFLOAD_SEG (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)
89 #define IGC_ADVTXD_POPTS_TXSM 0x00000200 /* L4 Checksum offload request */
90 #define IGC_ADVTXD_POPTS_IXSM 0x00000100 /* IP Checksum offload request */
92 /* L4 Packet TYPE of Reserved */
93 #define IGC_ADVTXD_TUCMD_L4T_RSV 0x00001800
/* Offload flags requested by the app that this PMD cannot handle. */
95 #define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
/*
 * Software ring element: one mbuf pointer per RX hardware descriptor.
 * NOTE(review): comment opener and closing brace elided from this view;
 * code kept verbatim.
 */
98 * Structure associated with each descriptor of the RX ring of a RX queue.
100 struct igc_rx_entry {
101 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
/*
 * Per-queue RX state: descriptor ring, software (mbuf) ring, tail/hold
 * bookkeeping and the threshold values programmed into RXDCTL/SRRCTL.
 * NOTE(review): some lines elided from this view; code kept verbatim.
 */
105 * Structure associated with each RX queue.
107 struct igc_rx_queue {
108 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
109 volatile union igc_adv_rx_desc *rx_ring;
110 /**< RX ring virtual address. */
111 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
112 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
113 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
114 struct igc_rx_entry *sw_ring; /**< address of RX software ring. */
115 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
116 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
117 uint16_t nb_rx_desc; /**< number of RX descriptors. */
118 uint16_t rx_tail; /**< current value of RDT register. */
119 uint16_t nb_rx_hold; /**< number of held free RX desc. */
120 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
121 uint16_t queue_id; /**< RX queue index. */
122 uint16_t reg_idx; /**< RX queue register index. */
123 uint16_t port_id; /**< Device port identifier. */
124 uint8_t pthresh; /**< Prefetch threshold register. */
125 uint8_t hthresh; /**< Host threshold register. */
126 uint8_t wthresh; /**< Write-back threshold register. */
127 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
128 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
129 uint32_t flags; /**< RX flags. */
130 uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */
/*
 * Packed TX offload metadata (bit-fields sharing one uint64_t via a union)
 * plus the compare masks used to test subsets of that word. The mask values
 * must stay in sync with the bit-field layout below.
 * NOTE(review): union/struct open-close lines are elided from this view;
 * code kept verbatim.
 */
133 /** Offload features */
134 union igc_tx_offload {
137 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
138 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
139 uint64_t vlan_tci:16;
140 /**< VLAN Tag Control Identifier(CPU order). */
141 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
142 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
143 /* uint64_t unused:8; */
148 * Compare mask for igc_tx_offload.data,
149 * should be in sync with igc_tx_offload layout.
151 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
152 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
153 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
154 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
155 /** Mac + IP + TCP + Mss mask. */
156 #define TX_TSO_CMP_MASK \
157 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
/*
 * Cached hardware context descriptor contents; compared against a new
 * packet's offload flags/fields to decide whether a fresh context
 * descriptor must be written.
 * NOTE(review): some lines elided from this view; code kept verbatim.
 */
160 * Structure to check if new context need be built
162 struct igc_advctx_info {
163 uint64_t flags; /**< ol_flags related to context build. */
164 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
165 union igc_tx_offload tx_offload;
166 /** compare mask for tx offload. */
167 union igc_tx_offload tx_offload_mask;
/*
 * Identifiers for the two hardware TX contexts; IGC_CTX_NUM sizes the
 * per-queue context cache array.
 * NOTE(review): the enum's opening line is elided from this view.
 */
171 * Hardware context number
174 IGC_CTX_0 = 0, /**< CTX0 */
175 IGC_CTX_1 = 1, /**< CTX1 */
176 IGC_CTX_NUM = 2, /**< CTX_NUM */
/*
 * Software ring element for the TX ring: the mbuf attached to a
 * descriptor plus linkage for scattered (multi-descriptor) packets.
 * NOTE(review): some lines elided from this view; code kept verbatim.
 */
180 * Structure associated with each descriptor of the TX ring of a TX queue.
182 struct igc_tx_entry {
183 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
184 uint16_t next_id; /**< Index of next descriptor in ring. */
185 uint16_t last_id; /**< Index of last scattered descriptor. */
/*
 * Per-queue TX state: descriptor ring, software ring, tail bookkeeping,
 * threshold values and the cached hardware contexts.
 * NOTE(review): some lines elided from this view; code kept verbatim.
 */
189 * Structure associated with each TX queue.
191 struct igc_tx_queue {
192 volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
193 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
194 struct igc_tx_entry *sw_ring; /**< virtual address of SW ring. */
195 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
196 uint32_t txd_type; /**< Device-specific TXD type */
197 uint16_t nb_tx_desc; /**< number of TX descriptors. */
198 uint16_t tx_tail; /**< Current value of TDT register. */
200 /**< Index of first used TX descriptor. */
201 uint16_t queue_id; /**< TX queue index. */
202 uint16_t reg_idx; /**< TX queue register index. */
203 uint16_t port_id; /**< Device port identifier. */
204 uint8_t pthresh; /**< Prefetch threshold register. */
205 uint8_t hthresh; /**< Host threshold register. */
206 uint8_t wthresh; /**< Write-back threshold register. */
209 /**< Start context position for transmit queue. */
210 struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
211 /**< Hardware context history.*/
212 uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */
/*
 * Translate RX descriptor status/error bits into mbuf ol_flags:
 * VLAN-stripped flag plus L3/L4 checksum good/bad flags, picked from
 * small lookup tables indexed by (checksum-checked << 1 | error) bits.
 * NOTE(review): the function braces and return statement are elided from
 * this view; code kept verbatim.
 */
215 static inline uint64_t
216 rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
218 static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD,
219 PKT_RX_L4_CKSUM_BAD};
221 static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD,
222 PKT_RX_IP_CKSUM_BAD};
223 uint64_t pkt_flags = 0;
/* VLAN was stripped by hardware. */
226 if (statuserr & IGC_RXD_STAT_VP)
227 pkt_flags |= PKT_RX_VLAN_STRIPPED;
/* index 2 = checked & good, index 3 = checked & error */
229 tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS));
230 tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E);
231 pkt_flags |= l4_chksum_flags[tmp];
233 tmp = !!(statuserr & IGC_RXD_STAT_IPCS);
234 tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE);
235 pkt_flags |= l3_chksum_flags[tmp];
/*
 * Hardware packet-type codes reported in the RX descriptor's pkt_info
 * field; used as indices into the ptype_table below (after shifting by
 * IGC_PACKET_TYPE_SHIFT and masking with IGC_PACKET_TYPE_MASK).
 */
240 #define IGC_PACKET_TYPE_IPV4 0X01
241 #define IGC_PACKET_TYPE_IPV4_TCP 0X11
242 #define IGC_PACKET_TYPE_IPV4_UDP 0X21
243 #define IGC_PACKET_TYPE_IPV4_SCTP 0X41
244 #define IGC_PACKET_TYPE_IPV4_EXT 0X03
245 #define IGC_PACKET_TYPE_IPV4_EXT_SCTP 0X43
246 #define IGC_PACKET_TYPE_IPV6 0X04
247 #define IGC_PACKET_TYPE_IPV6_TCP 0X14
248 #define IGC_PACKET_TYPE_IPV6_UDP 0X24
249 #define IGC_PACKET_TYPE_IPV6_EXT 0X0C
250 #define IGC_PACKET_TYPE_IPV6_EXT_TCP 0X1C
251 #define IGC_PACKET_TYPE_IPV6_EXT_UDP 0X2C
252 #define IGC_PACKET_TYPE_IPV4_IPV6 0X05
253 #define IGC_PACKET_TYPE_IPV4_IPV6_TCP 0X15
254 #define IGC_PACKET_TYPE_IPV4_IPV6_UDP 0X25
255 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
256 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
257 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
258 #define IGC_PACKET_TYPE_MAX 0X80
259 #define IGC_PACKET_TYPE_MASK 0X7F
260 #define IGC_PACKET_TYPE_SHIFT 0X04
/*
 * Map the hardware pkt_info field of an RX descriptor to DPDK
 * RTE_PTYPE_* flags via a cache-aligned lookup table; ETQF-matched
 * packets are reported as RTE_PTYPE_UNKNOWN.
 * NOTE(review): function braces and some table lines are elided from
 * this view; code kept verbatim.
 */
262 static inline uint32_t
263 rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)
265 static const uint32_t
266 ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = {
267 [IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
269 [IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
270 RTE_PTYPE_L3_IPV4_EXT,
271 [IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
273 [IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
274 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
275 RTE_PTYPE_INNER_L3_IPV6,
276 [IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
277 RTE_PTYPE_L3_IPV6_EXT,
278 [IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
279 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
280 RTE_PTYPE_INNER_L3_IPV6_EXT,
281 [IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
282 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
283 [IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
284 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
285 [IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
286 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
287 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
288 [IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
289 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
290 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
291 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
292 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
293 [IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
294 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
295 [IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
296 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
297 [IGC_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
298 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
299 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
300 [IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
301 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
302 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
303 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
304 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
305 [IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
306 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
307 [IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
308 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
/* ETQF match: hardware packet type field is not meaningful. */
310 if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF))
311 return RTE_PTYPE_UNKNOWN;
313 pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK;
315 return ptype_table[pkt_info];
/*
 * Fill the given mbuf from a completed RX descriptor: port id, RSS hash,
 * VLAN TCI, ol_flags (RSS/VLAN/checksum) and packet type.
 * NOTE(review): function header/braces and some declarations are elided
 * from this view; code kept verbatim.
 */
319 rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm,
320 union igc_adv_rx_desc *rxd, uint32_t staterr)
323 uint32_t hlen_type_rss;
326 /* Prefetch data of first segment, if configured to do so. */
327 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
329 rxm->port = rxq->port_id;
330 hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data);
331 rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss);
332 rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan);
/* non-zero RSS type means the hash field is valid */
334 pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ?
337 if (hlen_type_rss & IGC_RXD_VPKT)
338 pkt_flags |= PKT_RX_VLAN;
340 pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr);
342 rxm->ol_flags = pkt_flags;
343 pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
344 rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info);
/*
 * Burst receive for non-scattered packets: scan the RX ring from rx_tail,
 * for each DD-completed descriptor replace its mbuf with a freshly
 * allocated one, fill the received mbuf and store it into rx_pkts[].
 * The RDT register is advanced only once nb_rx_hold exceeds
 * rx_free_thresh, and always to (last processed - 1) so RDT never
 * equals RDH.
 * NOTE(review): many interior lines (braces, `break`s, some assignments,
 * the return statement) are elided from this view; code kept verbatim.
 */
348 igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
350 struct igc_rx_queue * const rxq = rx_queue;
351 volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
352 struct igc_rx_entry * const sw_ring = rxq->sw_ring;
353 uint16_t rx_id = rxq->rx_tail;
355 uint16_t nb_hold = 0;
357 while (nb_rx < nb_pkts) {
358 volatile union igc_adv_rx_desc *rxdp;
359 struct igc_rx_entry *rxe;
360 struct rte_mbuf *rxm;
361 struct rte_mbuf *nmb;
362 union igc_adv_rx_desc rxd;
367 * The order of operations here is important as the DD status
368 * bit must not be read after any other descriptor fields.
369 * rx_ring and rxdp are pointing to volatile data so the order
370 * of accesses cannot be reordered by the compiler. If they were
371 * not volatile, they could be reordered which could lead to
372 * using invalid descriptor fields when read from rxd.
374 rxdp = &rx_ring[rx_id];
375 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
376 if (!(staterr & IGC_RXD_STAT_DD))
383 * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is
384 * likely to be invalid and to be dropped by the various
385 * validation checks performed by the network stack.
387 * Allocate a new mbuf to replenish the RX ring descriptor.
388 * If the allocation fails:
389 * - arrange for that RX descriptor to be the first one
390 * being parsed the next time the receive function is
391 * invoked [on the same queue].
393 * - Stop parsing the RX ring and return immediately.
395 * This policy does not drop the packet received in the RX
396 * descriptor for which the allocation of a new mbuf failed.
397 * Thus, it allows that packet to be later retrieved if
398 * mbuf have been freed in the mean time.
399 * As a side effect, holding RX descriptors instead of
400 * systematically giving them back to the NIC may lead to
401 * RX ring exhaustion situations.
402 * However, the NIC can gracefully prevent such situations
403 * to happen by sending specific "back-pressure" flow control
404 * frames to its peer(s).
407 "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
408 rxq->port_id, rxq->queue_id, rx_id, staterr,
409 rte_le_to_cpu_16(rxd.wb.upper.length));
411 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
415 "RX mbuf alloc failed, port_id=%u queue_id=%u",
416 rxq->port_id, rxq->queue_id);
418 rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
423 rxe = &sw_ring[rx_id];
/* ring index wrap-around */
425 if (rx_id >= rxq->nb_rx_desc)
428 /* Prefetch next mbuf while processing current one. */
429 rte_igc_prefetch(sw_ring[rx_id].mbuf);
432 * When next RX descriptor is on a cache-line boundary,
433 * prefetch the next 4 RX descriptors and the next 8 pointers
436 if ((rx_id & 0x3) == 0) {
437 rte_igc_prefetch(&rx_ring[rx_id]);
438 rte_igc_prefetch(&sw_ring[rx_id]);
442 * Update RX descriptor with the physical address of the new
443 * data buffer of the new allocated mbuf.
447 rxdp->read.hdr_addr = 0;
448 rxdp->read.pkt_addr =
449 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
/* single-segment packet: data_len == pkt_len (CRC subtracted if kept) */
452 rxm->data_off = RTE_PKTMBUF_HEADROOM;
453 data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len;
454 rxm->data_len = data_len;
455 rxm->pkt_len = data_len;
458 rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr);
461 * Store the mbuf address into the next entry of the array
462 * of returned packets.
464 rx_pkts[nb_rx++] = rxm;
466 rxq->rx_tail = rx_id;
469 * If the number of free RX descriptors is greater than the RX free
470 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
472 * Update the RDT with the value of the last processed RX descriptor
473 * minus 1, to guarantee that the RDT register is never equal to the
474 * RDH register, which creates a "full" ring situation from the
475 * hardware point of view...
477 nb_hold = nb_hold + rxq->nb_rx_hold;
478 if (nb_hold > rxq->rx_free_thresh) {
480 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
481 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
482 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
483 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
486 rxq->nb_rx_hold = nb_hold;
/*
 * Burst receive with scatter support: like igc_recv_pkts() but chains
 * descriptors of a multi-segment packet (pkt_first_seg/pkt_last_seg carry
 * state across calls) until EOP, then fixes up CRC accounting on the last
 * segment and returns the head mbuf.
 * NOTE(review): many interior lines (braces, `break`s, some assignments,
 * the return statement) are elided from this view; code kept verbatim.
 */
491 igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
494 struct igc_rx_queue * const rxq = rx_queue;
495 volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
496 struct igc_rx_entry * const sw_ring = rxq->sw_ring;
497 struct rte_mbuf *first_seg = rxq->pkt_first_seg;
498 struct rte_mbuf *last_seg = rxq->pkt_last_seg;
500 uint16_t rx_id = rxq->rx_tail;
502 uint16_t nb_hold = 0;
504 while (nb_rx < nb_pkts) {
505 volatile union igc_adv_rx_desc *rxdp;
506 struct igc_rx_entry *rxe;
507 struct rte_mbuf *rxm;
508 struct rte_mbuf *nmb;
509 union igc_adv_rx_desc rxd;
515 * The order of operations here is important as the DD status
516 * bit must not be read after any other descriptor fields.
517 * rx_ring and rxdp are pointing to volatile data so the order
518 * of accesses cannot be reordered by the compiler. If they were
519 * not volatile, they could be reordered which could lead to
520 * using invalid descriptor fields when read from rxd.
522 rxdp = &rx_ring[rx_id];
523 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
524 if (!(staterr & IGC_RXD_STAT_DD))
531 * Allocate a new mbuf to replenish the RX ring descriptor.
532 * If the allocation fails:
533 * - arrange for that RX descriptor to be the first one
534 * being parsed the next time the receive function is
535 * invoked [on the same queue].
537 * - Stop parsing the RX ring and return immediately.
539 * This policy does not drop the packet received in the RX
540 * descriptor for which the allocation of a new mbuf failed.
541 * Thus, it allows that packet to be later retrieved if
542 * mbuf have been freed in the mean time.
543 * As a side effect, holding RX descriptors instead of
544 * systematically giving them back to the NIC may lead to
545 * RX ring exhaustion situations.
546 * However, the NIC can gracefully prevent such situations
547 * to happen by sending specific "back-pressure" flow control
548 * frames to its peer(s).
551 "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
552 rxq->port_id, rxq->queue_id, rx_id, staterr,
553 rte_le_to_cpu_16(rxd.wb.upper.length));
555 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
559 "RX mbuf alloc failed, port_id=%u queue_id=%u",
560 rxq->port_id, rxq->queue_id);
562 rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
567 rxe = &sw_ring[rx_id];
/* ring index wrap-around */
569 if (rx_id >= rxq->nb_rx_desc)
572 /* Prefetch next mbuf while processing current one. */
573 rte_igc_prefetch(sw_ring[rx_id].mbuf);
576 * When next RX descriptor is on a cache-line boundary,
577 * prefetch the next 4 RX descriptors and the next 8 pointers
580 if ((rx_id & 0x3) == 0) {
581 rte_igc_prefetch(&rx_ring[rx_id]);
582 rte_igc_prefetch(&sw_ring[rx_id]);
586 * Update RX descriptor with the physical address of the new
587 * data buffer of the new allocated mbuf.
591 rxdp->read.hdr_addr = 0;
592 rxdp->read.pkt_addr =
593 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
597 * Set data length & data buffer address of mbuf.
599 rxm->data_off = RTE_PKTMBUF_HEADROOM;
600 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
601 rxm->data_len = data_len;
604 * If this is the first buffer of the received packet,
605 * set the pointer to the first mbuf of the packet and
606 * initialize its context.
607 * Otherwise, update the total length and the number of segments
608 * of the current scattered packet, and update the pointer to
609 * the last mbuf of the current packet.
611 if (first_seg == NULL) {
613 first_seg->pkt_len = data_len;
614 first_seg->nb_segs = 1;
616 first_seg->pkt_len += data_len;
617 first_seg->nb_segs++;
618 last_seg->next = rxm;
622 * If this is not the last buffer of the received packet,
623 * update the pointer to the last mbuf of the current scattered
624 * packet and continue to parse the RX ring.
626 if (!(staterr & IGC_RXD_STAT_EOP)) {
632 * This is the last buffer of the received packet.
633 * If the CRC is not stripped by the hardware:
634 * - Subtract the CRC length from the total packet length.
635 * - If the last buffer only contains the whole CRC or a part
636 * of it, free the mbuf associated to the last buffer.
637 * If part of the CRC is also contained in the previous
638 * mbuf, subtract the length of that CRC part from the
639 * data length of the previous mbuf.
641 if (unlikely(rxq->crc_len > 0)) {
642 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
643 if (data_len <= RTE_ETHER_CRC_LEN) {
644 rte_pktmbuf_free_seg(rxm);
645 first_seg->nb_segs--;
646 last_seg->data_len = last_seg->data_len -
647 (RTE_ETHER_CRC_LEN - data_len);
648 last_seg->next = NULL;
650 rxm->data_len = (uint16_t)
651 (data_len - RTE_ETHER_CRC_LEN);
/* packet-level metadata goes on the head segment */
655 rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr);
658 * Store the mbuf address into the next entry of the array
659 * of returned packets.
661 rx_pkts[nb_rx++] = first_seg;
663 /* Setup receipt context for a new packet. */
666 rxq->rx_tail = rx_id;
669 * Save receive context.
671 rxq->pkt_first_seg = first_seg;
672 rxq->pkt_last_seg = last_seg;
675 * If the number of free RX descriptors is greater than the RX free
676 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
678 * Update the RDT with the value of the last processed RX descriptor
679 * minus 1, to guarantee that the RDT register is never equal to the
680 * RDH register, which creates a "full" ring situation from the
681 * hardware point of view...
683 nb_hold = nb_hold + rxq->nb_rx_hold;
684 if (nb_hold > rxq->rx_free_thresh) {
686 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
687 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
688 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
689 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
692 rxq->nb_rx_hold = nb_hold;
/*
 * Free every mbuf still attached to the queue's software ring and clear
 * the pointers; safe to call with a NULL sw_ring.
 * NOTE(review): function header/braces elided from this view; code kept
 * verbatim.
 */
697 igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq)
701 if (rxq->sw_ring != NULL) {
702 for (i = 0; i < rxq->nb_rx_desc; i++) {
703 if (rxq->sw_ring[i].mbuf != NULL) {
704 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
705 rxq->sw_ring[i].mbuf = NULL;
/*
 * Release one RX queue: free its mbufs, then its software ring (and,
 * presumably, the queue structure itself — the remaining lines are elided
 * from this view).
 */
712 igc_rx_queue_release(struct igc_rx_queue *rxq)
714 igc_rx_queue_release_mbufs(rxq)
715 rte_free(rxq->sw_ring);
/* ethdev-facing wrapper around igc_rx_queue_release(); a NULL check
 * presumably sits on the elided line between these two. */
719 void eth_igc_rx_queue_release(void *rxq)
722 igc_rx_queue_release(rxq);
/*
 * Count completed (DD-set) descriptors on a queue, sampling every
 * IGC_RXQ_SCAN_INTERVAL descriptors to limit register/memory traffic;
 * scans from rx_tail to ring end, then wraps and continues from index 0.
 * NOTE(review): function braces and the return statement are elided from
 * this view; code kept verbatim.
 */
725 uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev,
726 uint16_t rx_queue_id)
729 * Check the DD bit of a rx descriptor of each 4 in a group,
730 * to avoid checking too frequently and downgrading performance
733 #define IGC_RXQ_SCAN_INTERVAL 4
735 volatile union igc_adv_rx_desc *rxdp;
736 struct igc_rx_queue *rxq;
739 rxq = dev->data->rx_queues[rx_queue_id];
740 rxdp = &rxq->rx_ring[rxq->rx_tail];
742 while (desc < rxq->nb_rx_desc - rxq->rx_tail) {
743 if (unlikely(!(rxdp->wb.upper.status_error &
746 desc += IGC_RXQ_SCAN_INTERVAL;
747 rxdp += IGC_RXQ_SCAN_INTERVAL;
/* wrap to the start of the ring and keep scanning */
749 rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
751 while (desc < rxq->nb_rx_desc &&
752 (rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) {
753 desc += IGC_RXQ_SCAN_INTERVAL;
754 rxdp += IGC_RXQ_SCAN_INTERVAL;
/*
 * Return non-zero if the descriptor `offset` slots past rx_tail has its
 * DD bit set (i.e. the packet is complete); bounds-checked and
 * wrap-corrected against the ring size.
 * NOTE(review): braces and the early-return value are elided from this
 * view; code kept verbatim.
 */
760 int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset)
762 volatile union igc_adv_rx_desc *rxdp;
763 struct igc_rx_queue *rxq = rx_queue;
766 if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
769 desc = rxq->rx_tail + offset;
770 if (desc >= rxq->nb_rx_desc)
771 desc -= rxq->nb_rx_desc;
773 rxdp = &rxq->rx_ring[desc];
774 return !!(rxdp->wb.upper.status_error &
775 rte_cpu_to_le_32(IGC_RXD_STAT_DD));
/*
 * ethdev rx_descriptor_status callback: classify the descriptor at
 * rx_tail+offset as DONE (DD set), AVAIL, or UNAVAIL (within the held
 * region that has not been given back to hardware yet).
 * NOTE(review): braces and the invalid-argument return are elided from
 * this view; code kept verbatim.
 */
778 int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset)
780 struct igc_rx_queue *rxq = rx_queue;
781 volatile uint32_t *status;
784 if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
/* descriptors still held by software cannot be reported on */
787 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
788 return RTE_ETH_RX_DESC_UNAVAIL;
790 desc = rxq->rx_tail + offset;
791 if (desc >= rxq->nb_rx_desc)
792 desc -= rxq->nb_rx_desc;
794 status = &rxq->rx_ring[desc].wb.upper.status_error;
795 if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD))
796 return RTE_ETH_RX_DESC_DONE;
798 return RTE_ETH_RX_DESC_AVAIL;
/*
 * Populate the RX ring: allocate one mbuf per descriptor, program each
 * descriptor's packet buffer IOVA, and record the mbuf in the software
 * ring. Logs and (presumably, on an elided line) returns an error when
 * allocation fails.
 */
802 igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq)
804 struct igc_rx_entry *rxe = rxq->sw_ring;
808 /* Initialize software ring entries. */
809 for (i = 0; i < rxq->nb_rx_desc; i++) {
810 volatile union igc_adv_rx_desc *rxd;
811 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
814 PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu",
818 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
819 rxd = &rxq->rx_ring[i];
820 rxd->read.hdr_addr = 0;
821 rxd->read.pkt_addr = dma_addr;
/*
 * Default 40-byte RSS hash key (per the I225 datasheet), used when the
 * application supplies none.
 * NOTE(review): the last line of the array (final byte + closing brace)
 * is elided from this view; code kept verbatim.
 */
829 * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet.
830 * Used as the default key.
832 static uint8_t default_rss_key[40] = {
833 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
834 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
835 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
836 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
837 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
/* Disable RSS by clearing the enable field of the MRQC register
 * (read-modify-write). */
841 igc_rss_disable(struct rte_eth_dev *dev)
843 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
846 mrqc = IGC_READ_REG(hw, IGC_MRQC);
847 mrqc &= ~IGC_MRQC_ENABLE_MASK;
848 IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
/*
 * Program the RSS hash key (RSSRK registers, when a key is supplied) and
 * enable the requested hash protocols in MRQC from rss_conf->rss_hf.
 * NOTE(review): function braces and some declarations are elided from
 * this view; code kept verbatim.
 */
852 igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf)
854 uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
858 if (hash_key != NULL) {
861 /* Fill in RSS hash key */
862 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
863 IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]);
866 /* Set configured hashing protocols in MRQC register */
867 rss_hf = rss_conf->rss_hf;
868 mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
869 if (rss_hf & ETH_RSS_IPV4)
870 mrqc |= IGC_MRQC_RSS_FIELD_IPV4;
871 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
872 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP;
873 if (rss_hf & ETH_RSS_IPV6)
874 mrqc |= IGC_MRQC_RSS_FIELD_IPV6;
875 if (rss_hf & ETH_RSS_IPV6_EX)
876 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX;
877 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
878 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP;
879 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
880 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
881 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
882 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
883 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
884 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
885 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
886 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX;
887 IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
/*
 * Apply the default RSS configuration: fill the redirection table with a
 * round-robin mapping over the configured RX queues, then program the
 * key/protocols (falling back to default_rss_key when the app gave none).
 * NOTE(review): function braces elided from this view; code kept
 * verbatim.
 */
891 igc_rss_configure(struct rte_eth_dev *dev)
893 struct rte_eth_rss_conf rss_conf;
894 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
897 /* Fill in redirection table. */
898 for (i = 0; i < IGC_RSS_RDT_SIZD; i++) {
899 union igc_rss_reta_reg reta;
900 uint16_t q_idx, reta_idx;
902 q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ?
903 i % dev->data->nb_rx_queues : 0);
904 reta_idx = i % sizeof(reta);
905 reta.bytes[reta_idx] = q_idx;
/* flush the register once all bytes of this RETA word are filled */
906 if (reta_idx == sizeof(reta) - 1)
907 IGC_WRITE_REG_LE_VALUE(hw,
908 IGC_RETA(i / sizeof(reta)), reta.dword);
912 * Configure the RSS key and the RSS protocols used to compute
913 * the RSS hash of input packets.
915 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
916 if (rss_conf.rss_key == NULL)
917 rss_conf.rss_key = default_rss_key;
918 igc_hw_rss_hash_set(hw, &rss_conf);
/*
 * Remove the single active RSS flow filter: restore the default RSS
 * configuration, disable RSS, and zero the filter state. Logs an error
 * when no filter is active (and presumably returns an error code — the
 * surrounding return lines are elided from this view).
 */
922 igc_del_rss_filter(struct rte_eth_dev *dev)
924 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
926 if (rss_filter->enable) {
927 /* recover default RSS configuration */
928 igc_rss_configure(dev);
930 /* disable RSS logic and clear filter data */
931 igc_rss_disable(dev);
932 memset(rss_filter, 0, sizeof(*rss_filter));
935 PMD_DRV_LOG(ERR, "filter not exist!");
/*
 * Copy an rte_flow RSS action into the driver's filter structure, taking
 * private copies of the key and queue list only when they fit the
 * fixed-size storage; otherwise the corresponding conf fields are
 * NULLed/zeroed.
 * NOTE(review): function braces and else-branch lines are elided from
 * this view; code kept verbatim.
 */
939 /* Initiate the filter structure by the structure of rte_flow_action_rss */
941 igc_rss_conf_set(struct igc_rss_filter *out,
942 const struct rte_flow_action_rss *rss)
944 out->conf.func = rss->func;
945 out->conf.level = rss->level;
946 out->conf.types = rss->types;
/* only keep the key if its length matches the fixed-size storage */
948 if (rss->key_len == sizeof(out->key)) {
949 memcpy(out->key, rss->key, rss->key_len);
950 out->conf.key = out->key;
951 out->conf.key_len = rss->key_len;
953 out->conf.key = NULL;
954 out->conf.key_len = 0;
957 if (rss->queue_num <= IGC_RSS_RDT_SIZD) {
958 memcpy(out->queue, rss->queue,
959 sizeof(*out->queue) * rss->queue_num);
960 out->conf.queue = out->queue;
961 out->conf.queue_num = rss->queue_num;
963 out->conf.queue = NULL;
964 out->conf.queue_num = 0;
/*
 * Install an RSS flow filter: validate hash types, queue count and queue
 * ids; reject a second filter (only one is supported); then store the
 * filter, program the redirection table by cycling through the filter's
 * queue list, and program the key/protocols.
 * NOTE(review): braces, error-return lines and the j-reset inside the
 * RETA loop are elided from this view; code kept verbatim.
 */
969 igc_add_rss_filter(struct rte_eth_dev *dev, struct igc_rss_filter *rss)
971 struct rte_eth_rss_conf rss_conf = {
972 .rss_key = rss->conf.key_len ?
973 (void *)(uintptr_t)rss->conf.key : NULL,
974 .rss_key_len = rss->conf.key_len,
975 .rss_hf = rss->conf.types,
977 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
978 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
981 /* check RSS type is valid */
982 if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) {
984 "RSS type(0x%" PRIx64 ") error!, only 0x%" PRIx64
985 " been supported", rss_conf.rss_hf,
986 (uint64_t)IGC_RSS_OFFLOAD_ALL);
990 /* check queue count is not zero */
991 if (!rss->conf.queue_num) {
992 PMD_DRV_LOG(ERR, "Queue number should not be 0!");
996 /* check queue id is valid */
997 for (i = 0; i < rss->conf.queue_num; i++)
998 if (rss->conf.queue[i] >= dev->data->nb_rx_queues) {
999 PMD_DRV_LOG(ERR, "Queue id %u is invalid!",
1000 rss->conf.queue[i]);
1004 /* only support one filter */
1005 if (rss_filter->enable) {
1006 PMD_DRV_LOG(ERR, "Only support one RSS filter!");
1009 rss_filter->enable = 1;
1011 igc_rss_conf_set(rss_filter, &rss->conf);
1013 /* Fill in redirection table. */
1014 for (i = 0, j = 0; i < IGC_RSS_RDT_SIZD; i++, j++) {
1015 union igc_rss_reta_reg reta;
1016 uint16_t q_idx, reta_idx;
/* wrap j back to 0 when the filter's queue list is exhausted
 * (reset presumably on the elided line after this check) */
1018 if (j == rss->conf.queue_num)
1020 q_idx = rss->conf.queue[j];
1021 reta_idx = i % sizeof(reta);
1022 reta.bytes[reta_idx] = q_idx;
1023 if (reta_idx == sizeof(reta) - 1)
1024 IGC_WRITE_REG_LE_VALUE(hw,
1025 IGC_RETA(i / sizeof(reta)), reta.dword);
1028 if (rss_conf.rss_key == NULL)
1029 rss_conf.rss_key = default_rss_key;
1030 igc_hw_rss_hash_set(hw, &rss_conf);
/*
 * Tear down the active RSS filter, if any: restore default RSS, disable
 * the RSS logic, and zero the filter state. Quietly does nothing when no
 * filter is enabled.
 * NOTE(review): function braces elided from this view; code kept
 * verbatim.
 */
1035 igc_clear_rss_filter(struct rte_eth_dev *dev)
1037 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
1039 if (!rss_filter->enable)
1042 /* recover default RSS configuration */
1043 igc_rss_configure(dev);
1045 /* disable RSS logic and clear filter data */
1046 igc_rss_disable(dev);
1047 memset(rss_filter, 0, sizeof(*rss_filter));
/*
 * Configure multi-queue RX according to the configured mq_mode: RSS mode
 * enables RSS; NONE programs the RSS registers then disables the logic;
 * anything else is rejected. SR-IOV is not supported by this PMD.
 * NOTE(review): braces, the ETH_MQ_RX_RSS case label and return lines
 * are elided from this view; code kept verbatim.
 */
1051 igc_dev_mq_rx_configure(struct rte_eth_dev *dev)
1053 if (RTE_ETH_DEV_SRIOV(dev).active) {
1054 PMD_DRV_LOG(ERR, "SRIOV unsupported!");
1058 switch (dev->data->dev_conf.rxmode.mq_mode) {
1060 igc_rss_configure(dev);
1062 case ETH_MQ_RX_NONE:
1064 * configure RSS register for following,
1065 * then disable the RSS logic
1067 igc_rss_configure(dev);
1068 igc_rss_disable(dev);
1071 PMD_DRV_LOG(ERR, "rx mode(%d) not supported!",
1072 dev->data->dev_conf.rxmode.mq_mode);
/*
 * igc_rx_init - initialize the RX datapath for the whole port.
 *
 * Sequence: disable receives while reconfiguring; program jumbo-frame
 * support and the max packet length (RLPML); for each RX queue allocate
 * ring mbufs, program ring base/length registers, buffer sizing
 * (SRRCTL), drop-enable and prefetch/host/write-back thresholds
 * (RXDCTL); select the scattered-RX burst function when a frame cannot
 * fit a single buffer; configure multi-queue/RSS and the checksum
 * offload register (RXCSUM); finally rebuild RCTL, re-enable receives,
 * and prime each queue's head/tail pointers and per-queue VLAN/CRC
 * stripping (DVMOLR).
 */
1079 igc_rx_init(struct rte_eth_dev *dev)
1081 struct igc_rx_queue *rxq;
1082 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1083 uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
1084 uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1088 uint16_t rctl_bsize;
/* Default burst handler; may be switched to the scattered variant below. */
1092 dev->rx_pkt_burst = igc_recv_pkts;
1095 * Make sure receives are disabled while setting
1096 * up the descriptor ring.
1098 rctl = IGC_READ_REG(hw, IGC_RCTL);
1099 IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
1101 /* Configure support of jumbo frames, if any. */
1102 if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
1103 rctl |= IGC_RCTL_LPE;
1106 * Set maximum packet length by default, and might be updated
1107 * together with enabling/disabling dual VLAN.
1109 IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pkt_len);
1111 rctl &= ~IGC_RCTL_LPE;
1114 /* Configure and enable each RX queue. */
1116 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1121 rxq = dev->data->rx_queues[i];
1124 /* Allocate buffers for descriptor rings and set up queue */
1125 ret = igc_alloc_rx_queue_mbufs(rxq);
1130 * Reset crc_len in case it was changed after queue setup by a
1133 rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ?
1134 RTE_ETHER_CRC_LEN : 0;
/* Program descriptor ring length and 64-bit base address. */
1136 bus_addr = rxq->rx_ring_phys_addr;
1137 IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx),
1139 sizeof(union igc_adv_rx_desc));
1140 IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx),
1141 (uint32_t)(bus_addr >> 32));
1142 IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx),
1143 (uint32_t)bus_addr);
1145 /* set descriptor configuration */
1146 srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
/* Header buffer size is expressed in 64-byte units. */
1148 srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) <<
1149 IGC_SRRCTL_BSIZEHEADER_SHIFT;
1151 * Configure RX buffer size.
1153 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
1154 RTE_PKTMBUF_HEADROOM);
1155 if (buf_size >= 1024) {
1157 * Configure the BSIZEPACKET field of the SRRCTL
1158 * register of the queue.
1159 * Value is in 1 KB resolution, from 1 KB to 16 KB.
1160 * If this field is equal to 0b, then RCTL.BSIZE
1161 * determines the RX packet buffer size.
1164 srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) &
1165 IGC_SRRCTL_BSIZEPKT_MASK);
/* Round buf_size down to what the hardware will actually use. */
1166 buf_size = (uint16_t)((srrctl &
1167 IGC_SRRCTL_BSIZEPKT_MASK) <<
1168 IGC_SRRCTL_BSIZEPKT_SHIFT);
1170 /* It adds dual VLAN length for supporting dual VLAN */
1171 if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size)
1172 dev->data->scattered_rx = 1;
1175 * Use BSIZE field of the device RCTL register.
/* Track the smallest per-queue buffer, since RCTL.BSIZE is port-wide. */
1177 if (rctl_bsize == 0 || rctl_bsize > buf_size)
1178 rctl_bsize = buf_size;
1179 dev->data->scattered_rx = 1;
1182 /* Set if packets are dropped when no descriptors available */
1184 srrctl |= IGC_SRRCTL_DROP_EN;
1186 IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl);
1188 /* Enable this RX queue. */
1189 rxdctl = IGC_RXDCTL_QUEUE_ENABLE;
1190 rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) &
1191 IGC_RXDCTL_PTHRESH_MSK;
1192 rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) &
1193 IGC_RXDCTL_HTHRESH_MSK;
1194 rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) &
1195 IGC_RXDCTL_WTHRESH_MSK;
1196 IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl);
/* Scatter may also be forced by the application-level offload flag. */
1199 if (offloads & DEV_RX_OFFLOAD_SCATTER)
1200 dev->data->scattered_rx = 1;
1202 if (dev->data->scattered_rx) {
1203 PMD_DRV_LOG(DEBUG, "forcing scatter mode")
1204 dev->rx_pkt_burst = igc_recv_scattered_pkts;
1207 * Setup BSIZE field of RCTL register, if needed.
1208 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
1209 * register, since the code above configures the SRRCTL register of
1210 * the RX queue in such a case.
1211 * All configurable sizes are:
1212 * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX);
1213 * 8192: rctl |= (IGC_RCTL_SZ_8192 | IGC_RCTL_BSEX);
1214 * 4096: rctl |= (IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX);
1215 * 2048: rctl |= IGC_RCTL_SZ_2048;
1216 * 1024: rctl |= IGC_RCTL_SZ_1024;
1217 * 512: rctl |= IGC_RCTL_SZ_512;
1218 * 256: rctl |= IGC_RCTL_SZ_256;
1220 if (rctl_bsize > 0) {
1221 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1222 rctl |= IGC_RCTL_SZ_512;
1223 else /* 256 <= buf_size < 512 - use 256 */
1224 rctl |= IGC_RCTL_SZ_256;
1228 * Configure RSS if device configured with multiple RX queues.
1230 igc_dev_mq_rx_configure(dev);
1232 /* Update the rctl since igc_dev_mq_rx_configure may change its value */
1233 rctl |= IGC_READ_REG(hw, IGC_RCTL);
1236 * Setup the Checksum Register.
1237 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1239 rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
1240 rxcsum |= IGC_RXCSUM_PCSD;
1242 /* Enable both L3/L4 rx checksum offload */
1243 if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
1244 rxcsum |= IGC_RXCSUM_IPOFL;
1246 rxcsum &= ~IGC_RXCSUM_IPOFL;
1249 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) {
1250 rxcsum |= IGC_RXCSUM_TUOFL;
/* TCP/UDP checksum offload also implies SCTP checksum support here. */
1251 offloads |= DEV_RX_OFFLOAD_SCTP_CKSUM;
1253 rxcsum &= ~IGC_RXCSUM_TUOFL;
1256 if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM)
1257 rxcsum |= IGC_RXCSUM_CRCOFL;
1259 rxcsum &= ~IGC_RXCSUM_CRCOFL;
1261 IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
1263 /* Setup the Receive Control Register. */
1264 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1265 rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1267 rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */
/* Clear multicast-offset and loopback fields before setting them. */
1269 rctl &= ~IGC_RCTL_MO_MSK;
1270 rctl &= ~IGC_RCTL_LBM_MSK;
1271 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
1273 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
1275 if (dev->data->dev_conf.lpbk_mode == 1)
1276 rctl |= IGC_RCTL_LBM_MAC;
1278 rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI |
1279 IGC_RCTL_PSP | IGC_RCTL_PMCF);
1281 /* Make sure VLAN Filters are off. */
1282 rctl &= ~IGC_RCTL_VFE;
1283 /* Don't store bad packets. */
1284 rctl &= ~IGC_RCTL_SBP;
1286 /* Enable Receives. */
1287 IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1290 * Setup the HW Rx Head and Tail Descriptor Pointers.
1291 * This needs to be done after enable.
1293 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1296 rxq = dev->data->rx_queues[i];
1297 IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0);
1298 IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
/* Per-queue VLAN stripping and CRC stripping via DVMOLR. */
1300 dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->reg_idx));
1301 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
1302 dvmolr |= IGC_DVMOLR_STRVLAN;
1304 dvmolr &= ~IGC_DVMOLR_STRVLAN;
1306 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1307 dvmolr &= ~IGC_DVMOLR_STRCRC;
1309 dvmolr |= IGC_DVMOLR_STRCRC;
1311 IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr);
/*
 * igc_reset_rx_queue - return a RX queue's software state to its
 * initial condition: zero the hardware descriptor ring and clear the
 * scattered-packet reassembly chain pointers.
 */
1318 igc_reset_rx_queue(struct igc_rx_queue *rxq)
1320 static const union igc_adv_rx_desc zeroed_desc = { {0} };
1323 /* Zero out HW ring memory */
1324 for (i = 0; i < rxq->nb_rx_desc; i++)
1325 rxq->rx_ring[i] = zeroed_desc;
/* Drop any partially reassembled multi-segment packet. */
1328 rxq->pkt_first_seg = NULL;
1329 rxq->pkt_last_seg = NULL;
/*
 * eth_igc_rx_queue_setup - ethdev rx_queue_setup callback.
 *
 * Validates the descriptor count (multiple of IGC_RX_DESCRIPTOR_MULTIPLE,
 * within [IGC_MIN_RXD, IGC_MAX_RXD]), frees any previous queue at this
 * index, allocates the queue structure, the DMA descriptor ring memzone
 * (sized for IGC_MAX_RXD so later re-setups never need a larger zone)
 * and the software mbuf ring, then registers and resets the queue.
 */
1333 eth_igc_rx_queue_setup(struct rte_eth_dev *dev,
1336 unsigned int socket_id,
1337 const struct rte_eth_rxconf *rx_conf,
1338 struct rte_mempool *mp)
1340 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1341 const struct rte_memzone *rz;
1342 struct igc_rx_queue *rxq;
1346 * Validate number of receive descriptors.
1347 * It must not exceed hardware maximum, and must be multiple
1348 * of IGC_RX_DESCRIPTOR_MULTIPLE.
1350 if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 ||
1351 nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) {
1353 "RX descriptor must be multiple of %u(cur: %u) and between %u and %u",
1354 IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc,
1355 IGC_MIN_RXD, IGC_MAX_RXD);
1359 /* Free memory prior to re-allocation if needed */
1360 if (dev->data->rx_queues[queue_idx] != NULL) {
1361 igc_rx_queue_release(dev->data->rx_queues[queue_idx]);
1362 dev->data->rx_queues[queue_idx] = NULL;
1365 /* First allocate the RX queue data structure. */
1366 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue),
1367 RTE_CACHE_LINE_SIZE);
/* Copy application configuration into the queue structure. */
1370 rxq->offloads = rx_conf->offloads;
1372 rxq->nb_rx_desc = nb_desc;
1373 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1374 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1375 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1376 rxq->drop_en = rx_conf->rx_drop_en;
1377 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1378 rxq->queue_id = queue_idx;
1379 rxq->reg_idx = queue_idx;
1380 rxq->port_id = dev->data->port_id;
1383 * Allocate RX ring hardware descriptors. A memzone large enough to
1384 * handle the maximum ring size is allocated in order to allow for
1385 * resizing in later calls to the queue setup function.
1387 size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD;
1388 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1389 IGC_ALIGN, socket_id);
1391 igc_rx_queue_release(rxq);
/* Cache tail/head register addresses for fast datapath access. */
1394 rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx));
1395 rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx));
1396 rxq->rx_ring_phys_addr = rz->iova;
1397 rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr;
1399 /* Allocate software ring. */
1400 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1401 sizeof(struct igc_rx_entry) * nb_desc,
1402 RTE_CACHE_LINE_SIZE);
1403 if (rxq->sw_ring == NULL) {
1404 igc_rx_queue_release(rxq);
1408 PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
1409 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1411 dev->data->rx_queues[queue_idx] = rxq;
1412 igc_reset_rx_queue(rxq);
1417 /* Validate a TX burst before transmit (ethdev tx_pkt_prepare callback):
 * rejects packets whose TSO parameters exceed the hardware limits or
 * that request unsupported offloads, and (in debug builds) validates
 * the generic TX offload flags before fixing up Intel-specific
 * pseudo-header checksums. */
1419 eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1425 for (i = 0; i < nb_pkts; i++) {
1428 /* Check some limitations for TSO in hardware */
1429 if (m->ol_flags & IGC_TX_OFFLOAD_SEG)
1430 if (m->tso_segsz > IGC_TSO_MAX_MSS ||
1431 m->l2_len + m->l3_len + m->l4_len >
1432 IGC_TSO_MAX_HDRLEN) {
/* Any requested offload outside the supported set is rejected. */
1437 if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) {
1438 rte_errno = ENOTSUP;
1442 #ifdef RTE_ETHDEV_DEBUG_TX
1443 ret = rte_validate_tx_offload(m);
/* Compute pseudo-header checksums expected by Intel hardware. */
1449 ret = rte_net_intel_cksum_prepare(m);
1460 * There are some hardware limitations for TCP segmentation offload, so
1461 * check whether the requested TSO parameters are valid.
 *
 * If the MSS or the total header length exceeds what the hardware
 * supports, TSO is silently dropped from the request and plain TCP
 * checksum offload is substituted.
1463 static inline uint64_t
1464 check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para)
1466 if (!(ol_req & IGC_TX_OFFLOAD_SEG))
1468 if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len +
1469 ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) {
/* Fall back: strip the TSO request, keep TCP checksum offload. */
1470 ol_req &= ~IGC_TX_OFFLOAD_SEG;
1471 ol_req |= PKT_TX_TCP_CKSUM;
1477 * Check which hardware context can be used. Use the existing match
1478 * or create a new context descriptor.
 *
 * Returns the index of a cached context whose (flags, masked offload
 * data) match the request; a return value of IGC_CTX_NUM or greater
 * signals the caller to build a fresh context descriptor.
 * NOTE(review): the switch to the alternate context slot between the
 * two checks is not visible in this view — confirm against full source.
1480 static inline uint32_t
1481 what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
1482 union igc_tx_offload tx_offload)
1484 uint32_t curr = txq->ctx_curr;
1486 /* If match with the current context */
1487 if (likely(txq->ctx_cache[curr].flags == flags &&
1488 txq->ctx_cache[curr].tx_offload.data ==
1489 (txq->ctx_cache[curr].tx_offload_mask.data &
1490 tx_offload.data))) {
1494 /* Total two context, if match with the second context */
1496 if (likely(txq->ctx_cache[curr].flags == flags &&
1497 txq->ctx_cache[curr].tx_offload.data ==
1498 (txq->ctx_cache[curr].tx_offload_mask.data &
1499 tx_offload.data))) {
1500 txq->ctx_curr = curr;
1504 /* Mismatch, create new one */
1509 * This is a separate function, looking for optimization opportunity here
1510 * Rework required to go with the pre-defined values.
 *
 * igc_set_xmit_ctx - build an advanced TX context descriptor in the
 * ring describing the offloads (VLAN insertion, TSO, L3/L4 checksum)
 * for subsequent data descriptors, and cache the (flags, offload)
 * tuple so identical packets can reuse the context without rewriting it.
1513 igc_set_xmit_ctx(struct igc_tx_queue *txq,
1514 volatile struct igc_adv_tx_context_desc *ctx_txd,
1515 uint64_t ol_flags, union igc_tx_offload tx_offload)
1517 uint32_t type_tucmd_mlhl;
1518 uint32_t mss_l4len_idx;
1520 uint32_t vlan_macip_lens;
1521 union igc_tx_offload tx_offload_mask;
1523 /* Use the previous context */
1525 ctx_curr = txq->ctx_curr;
1527 tx_offload_mask.data = 0;
1528 type_tucmd_mlhl = 0;
1530 /* Specify which HW CTX to upload. */
1531 mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
/* VLAN tag only participates in context matching when requested. */
1533 if (ol_flags & PKT_TX_VLAN_PKT)
1534 tx_offload_mask.vlan_tci = 0xffff;
1536 /* check if TCP segmentation required for this packet */
1537 if (ol_flags & IGC_TX_OFFLOAD_SEG) {
1538 /* implies IP cksum in IPv4 */
1539 if (ol_flags & PKT_TX_IP_CKSUM)
1540 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 |
1541 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1543 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 |
1544 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1546 if (ol_flags & PKT_TX_TCP_SEG)
1547 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
1549 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
/* For TSO the MSS and L4 header length go into the descriptor. */
1551 tx_offload_mask.data |= TX_TSO_CMP_MASK;
1552 mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz <<
1553 IGC_ADVTXD_MSS_SHIFT;
1554 mss_l4len_idx |= (uint32_t)tx_offload.l4_len <<
1555 IGC_ADVTXD_L4LEN_SHIFT;
1556 } else { /* no TSO, check if hardware checksum is needed */
1557 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
1558 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
1560 if (ol_flags & PKT_TX_IP_CKSUM)
1561 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4;
1563 switch (ol_flags & PKT_TX_L4_MASK) {
1564 case PKT_TX_TCP_CKSUM:
1565 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP |
1566 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1567 mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr)
1568 << IGC_ADVTXD_L4LEN_SHIFT;
1570 case PKT_TX_UDP_CKSUM:
1571 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP |
1572 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1573 mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr)
1574 << IGC_ADVTXD_L4LEN_SHIFT;
1576 case PKT_TX_SCTP_CKSUM:
1577 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP |
1578 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1579 mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr)
1580 << IGC_ADVTXD_L4LEN_SHIFT;
/* No/unknown L4 checksum: mark L4 type reserved. */
1583 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV |
1584 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
/* Remember this context so identical follow-up packets reuse it. */
1589 txq->ctx_cache[ctx_curr].flags = ol_flags;
1590 txq->ctx_cache[ctx_curr].tx_offload.data =
1591 tx_offload_mask.data & tx_offload.data;
1592 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
/* Write the context descriptor fields in little-endian order. */
1594 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
1595 vlan_macip_lens = (uint32_t)tx_offload.data;
1596 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
1597 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1598 ctx_txd->u.launch_time = 0;
/*
 * tx_desc_vlan_flags_to_cmdtype - translate mbuf offload flags into
 * TX descriptor command-type bits: VLE when VLAN insertion is
 * requested, TSE when TCP segmentation is requested.  Table lookup
 * avoids branches in the hot path.
 */
1601 static inline uint32_t
1602 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
1605 static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE};
1606 static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE};
1607 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
1608 cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
/*
 * tx_desc_cksum_flags_to_olinfo - translate mbuf checksum flags into
 * TX descriptor olinfo bits: TXSM (L4 checksum insert) for any L4
 * checksum request or TSO, IXSM (IP checksum insert) for IPv4
 * checksum.  Branch-free table lookup for the hot path.
 */
1612 static inline uint32_t
1613 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
1615 static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM};
1616 static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM};
1619 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
1620 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
/* TSO also requires the hardware to insert the L4 checksum. */
1621 tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
/*
 * igc_xmit_pkts - burst transmit entry point (ethdev tx_pkt_burst).
 *
 * For each packet: compute the descriptors needed (one per mbuf
 * segment, plus one optional context descriptor for offloads), verify
 * ring space by checking the DD write-back bit of the "last
 * descriptor" that would be consumed, optionally emit a context
 * descriptor, then one data descriptor per segment; the final
 * descriptor of each packet gets EOP|RS.  The TDT tail register is
 * written once at the end of the burst.
 */
1626 igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1628 struct igc_tx_queue * const txq = tx_queue;
1629 struct igc_tx_entry * const sw_ring = txq->sw_ring;
1630 struct igc_tx_entry *txe, *txn;
1631 volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1632 volatile union igc_adv_tx_desc *txd;
1633 struct rte_mbuf *tx_pkt;
1634 struct rte_mbuf *m_seg;
1635 uint64_t buf_dma_addr;
1636 uint32_t olinfo_status;
1637 uint32_t cmd_type_len;
1646 uint32_t new_ctx = 0;
1647 union igc_tx_offload tx_offload = {0};
1649 tx_id = txq->tx_tail;
1650 txe = &sw_ring[tx_id];
1652 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1653 tx_pkt = *tx_pkts++;
1654 pkt_len = tx_pkt->pkt_len;
1656 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1659 * The number of descriptors that must be allocated for a
1660 * packet is the number of segments of that packet, plus 1
1661 * Context Descriptor for the VLAN Tag Identifier, if any.
1662 * Determine the last TX descriptor to allocate in the TX ring
1663 * for the packet, starting from the current position (tx_id)
1666 tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1);
1668 ol_flags = tx_pkt->ol_flags;
1669 tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK;
1671 /* If a Context Descriptor need be built . */
1673 tx_offload.l2_len = tx_pkt->l2_len;
1674 tx_offload.l3_len = tx_pkt->l3_len;
1675 tx_offload.l4_len = tx_pkt->l4_len;
1676 tx_offload.vlan_tci = tx_pkt->vlan_tci;
1677 tx_offload.tso_segsz = tx_pkt->tso_segsz;
/* Downgrade invalid TSO requests to plain checksum offload. */
1678 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
1680 new_ctx = what_advctx_update(txq, tx_ol_req,
1682 /* Only allocate context descriptor if required*/
1683 new_ctx = (new_ctx >= IGC_CTX_NUM);
1684 tx_last = (uint16_t)(tx_last + new_ctx);
/* Wrap the last-descriptor index around the ring. */
1686 if (tx_last >= txq->nb_tx_desc)
1687 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1690 "port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u",
1691 txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last);
1694 * Check if there are enough free descriptors in the TX ring
1695 * to transmit the next packet.
1696 * This operation is based on the two following rules:
1698 * 1- Only check that the last needed TX descriptor can be
1699 * allocated (by construction, if that descriptor is free,
1700 * all intermediate ones are also free).
1702 * For this purpose, the index of the last TX descriptor
1703 * used for a packet (the "last descriptor" of a packet)
1704 * is recorded in the TX entries (the last one included)
1705 * that are associated with all TX descriptors allocated
1708 * 2- Avoid to allocate the last free TX descriptor of the
1709 * ring, in order to never set the TDT register with the
1710 * same value stored in parallel by the NIC in the TDH
1711 * register, which makes the TX engine of the NIC enter
1712 * in a deadlock situation.
1714 * By extension, avoid to allocate a free descriptor that
1715 * belongs to the last set of free descriptors allocated
1716 * to the same packet previously transmitted.
1720 * The "last descriptor" of the previously sent packet, if any,
1721 * which used the last descriptor to allocate.
1723 tx_end = sw_ring[tx_last].last_id;
1726 * The next descriptor following that "last descriptor" in the
1729 tx_end = sw_ring[tx_end].next_id;
1732 * The "last descriptor" associated with that next descriptor.
1734 tx_end = sw_ring[tx_end].last_id;
1737 * Check that this descriptor is free.
1739 if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) {
1746 * Set common flags of all TX Data Descriptors.
1748 * The following bits must be set in all Data Descriptors:
1749 * - IGC_ADVTXD_DTYP_DATA
1750 * - IGC_ADVTXD_DCMD_DEXT
1752 * The following bits must be set in the first Data Descriptor
1753 * and are ignored in the other ones:
1754 * - IGC_ADVTXD_DCMD_IFCS
1755 * - IGC_ADVTXD_MAC_1588
1756 * - IGC_ADVTXD_DCMD_VLE
1758 * The following bits must only be set in the last Data
1762 * The following bits can be set in any Data Descriptor, but
1763 * are only set in the last Data Descriptor:
1766 cmd_type_len = txq->txd_type |
1767 IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT;
/* For TSO, PAYLEN excludes the L2/L3/L4 headers. */
1768 if (tx_ol_req & IGC_TX_OFFLOAD_SEG)
1769 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len +
1771 olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT);
1774 * Timer 0 should be used to for packet timestamping,
1775 * sample the packet timestamp to reg 0
1777 if (ol_flags & PKT_TX_IEEE1588_TMST)
1778 cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP;
1781 /* Setup TX Advanced context descriptor if required */
1783 volatile struct igc_adv_tx_context_desc *
1784 ctx_txd = (volatile struct
1785 igc_adv_tx_context_desc *)&txr[tx_id];
1787 txn = &sw_ring[txe->next_id];
1788 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
/* Free the mbuf that previously occupied this ring slot. */
1790 if (txe->mbuf != NULL) {
1791 rte_pktmbuf_free_seg(txe->mbuf);
1795 igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
1798 txe->last_id = tx_last;
1799 tx_id = txe->next_id;
1803 /* Setup the TX Advanced Data Descriptor */
1805 tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
1807 tx_desc_cksum_flags_to_olinfo(tx_ol_req);
1808 olinfo_status |= (uint32_t)txq->ctx_curr <<
1809 IGC_ADVTXD_IDX_SHIFT;
/* One data descriptor per mbuf segment. */
1814 txn = &sw_ring[txe->next_id];
1815 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1819 if (txe->mbuf != NULL)
1820 rte_pktmbuf_free_seg(txe->mbuf);
1823 /* Set up transmit descriptor */
1824 slen = (uint16_t)m_seg->data_len;
1825 buf_dma_addr = rte_mbuf_data_iova(m_seg);
1826 txd->read.buffer_addr =
1827 rte_cpu_to_le_64(buf_dma_addr);
1828 txd->read.cmd_type_len =
1829 rte_cpu_to_le_32(cmd_type_len | slen);
1830 txd->read.olinfo_status =
1831 rte_cpu_to_le_32(olinfo_status);
1832 txe->last_id = tx_last;
1833 tx_id = txe->next_id;
1835 m_seg = m_seg->next;
1836 } while (m_seg != NULL);
1839 * The last packet data descriptor needs End Of Packet (EOP)
1840 * and Report Status (RS).
1842 txd->read.cmd_type_len |=
1843 rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS);
1849 * Set the Transmit Descriptor Tail (TDT).
1851 IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
1852 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1853 txq->port_id, txq->queue_id, tx_id, nb_tx);
1854 txq->tx_tail = tx_id;
/*
 * eth_igc_tx_descriptor_status - ethdev callback reporting the state of
 * the TX descriptor @offset entries past the current software tail.
 * Returns RTE_ETH_TX_DESC_DONE once hardware has written back the DD
 * status bit, RTE_ETH_TX_DESC_FULL otherwise; an invalid queue or
 * out-of-range offset is rejected first.
 */
1859 int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset)
1861 struct igc_tx_queue *txq = tx_queue;
1862 volatile uint32_t *status;
1865 if (unlikely(!txq || offset >= txq->nb_tx_desc))
/* Index from the tail, wrapping around the ring. */
1868 desc = txq->tx_tail + offset;
1869 if (desc >= txq->nb_tx_desc)
1870 desc -= txq->nb_tx_desc;
1872 status = &txq->tx_ring[desc].wb.status;
1873 if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD))
1874 return RTE_ETH_TX_DESC_DONE;
1876 return RTE_ETH_TX_DESC_FULL;
/*
 * igc_tx_queue_release_mbufs - free every mbuf still attached to the
 * software ring of a TX queue, clearing each slot afterwards.
 */
1880 igc_tx_queue_release_mbufs(struct igc_tx_queue *txq)
1884 if (txq->sw_ring != NULL) {
1885 for (i = 0; i < txq->nb_tx_desc; i++) {
1886 if (txq->sw_ring[i].mbuf != NULL) {
1887 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1888 txq->sw_ring[i].mbuf = NULL;
/*
 * igc_tx_queue_release - free a TX queue: its queued mbufs first,
 * then the software ring.
 */
1895 igc_tx_queue_release(struct igc_tx_queue *txq)
1897 igc_tx_queue_release_mbufs(txq);
1898 rte_free(txq->sw_ring);
/* ethdev tx_queue_release callback: thin wrapper over igc_tx_queue_release. */
1902 void eth_igc_tx_queue_release(void *txq)
1905 igc_tx_queue_release(txq);
/*
 * igc_reset_tx_queue_stat - clear the per-queue TX bookkeeping,
 * including the cached hardware-context entries so no stale offload
 * context is reused after a reset.
 */
1909 igc_reset_tx_queue_stat(struct igc_tx_queue *txq)
1914 memset((void *)&txq->ctx_cache, 0,
1915 IGC_CTX_NUM * sizeof(struct igc_advctx_info));
/*
 * igc_reset_tx_queue - re-initialize a TX queue's ring state: mark
 * every descriptor as done (DD set) so the transmit path sees the ring
 * as empty, link the software entries into a circular list via
 * next_id, and reset the queue statistics/context cache.
 */
1919 igc_reset_tx_queue(struct igc_tx_queue *txq)
1921 struct igc_tx_entry *txe = txq->sw_ring;
1924 /* Initialize ring entries */
1925 prev = (uint16_t)(txq->nb_tx_desc - 1);
1926 for (i = 0; i < txq->nb_tx_desc; i++) {
1927 volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i];
/* DD pre-set: slot is free for the transmit path. */
1929 txd->wb.status = IGC_TXD_STAT_DD;
1932 txe[prev].next_id = i;
1936 txq->txd_type = IGC_ADVTXD_DTYP_DATA;
1937 igc_reset_tx_queue_stat(txq);
1941 * clear all rx/tx queue
 *
 * Frees the mbufs held by every configured TX and RX queue and resets
 * each queue's ring state; used when stopping/closing the device.
1944 igc_dev_clear_queues(struct rte_eth_dev *dev)
1947 struct igc_tx_queue *txq;
1948 struct igc_rx_queue *rxq;
1950 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1951 txq = dev->data->tx_queues[i];
1953 igc_tx_queue_release_mbufs(txq);
1954 igc_reset_tx_queue(txq);
1958 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1959 rxq = dev->data->rx_queues[i];
1961 igc_rx_queue_release_mbufs(rxq);
1962 igc_reset_rx_queue(rxq);
/*
 * eth_igc_tx_queue_setup - ethdev tx_queue_setup callback.
 *
 * Validates the descriptor count, warns about unused/suboptimal
 * threshold parameters, frees any queue previously at this index,
 * allocates the queue structure, DMA descriptor memzone (sized for
 * IGC_MAX_TXD) and software ring, installs the burst/prepare handlers,
 * and resets the queue.
 */
1967 int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1968 uint16_t nb_desc, unsigned int socket_id,
1969 const struct rte_eth_txconf *tx_conf)
1971 const struct rte_memzone *tz;
1972 struct igc_tx_queue *txq;
1976 if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 ||
1977 nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) {
1979 "TX-descriptor must be a multiple of %u and between %u and %u, cur: %u",
1980 IGC_TX_DESCRIPTOR_MULTIPLE,
1981 IGC_MAX_TXD, IGC_MIN_TXD, nb_desc);
1985 hw = IGC_DEV_PRIVATE_HW(dev);
1988 * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G
1991 if (tx_conf->tx_free_thresh != 0)
1993 "The tx_free_thresh parameter is not used for the 2.5G driver");
1994 if (tx_conf->tx_rs_thresh != 0)
1996 "The tx_rs_thresh parameter is not used for the 2.5G driver");
1997 if (tx_conf->tx_thresh.wthresh == 0)
1999 "To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16.");
2001 /* Free memory prior to re-allocation if needed */
2002 if (dev->data->tx_queues[queue_idx] != NULL) {
2003 igc_tx_queue_release(dev->data->tx_queues[queue_idx]);
2004 dev->data->tx_queues[queue_idx] = NULL;
2007 /* First allocate the tx queue data structure */
2008 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue),
2009 RTE_CACHE_LINE_SIZE);
2014 * Allocate TX ring hardware descriptors. A memzone large enough to
2015 * handle the maximum ring size is allocated in order to allow for
2016 * resizing in later calls to the queue setup function.
2018 size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD;
2019 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
2020 IGC_ALIGN, socket_id);
2022 igc_tx_queue_release(txq);
/* Copy configuration into the queue structure. */
2026 txq->nb_tx_desc = nb_desc;
2027 txq->pthresh = tx_conf->tx_thresh.pthresh;
2028 txq->hthresh = tx_conf->tx_thresh.hthresh;
2029 txq->wthresh = tx_conf->tx_thresh.wthresh;
2031 txq->queue_id = queue_idx;
2032 txq->reg_idx = queue_idx;
2033 txq->port_id = dev->data->port_id;
/* Cache the tail register address for fast datapath doorbells. */
2035 txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx));
2036 txq->tx_ring_phys_addr = tz->iova;
2038 txq->tx_ring = (union igc_adv_tx_desc *)tz->addr;
2039 /* Allocate software ring */
2040 txq->sw_ring = rte_zmalloc("txq->sw_ring",
2041 sizeof(struct igc_tx_entry) * nb_desc,
2042 RTE_CACHE_LINE_SIZE);
2043 if (txq->sw_ring == NULL) {
2044 igc_tx_queue_release(txq);
2047 PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2048 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2050 igc_reset_tx_queue(txq)
2051 dev->tx_pkt_burst = igc_xmit_pkts;
2052 dev->tx_pkt_prepare = &eth_igc_prep_pkts;
2053 dev->data->tx_queues[queue_idx] = txq;
2054 txq->offloads = tx_conf->offloads;
/*
 * eth_igc_tx_done_cleanup - ethdev tx_done_cleanup callback.
 *
 * Walks the software ring starting from the oldest in-flight packet
 * (the entry after the tail packet's last segment) and frees completed
 * packets — a packet is complete when its last segment's descriptor
 * has the DD write-back bit set.  Stops after @free_cnt packets, when
 * an incomplete packet is reached, or after one full pass over the
 * ring.
 */
2060 eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt)
2062 struct igc_tx_queue *txq = txqueue;
2063 struct igc_tx_entry *sw_ring;
2064 volatile union igc_adv_tx_desc *txr;
2065 uint16_t tx_first; /* First segment analyzed. */
2066 uint16_t tx_id; /* Current segment being processed. */
2067 uint16_t tx_last; /* Last segment in the current packet. */
2068 uint16_t tx_next; /* First segment of the next packet. */
2075 sw_ring = txq->sw_ring;
2079 * tx_tail is the last sent packet on the sw_ring. Goto the end
2080 * of that packet (the last segment in the packet chain) and
2081 * then the next segment will be the start of the oldest segment
2082 * in the sw_ring. This is the first packet that will be
2083 * attempted to be freed.
2086 /* Get last segment in most recently added packet. */
2087 tx_first = sw_ring[txq->tx_tail].last_id;
2089 /* Get the next segment, which is the oldest segment in ring. */
2090 tx_first = sw_ring[tx_first].next_id;
2092 /* Set the current index to the first. */
2096 * Loop through each packet. For each packet, verify that an
2097 * mbuf exists and that the last segment is free. If so, free
2101 tx_last = sw_ring[tx_id].last_id;
2103 if (sw_ring[tx_last].mbuf) {
/* Packet still owned by hardware until DD is written back. */
2104 if (!(txr[tx_last].wb.status &
2105 rte_cpu_to_le_32(IGC_TXD_STAT_DD)))
2108 /* Get the start of the next packet. */
2109 tx_next = sw_ring[tx_last].next_id;
2112 * Loop through all segments in a
2116 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
2117 sw_ring[tx_id].mbuf = NULL;
2118 sw_ring[tx_id].last_id = tx_id;
2120 /* Move to next segment. */
2121 tx_id = sw_ring[tx_id].next_id;
2122 } while (tx_id != tx_next);
2125 * Increment the number of packets
2129 if (unlikely(count == free_cnt))
2133 * There are multiple reasons to be here:
2134 * 1) All the packets on the ring have been
2135 * freed - tx_id is equal to tx_first
2136 * and some packets have been freed.
2138 * 2) Interfaces has not sent a rings worth of
2139 * packets yet, so the segment after tail is
2140 * still empty. Or a previous call to this
2141 * function freed some of the segments but
2142 * not all so there is a hole in the list.
2143 * Hopefully this is a rare case.
2144 * - Walk the list and find the next mbuf. If
2145 * there isn't one, then done.
2147 if (likely(tx_id == tx_first && count != 0))
2151 * Walk the list and find the next mbuf, if any.
2154 /* Move to next segment. */
2155 tx_id = sw_ring[tx_id].next_id;
2157 if (sw_ring[tx_id].mbuf)
2160 } while (tx_id != tx_first);
2163 * Determine why previous loop bailed. If there
2164 * is not an mbuf, done.
2166 if (sw_ring[tx_id].mbuf == NULL)
/*
 * igc_tx_init - initialize the TX datapath for the whole port.
 *
 * Programs each TX queue's descriptor ring base/length, clears the
 * head/tail pointers, sets the prefetch/host/write-back thresholds and
 * enables the queue (TXDCTL); then configures collision distance and
 * enables the transmit unit via TCTL.
 */
2175 igc_tx_init(struct rte_eth_dev *dev)
2177 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2182 /* Setup the Base and Length of the Tx Descriptor Rings. */
2183 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2184 struct igc_tx_queue *txq = dev->data->tx_queues[i];
2185 uint64_t bus_addr = txq->tx_ring_phys_addr;
2187 IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx),
2189 sizeof(union igc_adv_tx_desc));
2190 IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx),
2191 (uint32_t)(bus_addr >> 32));
2192 IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx),
2193 (uint32_t)bus_addr);
2195 /* Setup the HW Tx Head and Tail descriptor pointers. */
2196 IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0);
2197 IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0);
2199 /* Setup Transmit threshold registers. */
2200 txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) &
2201 IGC_TXDCTL_PTHRESH_MSK;
2202 txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) &
2203 IGC_TXDCTL_HTHRESH_MSK;
2204 txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) &
2205 IGC_TXDCTL_WTHRESH_MSK;
2206 txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
2207 IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl);
2210 igc_config_collision_dist(hw);
2212 /* Program the Transmit Control Register. */
2213 tctl = IGC_READ_REG(hw, IGC_TCTL);
2214 tctl &= ~IGC_TCTL_CT;
2215 tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
2216 ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
2218 /* This write will effectively turn on the transmit unit. */
2219 IGC_WRITE_REG(hw, IGC_TCTL, tctl);
/*
 * eth_igc_rxq_info_get - ethdev rxq_info_get callback: report the
 * mempool, descriptor count and stored configuration of an RX queue.
 */
2223 eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2224 struct rte_eth_rxq_info *qinfo)
2226 struct igc_rx_queue *rxq;
2228 rxq = dev->data->rx_queues[queue_id];
2230 qinfo->mp = rxq->mb_pool;
2231 qinfo->scattered_rx = dev->data->scattered_rx;
2232 qinfo->nb_desc = rxq->nb_rx_desc;
2234 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2235 qinfo->conf.rx_drop_en = rxq->drop_en;
2236 qinfo->conf.offloads = rxq->offloads;
2237 qinfo->conf.rx_thresh.hthresh = rxq->hthresh;
2238 qinfo->conf.rx_thresh.pthresh = rxq->pthresh;
2239 qinfo->conf.rx_thresh.wthresh = rxq->wthresh;
/*
 * eth_igc_txq_info_get - ethdev txq_info_get callback: report the
 * descriptor count and stored configuration of a TX queue.
 */
2243 eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2244 struct rte_eth_txq_info *qinfo)
2246 struct igc_tx_queue *txq;
2248 txq = dev->data->tx_queues[queue_id];
2250 qinfo->nb_desc = txq->nb_tx_desc;
2252 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2253 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2254 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2255 qinfo->conf.offloads = txq->offloads;
2259 eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev,
2260 uint16_t rx_queue_id, int on)
2262 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2263 struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];
2266 if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) {
2267 PMD_DRV_LOG(ERR, "Queue index(%u) illegal, max is %u",
2268 rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2272 reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id));
2274 reg_val |= IGC_DVMOLR_STRVLAN;
2275 rxq->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2277 reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN);
2278 rxq->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
2281 IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val);