1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020 Intel Corporation
5 #include <rte_config.h>
6 #include <rte_malloc.h>
7 #include <rte_ethdev_driver.h>
13 #ifdef RTE_PMD_USE_PREFETCH
14 #define rte_igc_prefetch(p) rte_prefetch0(p)
16 #define rte_igc_prefetch(p) do {} while (0)
19 #ifdef RTE_PMD_PACKET_PREFETCH
20 #define rte_packet_prefetch(p) rte_prefetch1(p)
22 #define rte_packet_prefetch(p) do {} while (0)
25 /* Multicast / Unicast table offset mask. */
26 #define IGC_RCTL_MO_MSK (3u << IGC_RCTL_MO_SHIFT)
29 #define IGC_RCTL_LBM_SHIFT 6
30 #define IGC_RCTL_LBM_MSK (3u << IGC_RCTL_LBM_SHIFT)
32 /* Hash select for MTA */
33 #define IGC_RCTL_HSEL_SHIFT 8
34 #define IGC_RCTL_HSEL_MSK (3u << IGC_RCTL_HSEL_SHIFT)
35 #define IGC_RCTL_PSP (1u << 21)
37 /* Receive buffer size for header buffer */
38 #define IGC_SRRCTL_BSIZEHEADER_SHIFT 8
40 /* RX descriptor status and error flags */
41 #define IGC_RXD_STAT_L4CS (1u << 5)
42 #define IGC_RXD_STAT_VEXT (1u << 9)
43 #define IGC_RXD_STAT_LLINT (1u << 11)
44 #define IGC_RXD_STAT_SCRC (1u << 12)
45 #define IGC_RXD_STAT_SMDT_MASK (3u << 13)
46 #define IGC_RXD_STAT_MC (1u << 19)
47 #define IGC_RXD_EXT_ERR_L4E (1u << 29)
48 #define IGC_RXD_EXT_ERR_IPE (1u << 30)
49 #define IGC_RXD_EXT_ERR_RXE (1u << 31)
50 #define IGC_RXD_RSS_TYPE_MASK 0xfu
51 #define IGC_RXD_PCTYPE_MASK (0x7fu << 4)
52 #define IGC_RXD_ETQF_SHIFT 12
53 #define IGC_RXD_ETQF_MSK (0xfu << IGC_RXD_ETQF_SHIFT)
54 #define IGC_RXD_VPKT (1u << 16)
56 /* TXD control bits */
57 #define IGC_TXDCTL_PTHRESH_SHIFT 0
58 #define IGC_TXDCTL_HTHRESH_SHIFT 8
59 #define IGC_TXDCTL_WTHRESH_SHIFT 16
60 #define IGC_TXDCTL_PTHRESH_MSK (0x1fu << IGC_TXDCTL_PTHRESH_SHIFT)
61 #define IGC_TXDCTL_HTHRESH_MSK (0x1fu << IGC_TXDCTL_HTHRESH_SHIFT)
62 #define IGC_TXDCTL_WTHRESH_MSK (0x1fu << IGC_TXDCTL_WTHRESH_SHIFT)
64 /* RXD control bits */
65 #define IGC_RXDCTL_PTHRESH_SHIFT 0
66 #define IGC_RXDCTL_HTHRESH_SHIFT 8
67 #define IGC_RXDCTL_WTHRESH_SHIFT 16
68 #define IGC_RXDCTL_PTHRESH_MSK (0x1fu << IGC_RXDCTL_PTHRESH_SHIFT)
69 #define IGC_RXDCTL_HTHRESH_MSK (0x1fu << IGC_RXDCTL_HTHRESH_SHIFT)
70 #define IGC_RXDCTL_WTHRESH_MSK (0x1fu << IGC_RXDCTL_WTHRESH_SHIFT)
72 #define IGC_TSO_MAX_HDRLEN 512
73 #define IGC_TSO_MAX_MSS 9216
75 /* Bit Mask to indicate what bits required for building TX context */
76 #define IGC_TX_OFFLOAD_MASK ( \
86 #define IGC_TX_OFFLOAD_SEG (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)
88 #define IGC_ADVTXD_POPTS_TXSM 0x00000200 /* L4 Checksum offload request */
89 #define IGC_ADVTXD_POPTS_IXSM 0x00000100 /* IP Checksum offload request */
91 /* L4 Packet TYPE of Reserved */
92 #define IGC_ADVTXD_TUCMD_L4T_RSV 0x00001800
94 #define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
97 * Structure associated with each descriptor of the RX ring of a RX queue.
100 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
104 * Structure associated with each RX queue.
106 struct igc_rx_queue {
107 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
108 volatile union igc_adv_rx_desc *rx_ring;
109 /**< RX ring virtual address. */
110 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
111 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
112 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
113 struct igc_rx_entry *sw_ring; /**< address of RX software ring. */
114 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
115 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
116 uint16_t nb_rx_desc; /**< number of RX descriptors. */
117 uint16_t rx_tail; /**< current value of RDT register. */
118 uint16_t nb_rx_hold; /**< number of held free RX desc. */
119 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
120 uint16_t queue_id; /**< RX queue index. */
121 uint16_t reg_idx; /**< RX queue register index. */
122 uint16_t port_id; /**< Device port identifier. */
123 uint8_t pthresh; /**< Prefetch threshold register. */
124 uint8_t hthresh; /**< Host threshold register. */
125 uint8_t wthresh; /**< Write-back threshold register. */
126 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
127 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
128 uint32_t flags; /**< RX flags. */
129 uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */
132 /** Offload features */
133 union igc_tx_offload {
136 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
137 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
138 uint64_t vlan_tci:16;
139 /**< VLAN Tag Control Identifier(CPU order). */
140 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
141 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
142 /* uint64_t unused:8; */
147 * Compare mask for igc_tx_offload.data,
148 * should be in sync with igc_tx_offload layout.
150 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
151 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
152 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
153 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
154 /** Mac + IP + TCP + Mss mask. */
155 #define TX_TSO_CMP_MASK \
156 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
159 * Structure to check if new context need be built
161 struct igc_advctx_info {
162 uint64_t flags; /**< ol_flags related to context build. */
163 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
164 union igc_tx_offload tx_offload;
165 /** compare mask for tx offload. */
166 union igc_tx_offload tx_offload_mask;
170 * Hardware context number
173 IGC_CTX_0 = 0, /**< CTX0 */
174 IGC_CTX_1 = 1, /**< CTX1 */
175 IGC_CTX_NUM = 2, /**< CTX_NUM */
179 * Structure associated with each descriptor of the TX ring of a TX queue.
181 struct igc_tx_entry {
182 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
183 uint16_t next_id; /**< Index of next descriptor in ring. */
184 uint16_t last_id; /**< Index of last scattered descriptor. */
188 * Structure associated with each TX queue.
190 struct igc_tx_queue {
191 volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
192 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
193 struct igc_tx_entry *sw_ring; /**< virtual address of SW ring. */
194 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
195 uint32_t txd_type; /**< Device-specific TXD type */
196 uint16_t nb_tx_desc; /**< number of TX descriptors. */
197 uint16_t tx_tail; /**< Current value of TDT register. */
199 /**< Index of first used TX descriptor. */
200 uint16_t queue_id; /**< TX queue index. */
201 uint16_t reg_idx; /**< TX queue register index. */
202 uint16_t port_id; /**< Device port identifier. */
203 uint8_t pthresh; /**< Prefetch threshold register. */
204 uint8_t hthresh; /**< Host threshold register. */
205 uint8_t wthresh; /**< Write-back threshold register. */
208 /**< Start context position for transmit queue. */
209 struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
210 /**< Hardware context history.*/
211 uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */
214 static inline uint64_t
215 rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
217 static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD,
218 PKT_RX_L4_CKSUM_BAD};
220 static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD,
221 PKT_RX_IP_CKSUM_BAD};
222 uint64_t pkt_flags = 0;
225 if (statuserr & IGC_RXD_STAT_VP)
226 pkt_flags |= PKT_RX_VLAN_STRIPPED;
228 tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS));
229 tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E);
230 pkt_flags |= l4_chksum_flags[tmp];
232 tmp = !!(statuserr & IGC_RXD_STAT_IPCS);
233 tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE);
234 pkt_flags |= l3_chksum_flags[tmp];
239 #define IGC_PACKET_TYPE_IPV4 0X01
240 #define IGC_PACKET_TYPE_IPV4_TCP 0X11
241 #define IGC_PACKET_TYPE_IPV4_UDP 0X21
242 #define IGC_PACKET_TYPE_IPV4_SCTP 0X41
243 #define IGC_PACKET_TYPE_IPV4_EXT 0X03
244 #define IGC_PACKET_TYPE_IPV4_EXT_SCTP 0X43
245 #define IGC_PACKET_TYPE_IPV6 0X04
246 #define IGC_PACKET_TYPE_IPV6_TCP 0X14
247 #define IGC_PACKET_TYPE_IPV6_UDP 0X24
248 #define IGC_PACKET_TYPE_IPV6_EXT 0X0C
249 #define IGC_PACKET_TYPE_IPV6_EXT_TCP 0X1C
250 #define IGC_PACKET_TYPE_IPV6_EXT_UDP 0X2C
251 #define IGC_PACKET_TYPE_IPV4_IPV6 0X05
252 #define IGC_PACKET_TYPE_IPV4_IPV6_TCP 0X15
253 #define IGC_PACKET_TYPE_IPV4_IPV6_UDP 0X25
254 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
255 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
256 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
257 #define IGC_PACKET_TYPE_MAX 0X80
258 #define IGC_PACKET_TYPE_MASK 0X7F
259 #define IGC_PACKET_TYPE_SHIFT 0X04
261 static inline uint32_t
262 rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)
264 static const uint32_t
265 ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = {
266 [IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
268 [IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
269 RTE_PTYPE_L3_IPV4_EXT,
270 [IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
272 [IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
273 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
274 RTE_PTYPE_INNER_L3_IPV6,
275 [IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
276 RTE_PTYPE_L3_IPV6_EXT,
277 [IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
278 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
279 RTE_PTYPE_INNER_L3_IPV6_EXT,
280 [IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
281 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
282 [IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
283 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
284 [IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
285 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
286 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
287 [IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
288 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
289 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
290 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
291 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
292 [IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
293 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
294 [IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
295 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
296 [IGC_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
297 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
298 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
299 [IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
300 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
301 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
302 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
303 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
304 [IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
305 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
306 [IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
307 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
309 if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF))
310 return RTE_PTYPE_UNKNOWN;
312 pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK;
314 return ptype_table[pkt_info];
318 rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm,
319 union igc_adv_rx_desc *rxd, uint32_t staterr)
322 uint32_t hlen_type_rss;
325 /* Prefetch data of first segment, if configured to do so. */
326 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
328 rxm->port = rxq->port_id;
329 hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data);
330 rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss);
331 rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan);
333 pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ?
336 if (hlen_type_rss & IGC_RXD_VPKT)
337 pkt_flags |= PKT_RX_VLAN;
339 pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr);
341 rxm->ol_flags = pkt_flags;
342 pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
343 rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info);
347 igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
349 struct igc_rx_queue * const rxq = rx_queue;
350 volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
351 struct igc_rx_entry * const sw_ring = rxq->sw_ring;
352 uint16_t rx_id = rxq->rx_tail;
354 uint16_t nb_hold = 0;
356 while (nb_rx < nb_pkts) {
357 volatile union igc_adv_rx_desc *rxdp;
358 struct igc_rx_entry *rxe;
359 struct rte_mbuf *rxm;
360 struct rte_mbuf *nmb;
361 union igc_adv_rx_desc rxd;
366 * The order of operations here is important as the DD status
367 * bit must not be read after any other descriptor fields.
368 * rx_ring and rxdp are pointing to volatile data so the order
369 * of accesses cannot be reordered by the compiler. If they were
370 * not volatile, they could be reordered which could lead to
371 * using invalid descriptor fields when read from rxd.
373 rxdp = &rx_ring[rx_id];
374 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
375 if (!(staterr & IGC_RXD_STAT_DD))
382 * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is
383 * likely to be invalid and to be dropped by the various
384 * validation checks performed by the network stack.
386 * Allocate a new mbuf to replenish the RX ring descriptor.
387 * If the allocation fails:
388 * - arrange for that RX descriptor to be the first one
389 * being parsed the next time the receive function is
390 * invoked [on the same queue].
392 * - Stop parsing the RX ring and return immediately.
394 * This policy does not drop the packet received in the RX
395 * descriptor for which the allocation of a new mbuf failed.
396 * Thus, it allows that packet to be later retrieved if
397 * mbuf have been freed in the mean time.
398 * As a side effect, holding RX descriptors instead of
399 * systematically giving them back to the NIC may lead to
400 * RX ring exhaustion situations.
401 * However, the NIC can gracefully prevent such situations
402 * to happen by sending specific "back-pressure" flow control
403 * frames to its peer(s).
406 "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
407 rxq->port_id, rxq->queue_id, rx_id, staterr,
408 rte_le_to_cpu_16(rxd.wb.upper.length));
410 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
414 "RX mbuf alloc failed, port_id=%u queue_id=%u",
415 rxq->port_id, rxq->queue_id);
417 rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
422 rxe = &sw_ring[rx_id];
424 if (rx_id >= rxq->nb_rx_desc)
427 /* Prefetch next mbuf while processing current one. */
428 rte_igc_prefetch(sw_ring[rx_id].mbuf);
431 * When next RX descriptor is on a cache-line boundary,
432 * prefetch the next 4 RX descriptors and the next 8 pointers
435 if ((rx_id & 0x3) == 0) {
436 rte_igc_prefetch(&rx_ring[rx_id]);
437 rte_igc_prefetch(&sw_ring[rx_id]);
441 * Update RX descriptor with the physical address of the new
442 * data buffer of the new allocated mbuf.
446 rxdp->read.hdr_addr = 0;
447 rxdp->read.pkt_addr =
448 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
451 rxm->data_off = RTE_PKTMBUF_HEADROOM;
452 data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len;
453 rxm->data_len = data_len;
454 rxm->pkt_len = data_len;
457 rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr);
460 * Store the mbuf address into the next entry of the array
461 * of returned packets.
463 rx_pkts[nb_rx++] = rxm;
465 rxq->rx_tail = rx_id;
468 * If the number of free RX descriptors is greater than the RX free
469 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
471 * Update the RDT with the value of the last processed RX descriptor
472 * minus 1, to guarantee that the RDT register is never equal to the
473 * RDH register, which creates a "full" ring situation from the
474 * hardware point of view...
476 nb_hold = nb_hold + rxq->nb_rx_hold;
477 if (nb_hold > rxq->rx_free_thresh) {
479 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
480 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
481 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
482 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
485 rxq->nb_rx_hold = nb_hold;
490 igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
493 struct igc_rx_queue * const rxq = rx_queue;
494 volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
495 struct igc_rx_entry * const sw_ring = rxq->sw_ring;
496 struct rte_mbuf *first_seg = rxq->pkt_first_seg;
497 struct rte_mbuf *last_seg = rxq->pkt_last_seg;
499 uint16_t rx_id = rxq->rx_tail;
501 uint16_t nb_hold = 0;
503 while (nb_rx < nb_pkts) {
504 volatile union igc_adv_rx_desc *rxdp;
505 struct igc_rx_entry *rxe;
506 struct rte_mbuf *rxm;
507 struct rte_mbuf *nmb;
508 union igc_adv_rx_desc rxd;
514 * The order of operations here is important as the DD status
515 * bit must not be read after any other descriptor fields.
516 * rx_ring and rxdp are pointing to volatile data so the order
517 * of accesses cannot be reordered by the compiler. If they were
518 * not volatile, they could be reordered which could lead to
519 * using invalid descriptor fields when read from rxd.
521 rxdp = &rx_ring[rx_id];
522 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
523 if (!(staterr & IGC_RXD_STAT_DD))
530 * Allocate a new mbuf to replenish the RX ring descriptor.
531 * If the allocation fails:
532 * - arrange for that RX descriptor to be the first one
533 * being parsed the next time the receive function is
534 * invoked [on the same queue].
536 * - Stop parsing the RX ring and return immediately.
538 * This policy does not drop the packet received in the RX
539 * descriptor for which the allocation of a new mbuf failed.
540 * Thus, it allows that packet to be later retrieved if
541 * mbuf have been freed in the mean time.
542 * As a side effect, holding RX descriptors instead of
543 * systematically giving them back to the NIC may lead to
544 * RX ring exhaustion situations.
545 * However, the NIC can gracefully prevent such situations
546 * to happen by sending specific "back-pressure" flow control
547 * frames to its peer(s).
550 "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
551 rxq->port_id, rxq->queue_id, rx_id, staterr,
552 rte_le_to_cpu_16(rxd.wb.upper.length));
554 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
558 "RX mbuf alloc failed, port_id=%u queue_id=%u",
559 rxq->port_id, rxq->queue_id);
561 rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
566 rxe = &sw_ring[rx_id];
568 if (rx_id >= rxq->nb_rx_desc)
571 /* Prefetch next mbuf while processing current one. */
572 rte_igc_prefetch(sw_ring[rx_id].mbuf);
575 * When next RX descriptor is on a cache-line boundary,
576 * prefetch the next 4 RX descriptors and the next 8 pointers
579 if ((rx_id & 0x3) == 0) {
580 rte_igc_prefetch(&rx_ring[rx_id]);
581 rte_igc_prefetch(&sw_ring[rx_id]);
585 * Update RX descriptor with the physical address of the new
586 * data buffer of the new allocated mbuf.
590 rxdp->read.hdr_addr = 0;
591 rxdp->read.pkt_addr =
592 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
596 * Set data length & data buffer address of mbuf.
598 rxm->data_off = RTE_PKTMBUF_HEADROOM;
599 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
600 rxm->data_len = data_len;
603 * If this is the first buffer of the received packet,
604 * set the pointer to the first mbuf of the packet and
605 * initialize its context.
606 * Otherwise, update the total length and the number of segments
607 * of the current scattered packet, and update the pointer to
608 * the last mbuf of the current packet.
610 if (first_seg == NULL) {
612 first_seg->pkt_len = data_len;
613 first_seg->nb_segs = 1;
615 first_seg->pkt_len += data_len;
616 first_seg->nb_segs++;
617 last_seg->next = rxm;
621 * If this is not the last buffer of the received packet,
622 * update the pointer to the last mbuf of the current scattered
623 * packet and continue to parse the RX ring.
625 if (!(staterr & IGC_RXD_STAT_EOP)) {
631 * This is the last buffer of the received packet.
632 * If the CRC is not stripped by the hardware:
633 * - Subtract the CRC length from the total packet length.
634 * - If the last buffer only contains the whole CRC or a part
635 * of it, free the mbuf associated to the last buffer.
636 * If part of the CRC is also contained in the previous
637 * mbuf, subtract the length of that CRC part from the
638 * data length of the previous mbuf.
640 if (unlikely(rxq->crc_len > 0)) {
641 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
642 if (data_len <= RTE_ETHER_CRC_LEN) {
643 rte_pktmbuf_free_seg(rxm);
644 first_seg->nb_segs--;
645 last_seg->data_len = last_seg->data_len -
646 (RTE_ETHER_CRC_LEN - data_len);
647 last_seg->next = NULL;
649 rxm->data_len = (uint16_t)
650 (data_len - RTE_ETHER_CRC_LEN);
654 rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr);
657 * Store the mbuf address into the next entry of the array
658 * of returned packets.
660 rx_pkts[nb_rx++] = first_seg;
662 /* Setup receipt context for a new packet. */
665 rxq->rx_tail = rx_id;
668 * Save receive context.
670 rxq->pkt_first_seg = first_seg;
671 rxq->pkt_last_seg = last_seg;
674 * If the number of free RX descriptors is greater than the RX free
675 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
677 * Update the RDT with the value of the last processed RX descriptor
678 * minus 1, to guarantee that the RDT register is never equal to the
679 * RDH register, which creates a "full" ring situation from the
680 * hardware point of view...
682 nb_hold = nb_hold + rxq->nb_rx_hold;
683 if (nb_hold > rxq->rx_free_thresh) {
685 "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
686 rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
687 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
688 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
691 rxq->nb_rx_hold = nb_hold;
696 igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq)
700 if (rxq->sw_ring != NULL) {
701 for (i = 0; i < rxq->nb_rx_desc; i++) {
702 if (rxq->sw_ring[i].mbuf != NULL) {
703 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
704 rxq->sw_ring[i].mbuf = NULL;
711 igc_rx_queue_release(struct igc_rx_queue *rxq)
713 igc_rx_queue_release_mbufs(rxq);
714 rte_free(rxq->sw_ring);
718 void eth_igc_rx_queue_release(void *rxq)
721 igc_rx_queue_release(rxq);
724 uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev,
725 uint16_t rx_queue_id)
728 * Check the DD bit of a rx descriptor of each 4 in a group,
729 * to avoid checking too frequently and downgrading performance
732 #define IGC_RXQ_SCAN_INTERVAL 4
734 volatile union igc_adv_rx_desc *rxdp;
735 struct igc_rx_queue *rxq;
738 rxq = dev->data->rx_queues[rx_queue_id];
739 rxdp = &rxq->rx_ring[rxq->rx_tail];
741 while (desc < rxq->nb_rx_desc - rxq->rx_tail) {
742 if (unlikely(!(rxdp->wb.upper.status_error &
745 desc += IGC_RXQ_SCAN_INTERVAL;
746 rxdp += IGC_RXQ_SCAN_INTERVAL;
748 rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
750 while (desc < rxq->nb_rx_desc &&
751 (rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) {
752 desc += IGC_RXQ_SCAN_INTERVAL;
753 rxdp += IGC_RXQ_SCAN_INTERVAL;
759 int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset)
761 volatile union igc_adv_rx_desc *rxdp;
762 struct igc_rx_queue *rxq = rx_queue;
765 if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
768 desc = rxq->rx_tail + offset;
769 if (desc >= rxq->nb_rx_desc)
770 desc -= rxq->nb_rx_desc;
772 rxdp = &rxq->rx_ring[desc];
773 return !!(rxdp->wb.upper.status_error &
774 rte_cpu_to_le_32(IGC_RXD_STAT_DD));
777 int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset)
779 struct igc_rx_queue *rxq = rx_queue;
780 volatile uint32_t *status;
783 if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
786 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
787 return RTE_ETH_RX_DESC_UNAVAIL;
789 desc = rxq->rx_tail + offset;
790 if (desc >= rxq->nb_rx_desc)
791 desc -= rxq->nb_rx_desc;
793 status = &rxq->rx_ring[desc].wb.upper.status_error;
794 if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD))
795 return RTE_ETH_RX_DESC_DONE;
797 return RTE_ETH_RX_DESC_AVAIL;
801 igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq)
803 struct igc_rx_entry *rxe = rxq->sw_ring;
807 /* Initialize software ring entries. */
808 for (i = 0; i < rxq->nb_rx_desc; i++) {
809 volatile union igc_adv_rx_desc *rxd;
810 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
813 PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu",
817 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
818 rxd = &rxq->rx_ring[i];
819 rxd->read.hdr_addr = 0;
820 rxd->read.pkt_addr = dma_addr;
828 * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet.
829 * Used as the default key.
831 static uint8_t default_rss_key[40] = {
832 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
833 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
834 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
835 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
836 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
840 igc_rss_disable(struct rte_eth_dev *dev)
842 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
845 mrqc = IGC_READ_REG(hw, IGC_MRQC);
846 mrqc &= ~IGC_MRQC_ENABLE_MASK;
847 IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
851 igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf)
853 uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
857 if (hash_key != NULL) {
860 /* Fill in RSS hash key */
861 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
862 IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]);
865 /* Set configured hashing protocols in MRQC register */
866 rss_hf = rss_conf->rss_hf;
867 mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
868 if (rss_hf & ETH_RSS_IPV4)
869 mrqc |= IGC_MRQC_RSS_FIELD_IPV4;
870 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
871 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP;
872 if (rss_hf & ETH_RSS_IPV6)
873 mrqc |= IGC_MRQC_RSS_FIELD_IPV6;
874 if (rss_hf & ETH_RSS_IPV6_EX)
875 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX;
876 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
877 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP;
878 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
879 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
880 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
881 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
882 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
883 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
884 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
885 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX;
886 IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
890 igc_rss_configure(struct rte_eth_dev *dev)
892 struct rte_eth_rss_conf rss_conf;
893 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
896 /* Fill in redirection table. */
897 for (i = 0; i < IGC_RSS_RDT_SIZD; i++) {
898 union igc_rss_reta_reg reta;
899 uint16_t q_idx, reta_idx;
901 q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ?
902 i % dev->data->nb_rx_queues : 0);
903 reta_idx = i % sizeof(reta);
904 reta.bytes[reta_idx] = q_idx;
905 if (reta_idx == sizeof(reta) - 1)
906 IGC_WRITE_REG_LE_VALUE(hw,
907 IGC_RETA(i / sizeof(reta)), reta.dword);
911 * Configure the RSS key and the RSS protocols used to compute
912 * the RSS hash of input packets.
914 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
915 if (rss_conf.rss_key == NULL)
916 rss_conf.rss_key = default_rss_key;
917 igc_hw_rss_hash_set(hw, &rss_conf);
921 igc_del_rss_filter(struct rte_eth_dev *dev)
923 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
925 if (rss_filter->enable) {
926 /* recover default RSS configuration */
927 igc_rss_configure(dev);
929 /* disable RSS logic and clear filter data */
930 igc_rss_disable(dev);
931 memset(rss_filter, 0, sizeof(*rss_filter));
934 PMD_DRV_LOG(ERR, "filter not exist!");
938 /* Initiate the filter structure by the structure of rte_flow_action_rss */
940 igc_rss_conf_set(struct igc_rss_filter *out,
941 const struct rte_flow_action_rss *rss)
943 out->conf.func = rss->func;
944 out->conf.level = rss->level;
945 out->conf.types = rss->types;
947 if (rss->key_len == sizeof(out->key)) {
948 memcpy(out->key, rss->key, rss->key_len);
949 out->conf.key = out->key;
950 out->conf.key_len = rss->key_len;
952 out->conf.key = NULL;
953 out->conf.key_len = 0;
956 if (rss->queue_num <= IGC_RSS_RDT_SIZD) {
957 memcpy(out->queue, rss->queue,
958 sizeof(*out->queue) * rss->queue_num);
959 out->conf.queue = out->queue;
960 out->conf.queue_num = rss->queue_num;
962 out->conf.queue = NULL;
963 out->conf.queue_num = 0;
968 igc_add_rss_filter(struct rte_eth_dev *dev, struct igc_rss_filter *rss)
970 struct rte_eth_rss_conf rss_conf = {
971 .rss_key = rss->conf.key_len ?
972 (void *)(uintptr_t)rss->conf.key : NULL,
973 .rss_key_len = rss->conf.key_len,
974 .rss_hf = rss->conf.types,
976 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
977 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
980 /* check RSS type is valid */
981 if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) {
983 "RSS type(0x%" PRIx64 ") error!, only 0x%" PRIx64
984 " been supported", rss_conf.rss_hf,
985 (uint64_t)IGC_RSS_OFFLOAD_ALL);
989 /* check queue count is not zero */
990 if (!rss->conf.queue_num) {
991 PMD_DRV_LOG(ERR, "Queue number should not be 0!");
995 /* check queue id is valid */
996 for (i = 0; i < rss->conf.queue_num; i++)
997 if (rss->conf.queue[i] >= dev->data->nb_rx_queues) {
998 PMD_DRV_LOG(ERR, "Queue id %u is invalid!",
1003 /* only support one filter */
1004 if (rss_filter->enable) {
1005 PMD_DRV_LOG(ERR, "Only support one RSS filter!");
1008 rss_filter->enable = 1;
1010 igc_rss_conf_set(rss_filter, &rss->conf);
1012 /* Fill in redirection table. */
1013 for (i = 0, j = 0; i < IGC_RSS_RDT_SIZD; i++, j++) {
1014 union igc_rss_reta_reg reta;
1015 uint16_t q_idx, reta_idx;
1017 if (j == rss->conf.queue_num)
1019 q_idx = rss->conf.queue[j];
1020 reta_idx = i % sizeof(reta);
1021 reta.bytes[reta_idx] = q_idx;
1022 if (reta_idx == sizeof(reta) - 1)
1023 IGC_WRITE_REG_LE_VALUE(hw,
1024 IGC_RETA(i / sizeof(reta)), reta.dword);
1027 if (rss_conf.rss_key == NULL)
1028 rss_conf.rss_key = default_rss_key;
1029 igc_hw_rss_hash_set(hw, &rss_conf);
1034 igc_clear_rss_filter(struct rte_eth_dev *dev)
1036 struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
1038 if (!rss_filter->enable) {
1039 PMD_DRV_LOG(WARNING, "RSS filter not enabled!");
1043 /* recover default RSS configuration */
1044 igc_rss_configure(dev);
1046 /* disable RSS logic and clear filter data */
1047 igc_rss_disable(dev);
1048 memset(rss_filter, 0, sizeof(*rss_filter));
1052 igc_dev_mq_rx_configure(struct rte_eth_dev *dev)
1054 if (RTE_ETH_DEV_SRIOV(dev).active) {
1055 PMD_DRV_LOG(ERR, "SRIOV unsupported!");
1059 switch (dev->data->dev_conf.rxmode.mq_mode) {
1061 igc_rss_configure(dev);
1063 case ETH_MQ_RX_NONE:
1065 * configure RSS register for following,
1066 * then disable the RSS logic
1068 igc_rss_configure(dev);
1069 igc_rss_disable(dev);
1072 PMD_DRV_LOG(ERR, "rx mode(%d) not supported!",
1073 dev->data->dev_conf.rxmode.mq_mode);
1080 igc_rx_init(struct rte_eth_dev *dev)
1082 struct igc_rx_queue *rxq;
1083 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1084 uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
1085 uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1089 uint16_t rctl_bsize;
1093 dev->rx_pkt_burst = igc_recv_pkts;
1096 * Make sure receives are disabled while setting
1097 * up the descriptor ring.
1099 rctl = IGC_READ_REG(hw, IGC_RCTL);
1100 IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
1102 /* Configure support of jumbo frames, if any. */
1103 if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
1104 rctl |= IGC_RCTL_LPE;
1107 * Set maximum packet length by default, and might be updated
1108 * together with enabling/disabling dual VLAN.
1110 IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pkt_len);
1112 rctl &= ~IGC_RCTL_LPE;
1115 /* Configure and enable each RX queue. */
1117 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1122 rxq = dev->data->rx_queues[i];
1125 /* Allocate buffers for descriptor rings and set up queue */
1126 ret = igc_alloc_rx_queue_mbufs(rxq);
1131 * Reset crc_len in case it was changed after queue setup by a
1134 rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ?
1135 RTE_ETHER_CRC_LEN : 0;
1137 bus_addr = rxq->rx_ring_phys_addr;
1138 IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx),
1140 sizeof(union igc_adv_rx_desc));
1141 IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx),
1142 (uint32_t)(bus_addr >> 32));
1143 IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx),
1144 (uint32_t)bus_addr);
1146 /* set descriptor configuration */
1147 srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
1149 srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) <<
1150 IGC_SRRCTL_BSIZEHEADER_SHIFT;
1152 * Configure RX buffer size.
1154 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
1155 RTE_PKTMBUF_HEADROOM);
1156 if (buf_size >= 1024) {
1158 * Configure the BSIZEPACKET field of the SRRCTL
1159 * register of the queue.
1160 * Value is in 1 KB resolution, from 1 KB to 16 KB.
1161 * If this field is equal to 0b, then RCTL.BSIZE
1162 * determines the RX packet buffer size.
1165 srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) &
1166 IGC_SRRCTL_BSIZEPKT_MASK);
1167 buf_size = (uint16_t)((srrctl &
1168 IGC_SRRCTL_BSIZEPKT_MASK) <<
1169 IGC_SRRCTL_BSIZEPKT_SHIFT);
1171 /* It adds dual VLAN length for supporting dual VLAN */
1172 if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size)
1173 dev->data->scattered_rx = 1;
1176 * Use BSIZE field of the device RCTL register.
1178 if (rctl_bsize == 0 || rctl_bsize > buf_size)
1179 rctl_bsize = buf_size;
1180 dev->data->scattered_rx = 1;
1183 /* Set if packets are dropped when no descriptors available */
1185 srrctl |= IGC_SRRCTL_DROP_EN;
1187 IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl);
1189 /* Enable this RX queue. */
1190 rxdctl = IGC_RXDCTL_QUEUE_ENABLE;
1191 rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) &
1192 IGC_RXDCTL_PTHRESH_MSK;
1193 rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) &
1194 IGC_RXDCTL_HTHRESH_MSK;
1195 rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) &
1196 IGC_RXDCTL_WTHRESH_MSK;
1197 IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl);
1200 if (offloads & DEV_RX_OFFLOAD_SCATTER)
1201 dev->data->scattered_rx = 1;
1203 if (dev->data->scattered_rx) {
1204 PMD_DRV_LOG(DEBUG, "forcing scatter mode");
1205 dev->rx_pkt_burst = igc_recv_scattered_pkts;
1208 * Setup BSIZE field of RCTL register, if needed.
1209 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
1210 * register, since the code above configures the SRRCTL register of
1211 * the RX queue in such a case.
1212 * All configurable sizes are:
1213 * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX);
1214 * 8192: rctl |= (IGC_RCTL_SZ_8192 | IGC_RCTL_BSEX);
1215 * 4096: rctl |= (IGC_RCTL_SZ_4096 | IGC_RCTL_BSEX);
1216 * 2048: rctl |= IGC_RCTL_SZ_2048;
1217 * 1024: rctl |= IGC_RCTL_SZ_1024;
1218 * 512: rctl |= IGC_RCTL_SZ_512;
1219 * 256: rctl |= IGC_RCTL_SZ_256;
1221 if (rctl_bsize > 0) {
1222 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1223 rctl |= IGC_RCTL_SZ_512;
1224 else /* 256 <= buf_size < 512 - use 256 */
1225 rctl |= IGC_RCTL_SZ_256;
1229 * Configure RSS if device configured with multiple RX queues.
1231 igc_dev_mq_rx_configure(dev);
1233 /* Update the rctl since igc_dev_mq_rx_configure may change its value */
1234 rctl |= IGC_READ_REG(hw, IGC_RCTL);
1237 * Setup the Checksum Register.
1238 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1240 rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
1241 rxcsum |= IGC_RXCSUM_PCSD;
1243 /* Enable both L3/L4 rx checksum offload */
1244 if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
1245 rxcsum |= IGC_RXCSUM_IPOFL;
1247 rxcsum &= ~IGC_RXCSUM_IPOFL;
1250 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) {
1251 rxcsum |= IGC_RXCSUM_TUOFL;
1252 offloads |= DEV_RX_OFFLOAD_SCTP_CKSUM;
1254 rxcsum &= ~IGC_RXCSUM_TUOFL;
1257 if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM)
1258 rxcsum |= IGC_RXCSUM_CRCOFL;
1260 rxcsum &= ~IGC_RXCSUM_CRCOFL;
1262 IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
1264 /* Setup the Receive Control Register. */
1265 if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1266 rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1268 rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */
1270 rctl &= ~IGC_RCTL_MO_MSK;
1271 rctl &= ~IGC_RCTL_LBM_MSK;
1272 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
1274 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
1276 if (dev->data->dev_conf.lpbk_mode == 1)
1277 rctl |= IGC_RCTL_LBM_MAC;
1279 rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI |
1280 IGC_RCTL_PSP | IGC_RCTL_PMCF);
1282 /* Make sure VLAN Filters are off. */
1283 rctl &= ~IGC_RCTL_VFE;
1284 /* Don't store bad packets. */
1285 rctl &= ~IGC_RCTL_SBP;
1287 /* Enable Receives. */
1288 IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1291 * Setup the HW Rx Head and Tail Descriptor Pointers.
1292 * This needs to be done after enable.
1294 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1295 rxq = dev->data->rx_queues[i];
1296 IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0);
1297 IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx),
1298 rxq->nb_rx_desc - 1);
1300 /* strip queue vlan offload */
1301 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) {
1303 dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->queue_id));
1305 /* If vlan been stripped off, the CRC is meaningless. */
1306 dvmolr |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
1307 IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr);
1315 igc_reset_rx_queue(struct igc_rx_queue *rxq)
1317 static const union igc_adv_rx_desc zeroed_desc = { {0} };
1320 /* Zero out HW ring memory */
1321 for (i = 0; i < rxq->nb_rx_desc; i++)
1322 rxq->rx_ring[i] = zeroed_desc;
1325 rxq->pkt_first_seg = NULL;
1326 rxq->pkt_last_seg = NULL;
1330 eth_igc_rx_queue_setup(struct rte_eth_dev *dev,
1333 unsigned int socket_id,
1334 const struct rte_eth_rxconf *rx_conf,
1335 struct rte_mempool *mp)
1337 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1338 const struct rte_memzone *rz;
1339 struct igc_rx_queue *rxq;
1343 * Validate number of receive descriptors.
1344 * It must not exceed hardware maximum, and must be multiple
1345 * of IGC_RX_DESCRIPTOR_MULTIPLE.
1347 if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 ||
1348 nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) {
1350 "RX descriptor must be multiple of %u(cur: %u) and between %u and %u",
1351 IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc,
1352 IGC_MIN_RXD, IGC_MAX_RXD);
1356 /* Free memory prior to re-allocation if needed */
1357 if (dev->data->rx_queues[queue_idx] != NULL) {
1358 igc_rx_queue_release(dev->data->rx_queues[queue_idx]);
1359 dev->data->rx_queues[queue_idx] = NULL;
1362 /* First allocate the RX queue data structure. */
1363 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue),
1364 RTE_CACHE_LINE_SIZE);
1367 rxq->offloads = rx_conf->offloads;
1369 rxq->nb_rx_desc = nb_desc;
1370 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1371 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1372 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1373 rxq->drop_en = rx_conf->rx_drop_en;
1374 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1375 rxq->queue_id = queue_idx;
1376 rxq->reg_idx = queue_idx;
1377 rxq->port_id = dev->data->port_id;
1380 * Allocate RX ring hardware descriptors. A memzone large enough to
1381 * handle the maximum ring size is allocated in order to allow for
1382 * resizing in later calls to the queue setup function.
1384 size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD;
1385 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1386 IGC_ALIGN, socket_id);
1388 igc_rx_queue_release(rxq);
1391 rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx));
1392 rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx));
1393 rxq->rx_ring_phys_addr = rz->iova;
1394 rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr;
1396 /* Allocate software ring. */
1397 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1398 sizeof(struct igc_rx_entry) * nb_desc,
1399 RTE_CACHE_LINE_SIZE);
1400 if (rxq->sw_ring == NULL) {
1401 igc_rx_queue_release(rxq);
1405 PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
1406 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1408 dev->data->rx_queues[queue_idx] = rxq;
1409 igc_reset_rx_queue(rxq);
1414 /* prepare packets for transmit */
1416 eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1422 for (i = 0; i < nb_pkts; i++) {
1425 /* Check some limitations for TSO in hardware */
1426 if (m->ol_flags & IGC_TX_OFFLOAD_SEG)
1427 if (m->tso_segsz > IGC_TSO_MAX_MSS ||
1428 m->l2_len + m->l3_len + m->l4_len >
1429 IGC_TSO_MAX_HDRLEN) {
1434 if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) {
1435 rte_errno = ENOTSUP;
1439 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1440 ret = rte_validate_tx_offload(m);
1446 ret = rte_net_intel_cksum_prepare(m);
1457 *There're some limitations in hardware for TCP segmentation offload. We
1458 *should check whether the parameters are valid.
1460 static inline uint64_t
1461 check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para)
1463 if (!(ol_req & IGC_TX_OFFLOAD_SEG))
1465 if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len +
1466 ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) {
1467 ol_req &= ~IGC_TX_OFFLOAD_SEG;
1468 ol_req |= PKT_TX_TCP_CKSUM;
1474 * Check which hardware context can be used. Use the existing match
1475 * or create a new context descriptor.
1477 static inline uint32_t
1478 what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
1479 union igc_tx_offload tx_offload)
1481 uint32_t curr = txq->ctx_curr;
1483 /* If match with the current context */
1484 if (likely(txq->ctx_cache[curr].flags == flags &&
1485 txq->ctx_cache[curr].tx_offload.data ==
1486 (txq->ctx_cache[curr].tx_offload_mask.data &
1487 tx_offload.data))) {
1491 /* Total two context, if match with the second context */
1493 if (likely(txq->ctx_cache[curr].flags == flags &&
1494 txq->ctx_cache[curr].tx_offload.data ==
1495 (txq->ctx_cache[curr].tx_offload_mask.data &
1496 tx_offload.data))) {
1497 txq->ctx_curr = curr;
1501 /* Mismatch, create new one */
1506 * This is a separate function, looking for optimization opportunity here
1507 * Rework required to go with the pre-defined values.
1510 igc_set_xmit_ctx(struct igc_tx_queue *txq,
1511 volatile struct igc_adv_tx_context_desc *ctx_txd,
1512 uint64_t ol_flags, union igc_tx_offload tx_offload)
1514 uint32_t type_tucmd_mlhl;
1515 uint32_t mss_l4len_idx;
1517 uint32_t vlan_macip_lens;
1518 union igc_tx_offload tx_offload_mask;
1520 /* Use the previous context */
1522 ctx_curr = txq->ctx_curr;
1524 tx_offload_mask.data = 0;
1525 type_tucmd_mlhl = 0;
1527 /* Specify which HW CTX to upload. */
1528 mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1530 if (ol_flags & PKT_TX_VLAN_PKT)
1531 tx_offload_mask.vlan_tci = 0xffff;
1533 /* check if TCP segmentation required for this packet */
1534 if (ol_flags & IGC_TX_OFFLOAD_SEG) {
1535 /* implies IP cksum in IPv4 */
1536 if (ol_flags & PKT_TX_IP_CKSUM)
1537 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 |
1538 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1540 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 |
1541 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1543 if (ol_flags & PKT_TX_TCP_SEG)
1544 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
1546 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
1548 tx_offload_mask.data |= TX_TSO_CMP_MASK;
1549 mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz <<
1550 IGC_ADVTXD_MSS_SHIFT;
1551 mss_l4len_idx |= (uint32_t)tx_offload.l4_len <<
1552 IGC_ADVTXD_L4LEN_SHIFT;
1553 } else { /* no TSO, check if hardware checksum is needed */
1554 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
1555 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
1557 if (ol_flags & PKT_TX_IP_CKSUM)
1558 type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4;
1560 switch (ol_flags & PKT_TX_L4_MASK) {
1561 case PKT_TX_TCP_CKSUM:
1562 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP |
1563 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1564 mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr)
1565 << IGC_ADVTXD_L4LEN_SHIFT;
1567 case PKT_TX_UDP_CKSUM:
1568 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP |
1569 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1570 mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr)
1571 << IGC_ADVTXD_L4LEN_SHIFT;
1573 case PKT_TX_SCTP_CKSUM:
1574 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP |
1575 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1576 mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr)
1577 << IGC_ADVTXD_L4LEN_SHIFT;
1580 type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV |
1581 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1586 txq->ctx_cache[ctx_curr].flags = ol_flags;
1587 txq->ctx_cache[ctx_curr].tx_offload.data =
1588 tx_offload_mask.data & tx_offload.data;
1589 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
1591 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
1592 vlan_macip_lens = (uint32_t)tx_offload.data;
1593 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
1594 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1595 ctx_txd->u.launch_time = 0;
1598 static inline uint32_t
1599 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
1602 static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE};
1603 static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE};
1604 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
1605 cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1609 static inline uint32_t
1610 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
1612 static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM};
1613 static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM};
1616 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
1617 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
1618 tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1623 igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1625 struct igc_tx_queue * const txq = tx_queue;
1626 struct igc_tx_entry * const sw_ring = txq->sw_ring;
1627 struct igc_tx_entry *txe, *txn;
1628 volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1629 volatile union igc_adv_tx_desc *txd;
1630 struct rte_mbuf *tx_pkt;
1631 struct rte_mbuf *m_seg;
1632 uint64_t buf_dma_addr;
1633 uint32_t olinfo_status;
1634 uint32_t cmd_type_len;
1643 uint32_t new_ctx = 0;
1644 union igc_tx_offload tx_offload = {0};
1646 tx_id = txq->tx_tail;
1647 txe = &sw_ring[tx_id];
1649 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1650 tx_pkt = *tx_pkts++;
1651 pkt_len = tx_pkt->pkt_len;
1653 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1656 * The number of descriptors that must be allocated for a
1657 * packet is the number of segments of that packet, plus 1
1658 * Context Descriptor for the VLAN Tag Identifier, if any.
1659 * Determine the last TX descriptor to allocate in the TX ring
1660 * for the packet, starting from the current position (tx_id)
1663 tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1);
1665 ol_flags = tx_pkt->ol_flags;
1666 tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK;
1668 /* If a Context Descriptor need be built . */
1670 tx_offload.l2_len = tx_pkt->l2_len;
1671 tx_offload.l3_len = tx_pkt->l3_len;
1672 tx_offload.l4_len = tx_pkt->l4_len;
1673 tx_offload.vlan_tci = tx_pkt->vlan_tci;
1674 tx_offload.tso_segsz = tx_pkt->tso_segsz;
1675 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
1677 new_ctx = what_advctx_update(txq, tx_ol_req,
1679 /* Only allocate context descriptor if required*/
1680 new_ctx = (new_ctx >= IGC_CTX_NUM);
1681 tx_last = (uint16_t)(tx_last + new_ctx);
1683 if (tx_last >= txq->nb_tx_desc)
1684 tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1687 "port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u",
1688 txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last);
1691 * Check if there are enough free descriptors in the TX ring
1692 * to transmit the next packet.
1693 * This operation is based on the two following rules:
1695 * 1- Only check that the last needed TX descriptor can be
1696 * allocated (by construction, if that descriptor is free,
1697 * all intermediate ones are also free).
1699 * For this purpose, the index of the last TX descriptor
1700 * used for a packet (the "last descriptor" of a packet)
1701 * is recorded in the TX entries (the last one included)
1702 * that are associated with all TX descriptors allocated
1705 * 2- Avoid to allocate the last free TX descriptor of the
1706 * ring, in order to never set the TDT register with the
1707 * same value stored in parallel by the NIC in the TDH
1708 * register, which makes the TX engine of the NIC enter
1709 * in a deadlock situation.
1711 * By extension, avoid to allocate a free descriptor that
1712 * belongs to the last set of free descriptors allocated
1713 * to the same packet previously transmitted.
1717 * The "last descriptor" of the previously sent packet, if any,
1718 * which used the last descriptor to allocate.
1720 tx_end = sw_ring[tx_last].last_id;
1723 * The next descriptor following that "last descriptor" in the
1726 tx_end = sw_ring[tx_end].next_id;
1729 * The "last descriptor" associated with that next descriptor.
1731 tx_end = sw_ring[tx_end].last_id;
1734 * Check that this descriptor is free.
1736 if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) {
1743 * Set common flags of all TX Data Descriptors.
1745 * The following bits must be set in all Data Descriptors:
1746 * - IGC_ADVTXD_DTYP_DATA
1747 * - IGC_ADVTXD_DCMD_DEXT
1749 * The following bits must be set in the first Data Descriptor
1750 * and are ignored in the other ones:
1751 * - IGC_ADVTXD_DCMD_IFCS
1752 * - IGC_ADVTXD_MAC_1588
1753 * - IGC_ADVTXD_DCMD_VLE
1755 * The following bits must only be set in the last Data
1759 * The following bits can be set in any Data Descriptor, but
1760 * are only set in the last Data Descriptor:
1763 cmd_type_len = txq->txd_type |
1764 IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT;
1765 if (tx_ol_req & IGC_TX_OFFLOAD_SEG)
1766 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len +
1768 olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT);
1771 * Timer 0 should be used to for packet timestamping,
1772 * sample the packet timestamp to reg 0
1774 if (ol_flags & PKT_TX_IEEE1588_TMST)
1775 cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP;
1778 /* Setup TX Advanced context descriptor if required */
1780 volatile struct igc_adv_tx_context_desc *
1781 ctx_txd = (volatile struct
1782 igc_adv_tx_context_desc *)&txr[tx_id];
1784 txn = &sw_ring[txe->next_id];
1785 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1787 if (txe->mbuf != NULL) {
1788 rte_pktmbuf_free_seg(txe->mbuf);
1792 igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
1795 txe->last_id = tx_last;
1796 tx_id = txe->next_id;
1800 /* Setup the TX Advanced Data Descriptor */
1802 tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
1804 tx_desc_cksum_flags_to_olinfo(tx_ol_req);
1805 olinfo_status |= (uint32_t)txq->ctx_curr <<
1806 IGC_ADVTXD_IDX_SHIFT;
1811 txn = &sw_ring[txe->next_id];
1812 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1816 if (txe->mbuf != NULL)
1817 rte_pktmbuf_free_seg(txe->mbuf);
1820 /* Set up transmit descriptor */
1821 slen = (uint16_t)m_seg->data_len;
1822 buf_dma_addr = rte_mbuf_data_iova(m_seg);
1823 txd->read.buffer_addr =
1824 rte_cpu_to_le_64(buf_dma_addr);
1825 txd->read.cmd_type_len =
1826 rte_cpu_to_le_32(cmd_type_len | slen);
1827 txd->read.olinfo_status =
1828 rte_cpu_to_le_32(olinfo_status);
1829 txe->last_id = tx_last;
1830 tx_id = txe->next_id;
1832 m_seg = m_seg->next;
1833 } while (m_seg != NULL);
1836 * The last packet data descriptor needs End Of Packet (EOP)
1837 * and Report Status (RS).
1839 txd->read.cmd_type_len |=
1840 rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS);
1846 * Set the Transmit Descriptor Tail (TDT).
1848 IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
1849 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1850 txq->port_id, txq->queue_id, tx_id, nb_tx);
1851 txq->tx_tail = tx_id;
1856 int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset)
1858 struct igc_tx_queue *txq = tx_queue;
1859 volatile uint32_t *status;
1862 if (unlikely(!txq || offset >= txq->nb_tx_desc))
1865 desc = txq->tx_tail + offset;
1866 if (desc >= txq->nb_tx_desc)
1867 desc -= txq->nb_tx_desc;
1869 status = &txq->tx_ring[desc].wb.status;
1870 if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD))
1871 return RTE_ETH_TX_DESC_DONE;
1873 return RTE_ETH_TX_DESC_FULL;
1877 igc_tx_queue_release_mbufs(struct igc_tx_queue *txq)
1881 if (txq->sw_ring != NULL) {
1882 for (i = 0; i < txq->nb_tx_desc; i++) {
1883 if (txq->sw_ring[i].mbuf != NULL) {
1884 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1885 txq->sw_ring[i].mbuf = NULL;
1892 igc_tx_queue_release(struct igc_tx_queue *txq)
1894 igc_tx_queue_release_mbufs(txq);
1895 rte_free(txq->sw_ring);
1899 void eth_igc_tx_queue_release(void *txq)
1902 igc_tx_queue_release(txq);
1906 igc_reset_tx_queue_stat(struct igc_tx_queue *txq)
1911 memset((void *)&txq->ctx_cache, 0,
1912 IGC_CTX_NUM * sizeof(struct igc_advctx_info));
1916 igc_reset_tx_queue(struct igc_tx_queue *txq)
1918 struct igc_tx_entry *txe = txq->sw_ring;
1921 /* Initialize ring entries */
1922 prev = (uint16_t)(txq->nb_tx_desc - 1);
1923 for (i = 0; i < txq->nb_tx_desc; i++) {
1924 volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i];
1926 txd->wb.status = IGC_TXD_STAT_DD;
1929 txe[prev].next_id = i;
1933 txq->txd_type = IGC_ADVTXD_DTYP_DATA;
1934 igc_reset_tx_queue_stat(txq);
1938 * clear all rx/tx queue
1941 igc_dev_clear_queues(struct rte_eth_dev *dev)
1944 struct igc_tx_queue *txq;
1945 struct igc_rx_queue *rxq;
1947 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1948 txq = dev->data->tx_queues[i];
1950 igc_tx_queue_release_mbufs(txq);
1951 igc_reset_tx_queue(txq);
1955 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1956 rxq = dev->data->rx_queues[i];
1958 igc_rx_queue_release_mbufs(rxq);
1959 igc_reset_rx_queue(rxq);
1964 int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1965 uint16_t nb_desc, unsigned int socket_id,
1966 const struct rte_eth_txconf *tx_conf)
1968 const struct rte_memzone *tz;
1969 struct igc_tx_queue *txq;
1973 if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 ||
1974 nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) {
1976 "TX-descriptor must be a multiple of %u and between %u and %u, cur: %u",
1977 IGC_TX_DESCRIPTOR_MULTIPLE,
1978 IGC_MAX_TXD, IGC_MIN_TXD, nb_desc);
1982 hw = IGC_DEV_PRIVATE_HW(dev);
1985 * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G
1988 if (tx_conf->tx_free_thresh != 0)
1990 "The tx_free_thresh parameter is not used for the 2.5G driver");
1991 if (tx_conf->tx_rs_thresh != 0)
1993 "The tx_rs_thresh parameter is not used for the 2.5G driver");
1994 if (tx_conf->tx_thresh.wthresh == 0)
1996 "To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16.");
1998 /* Free memory prior to re-allocation if needed */
1999 if (dev->data->tx_queues[queue_idx] != NULL) {
2000 igc_tx_queue_release(dev->data->tx_queues[queue_idx]);
2001 dev->data->tx_queues[queue_idx] = NULL;
2004 /* First allocate the tx queue data structure */
2005 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue),
2006 RTE_CACHE_LINE_SIZE);
2011 * Allocate TX ring hardware descriptors. A memzone large enough to
2012 * handle the maximum ring size is allocated in order to allow for
2013 * resizing in later calls to the queue setup function.
2015 size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD;
2016 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
2017 IGC_ALIGN, socket_id);
2019 igc_tx_queue_release(txq);
2023 txq->nb_tx_desc = nb_desc;
2024 txq->pthresh = tx_conf->tx_thresh.pthresh;
2025 txq->hthresh = tx_conf->tx_thresh.hthresh;
2026 txq->wthresh = tx_conf->tx_thresh.wthresh;
2028 txq->queue_id = queue_idx;
2029 txq->reg_idx = queue_idx;
2030 txq->port_id = dev->data->port_id;
2032 txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx));
2033 txq->tx_ring_phys_addr = tz->iova;
2035 txq->tx_ring = (union igc_adv_tx_desc *)tz->addr;
2036 /* Allocate software ring */
2037 txq->sw_ring = rte_zmalloc("txq->sw_ring",
2038 sizeof(struct igc_tx_entry) * nb_desc,
2039 RTE_CACHE_LINE_SIZE);
2040 if (txq->sw_ring == NULL) {
2041 igc_tx_queue_release(txq);
2044 PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2045 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2047 igc_reset_tx_queue(txq);
2048 dev->tx_pkt_burst = igc_xmit_pkts;
2049 dev->tx_pkt_prepare = ð_igc_prep_pkts;
2050 dev->data->tx_queues[queue_idx] = txq;
2051 txq->offloads = tx_conf->offloads;
2057 eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt)
2059 struct igc_tx_queue *txq = txqueue;
2060 struct igc_tx_entry *sw_ring;
2061 volatile union igc_adv_tx_desc *txr;
2062 uint16_t tx_first; /* First segment analyzed. */
2063 uint16_t tx_id; /* Current segment being processed. */
2064 uint16_t tx_last; /* Last segment in the current packet. */
2065 uint16_t tx_next; /* First segment of the next packet. */
2072 sw_ring = txq->sw_ring;
2076 * tx_tail is the last sent packet on the sw_ring. Goto the end
2077 * of that packet (the last segment in the packet chain) and
2078 * then the next segment will be the start of the oldest segment
2079 * in the sw_ring. This is the first packet that will be
2080 * attempted to be freed.
2083 /* Get last segment in most recently added packet. */
2084 tx_first = sw_ring[txq->tx_tail].last_id;
2086 /* Get the next segment, which is the oldest segment in ring. */
2087 tx_first = sw_ring[tx_first].next_id;
2089 /* Set the current index to the first. */
2093 * Loop through each packet. For each packet, verify that an
2094 * mbuf exists and that the last segment is free. If so, free
2098 tx_last = sw_ring[tx_id].last_id;
2100 if (sw_ring[tx_last].mbuf) {
2101 if (!(txr[tx_last].wb.status &
2102 rte_cpu_to_le_32(IGC_TXD_STAT_DD)))
2105 /* Get the start of the next packet. */
2106 tx_next = sw_ring[tx_last].next_id;
2109 * Loop through all segments in a
2113 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
2114 sw_ring[tx_id].mbuf = NULL;
2115 sw_ring[tx_id].last_id = tx_id;
2117 /* Move to next segemnt. */
2118 tx_id = sw_ring[tx_id].next_id;
2119 } while (tx_id != tx_next);
2122 * Increment the number of packets
2126 if (unlikely(count == free_cnt))
2130 * There are multiple reasons to be here:
2131 * 1) All the packets on the ring have been
2132 * freed - tx_id is equal to tx_first
2133 * and some packets have been freed.
2135 * 2) Interfaces has not sent a rings worth of
2136 * packets yet, so the segment after tail is
2137 * still empty. Or a previous call to this
2138 * function freed some of the segments but
2139 * not all so there is a hole in the list.
2140 * Hopefully this is a rare case.
2141 * - Walk the list and find the next mbuf. If
2142 * there isn't one, then done.
2144 if (likely(tx_id == tx_first && count != 0))
2148 * Walk the list and find the next mbuf, if any.
2151 /* Move to next segemnt. */
2152 tx_id = sw_ring[tx_id].next_id;
2154 if (sw_ring[tx_id].mbuf)
2157 } while (tx_id != tx_first);
2160 * Determine why previous loop bailed. If there
2161 * is not an mbuf, done.
2163 if (sw_ring[tx_id].mbuf == NULL)
2172 igc_tx_init(struct rte_eth_dev *dev)
2174 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2179 /* Setup the Base and Length of the Tx Descriptor Rings. */
2180 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2181 struct igc_tx_queue *txq = dev->data->tx_queues[i];
2182 uint64_t bus_addr = txq->tx_ring_phys_addr;
2184 IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx),
2186 sizeof(union igc_adv_tx_desc));
2187 IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx),
2188 (uint32_t)(bus_addr >> 32));
2189 IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx),
2190 (uint32_t)bus_addr);
2192 /* Setup the HW Tx Head and Tail descriptor pointers. */
2193 IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0);
2194 IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0);
2196 /* Setup Transmit threshold registers. */
2197 txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) &
2198 IGC_TXDCTL_PTHRESH_MSK;
2199 txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) &
2200 IGC_TXDCTL_HTHRESH_MSK;
2201 txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) &
2202 IGC_TXDCTL_WTHRESH_MSK;
2203 txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
2204 IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl);
2207 igc_config_collision_dist(hw);
2209 /* Program the Transmit Control Register. */
2210 tctl = IGC_READ_REG(hw, IGC_TCTL);
2211 tctl &= ~IGC_TCTL_CT;
2212 tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
2213 ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
2215 /* This write will effectively turn on the transmit unit. */
2216 IGC_WRITE_REG(hw, IGC_TCTL, tctl);
2220 eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2221 struct rte_eth_rxq_info *qinfo)
2223 struct igc_rx_queue *rxq;
2225 rxq = dev->data->rx_queues[queue_id];
2227 qinfo->mp = rxq->mb_pool;
2228 qinfo->scattered_rx = dev->data->scattered_rx;
2229 qinfo->nb_desc = rxq->nb_rx_desc;
2231 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2232 qinfo->conf.rx_drop_en = rxq->drop_en;
2233 qinfo->conf.offloads = rxq->offloads;
2234 qinfo->conf.rx_thresh.hthresh = rxq->hthresh;
2235 qinfo->conf.rx_thresh.pthresh = rxq->pthresh;
2236 qinfo->conf.rx_thresh.wthresh = rxq->wthresh;
2240 eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2241 struct rte_eth_txq_info *qinfo)
2243 struct igc_tx_queue *txq;
2245 txq = dev->data->tx_queues[queue_id];
2247 qinfo->nb_desc = txq->nb_tx_desc;
2249 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2250 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2251 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2252 qinfo->conf.offloads = txq->offloads;
2256 eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev,
2257 uint16_t rx_queue_id, int on)
2259 struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2260 struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];
2263 if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) {
2264 PMD_DRV_LOG(ERR, "Queue index(%u) illegal, max is %u",
2265 rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2269 reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id));
2271 /* If vlan been stripped off, the CRC is meaningless. */
2272 reg_val |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
2273 rxq->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2275 reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN |
2277 rxq->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
2280 IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val);