drivers/net/igc/igc_txrx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2020 Intel Corporation
3  */
4
5 #include <rte_config.h>
6 #include <rte_flow.h>
7 #include <rte_malloc.h>
8 #include <ethdev_driver.h>
9 #include <rte_net.h>
10
11 #include "igc_logs.h"
12 #include "igc_txrx.h"
13
14 #ifdef RTE_PMD_USE_PREFETCH
15 #define rte_igc_prefetch(p)             rte_prefetch0(p)
16 #else
17 #define rte_igc_prefetch(p)             do {} while (0)
18 #endif
19
20 #ifdef RTE_PMD_PACKET_PREFETCH
21 #define rte_packet_prefetch(p)          rte_prefetch1(p)
22 #else
23 #define rte_packet_prefetch(p)          do {} while (0)
24 #endif
25
26 /* Multicast / Unicast table offset mask. */
27 #define IGC_RCTL_MO_MSK                 (3u << IGC_RCTL_MO_SHIFT)
28
29 /* Loopback mode. */
30 #define IGC_RCTL_LBM_SHIFT              6
31 #define IGC_RCTL_LBM_MSK                (3u << IGC_RCTL_LBM_SHIFT)
32
33 /* Hash select for MTA */
34 #define IGC_RCTL_HSEL_SHIFT             8
35 #define IGC_RCTL_HSEL_MSK               (3u << IGC_RCTL_HSEL_SHIFT)
36 #define IGC_RCTL_PSP                    (1u << 21)
37
38 /* Receive buffer size for header buffer */
39 #define IGC_SRRCTL_BSIZEHEADER_SHIFT    8
40
41 /* RX descriptor status and error flags */
42 #define IGC_RXD_STAT_L4CS               (1u << 5)
43 #define IGC_RXD_STAT_VEXT               (1u << 9)
44 #define IGC_RXD_STAT_LLINT              (1u << 11)
45 #define IGC_RXD_STAT_SCRC               (1u << 12)
46 #define IGC_RXD_STAT_SMDT_MASK          (3u << 13)
47 #define IGC_RXD_STAT_MC                 (1u << 19)
48 #define IGC_RXD_EXT_ERR_L4E             (1u << 29)
49 #define IGC_RXD_EXT_ERR_IPE             (1u << 30)
50 #define IGC_RXD_EXT_ERR_RXE             (1u << 31)
51 #define IGC_RXD_RSS_TYPE_MASK           0xfu
52 #define IGC_RXD_PCTYPE_MASK             (0x7fu << 4)
53 #define IGC_RXD_ETQF_SHIFT              12
54 #define IGC_RXD_ETQF_MSK                (0xfu << IGC_RXD_ETQF_SHIFT)
55 #define IGC_RXD_VPKT                    (1u << 16)
56
57 /* TXD control bits */
58 #define IGC_TXDCTL_PTHRESH_SHIFT        0
59 #define IGC_TXDCTL_HTHRESH_SHIFT        8
60 #define IGC_TXDCTL_WTHRESH_SHIFT        16
61 #define IGC_TXDCTL_PTHRESH_MSK          (0x1fu << IGC_TXDCTL_PTHRESH_SHIFT)
62 #define IGC_TXDCTL_HTHRESH_MSK          (0x1fu << IGC_TXDCTL_HTHRESH_SHIFT)
63 #define IGC_TXDCTL_WTHRESH_MSK          (0x1fu << IGC_TXDCTL_WTHRESH_SHIFT)
64
65 /* RXD control bits */
66 #define IGC_RXDCTL_PTHRESH_SHIFT        0
67 #define IGC_RXDCTL_HTHRESH_SHIFT        8
68 #define IGC_RXDCTL_WTHRESH_SHIFT        16
69 #define IGC_RXDCTL_PTHRESH_MSK          (0x1fu << IGC_RXDCTL_PTHRESH_SHIFT)
70 #define IGC_RXDCTL_HTHRESH_MSK          (0x1fu << IGC_RXDCTL_HTHRESH_SHIFT)
71 #define IGC_RXDCTL_WTHRESH_MSK          (0x1fu << IGC_RXDCTL_WTHRESH_SHIFT)
72
73 #define IGC_TSO_MAX_HDRLEN              512
74 #define IGC_TSO_MAX_MSS                 9216
75
76 /* Bit mask indicating which bits are required for building the TX context */
77 #define IGC_TX_OFFLOAD_MASK (           \
78                 PKT_TX_OUTER_IPV4 |     \
79                 PKT_TX_IPV6 |           \
80                 PKT_TX_IPV4 |           \
81                 PKT_TX_VLAN_PKT |       \
82                 PKT_TX_IP_CKSUM |       \
83                 PKT_TX_L4_MASK |        \
84                 PKT_TX_TCP_SEG |        \
85                 PKT_TX_UDP_SEG)
86
87 #define IGC_TX_OFFLOAD_SEG      (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)
88
89 #define IGC_ADVTXD_POPTS_TXSM   0x00000200 /* L4 Checksum offload request */
90 #define IGC_ADVTXD_POPTS_IXSM   0x00000100 /* IP Checksum offload request */
91
92 /* L4 Packet TYPE of Reserved */
93 #define IGC_ADVTXD_TUCMD_L4T_RSV        0x00001800
94
95 #define IGC_TX_OFFLOAD_NOTSUP_MASK (PKT_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
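/*
 * Note: PKT_TX_OFFLOAD_MASK covers every TX offload flag defined by the mbuf
 * API, so XOR-ing it with IGC_TX_OFFLOAD_MASK leaves exactly the flags this
 * driver does not handle. A tx_prepare-style check (illustrative, not part of
 * this file) can then reject unsupported requests:
 *
 *      if (mbuf->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK)
 *              return -ENOTSUP;
 */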
96
97 /**
98  * Structure associated with each descriptor of the RX ring of a RX queue.
99  */
100 struct igc_rx_entry {
101         struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
102 };
103
104 /**
105  * Structure associated with each RX queue.
106  */
107 struct igc_rx_queue {
108         struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
109         volatile union igc_adv_rx_desc *rx_ring;
110         /**< RX ring virtual address. */
111         uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
112         volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
113         volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
114         struct igc_rx_entry *sw_ring;   /**< address of RX software ring. */
115         struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
116         struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
117         uint16_t            nb_rx_desc; /**< number of RX descriptors. */
118         uint16_t            rx_tail;    /**< current value of RDT register. */
119         uint16_t            nb_rx_hold; /**< number of held free RX desc. */
120         uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
121         uint16_t            queue_id;   /**< RX queue index. */
122         uint16_t            reg_idx;    /**< RX queue register index. */
123         uint16_t            port_id;    /**< Device port identifier. */
124         uint8_t             pthresh;    /**< Prefetch threshold register. */
125         uint8_t             hthresh;    /**< Host threshold register. */
126         uint8_t             wthresh;    /**< Write-back threshold register. */
127         uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
128         uint8_t             drop_en;    /**< If not 0, set SRRCTL.Drop_En. */
129         uint32_t            flags;      /**< RX flags. */
130         uint64_t            offloads;   /**< offloads of DEV_RX_OFFLOAD_* */
131 };
132
133 /** Offload features */
134 union igc_tx_offload {
135         uint64_t data;
136         struct {
137                 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
138                 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
139                 uint64_t vlan_tci:16;
140                 /**< VLAN Tag Control Identifier (CPU order). */
141                 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
142                 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
143                 /* uint64_t unused:8; */
144         };
145 };
146
147 /*
148  * Compare mask for igc_tx_offload.data,
149  * should be in sync with igc_tx_offload layout.
150  */
151 #define TX_MACIP_LEN_CMP_MASK   0x000000000000FFFFULL /**< L2L3 header mask. */
152 #define TX_VLAN_CMP_MASK        0x00000000FFFF0000ULL /**< Vlan mask. */
153 #define TX_TCP_LEN_CMP_MASK     0x000000FF00000000ULL /**< TCP header mask. */
154 #define TX_TSO_MSS_CMP_MASK     0x00FFFF0000000000ULL /**< TSO segsz mask. */
155 /** Mac + IP + TCP + Mss mask. */
156 #define TX_TSO_CMP_MASK \
157         (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
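/*
 * Sanity sketch of how the compare masks line up with the bitfield layout of
 * union igc_tx_offload above (l3_len:9 + l2_len:7 occupy bits 0-15, vlan_tci
 * bits 16-31, l4_len bits 32-39, tso_segsz bits 40-55):
 *
 *      TX_MACIP_LEN_CMP_MASK -> bits  0..15 (l2_len + l3_len)
 *      TX_VLAN_CMP_MASK      -> bits 16..31 (vlan_tci)
 *      TX_TCP_LEN_CMP_MASK   -> bits 32..39 (l4_len)
 *      TX_TSO_MSS_CMP_MASK   -> bits 40..55 (tso_segsz)
 */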
158
159 /**
160  * Structure to check whether a new context needs to be built
161  */
162 struct igc_advctx_info {
163         uint64_t flags;           /**< ol_flags related to context build. */
164         /** tx offload: vlan, tso, l2-l3-l4 lengths. */
165         union igc_tx_offload tx_offload;
166         /** compare mask for tx offload. */
167         union igc_tx_offload tx_offload_mask;
168 };
169
170 /**
171  * Hardware context number
172  */
173 enum {
174         IGC_CTX_0    = 0, /**< CTX0    */
175         IGC_CTX_1    = 1, /**< CTX1    */
176         IGC_CTX_NUM  = 2, /**< CTX_NUM */
177 };
178
179 /**
180  * Structure associated with each descriptor of the TX ring of a TX queue.
181  */
182 struct igc_tx_entry {
183         struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
184         uint16_t next_id; /**< Index of next descriptor in ring. */
185         uint16_t last_id; /**< Index of last scattered descriptor. */
186 };
187
188 /**
189  * Structure associated with each TX queue.
190  */
191 struct igc_tx_queue {
192         volatile union igc_adv_tx_desc *tx_ring; /**< TX ring address */
193         uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
194         struct igc_tx_entry    *sw_ring; /**< virtual address of SW ring. */
195         volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
196         uint32_t               txd_type;      /**< Device-specific TXD type */
197         uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
198         uint16_t               tx_tail;  /**< Current value of TDT register. */
199         uint16_t               tx_head;
200         /**< Index of first used TX descriptor. */
201         uint16_t               queue_id; /**< TX queue index. */
202         uint16_t               reg_idx;  /**< TX queue register index. */
203         uint16_t               port_id;  /**< Device port identifier. */
204         uint8_t                pthresh;  /**< Prefetch threshold register. */
205         uint8_t                hthresh;  /**< Host threshold register. */
206         uint8_t                wthresh;  /**< Write-back threshold register. */
207         uint8_t                ctx_curr;
208         /**< Start context position for transmit queue. */
209
210         struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
211         /**< Hardware context history. */
212         uint64_t               offloads; /**< offloads of DEV_TX_OFFLOAD_* */
213 };
214
215 static inline uint64_t
216 rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
217 {
218         static uint64_t l4_chksum_flags[] = {0, 0, PKT_RX_L4_CKSUM_GOOD,
219                         PKT_RX_L4_CKSUM_BAD};
220
221         static uint64_t l3_chksum_flags[] = {0, 0, PKT_RX_IP_CKSUM_GOOD,
222                         PKT_RX_IP_CKSUM_BAD};
223         uint64_t pkt_flags = 0;
224         uint32_t tmp;
225
226         if (statuserr & IGC_RXD_STAT_VP)
227                 pkt_flags |= PKT_RX_VLAN_STRIPPED;
228
229         tmp = !!(statuserr & (IGC_RXD_STAT_L4CS | IGC_RXD_STAT_UDPCS));
230         tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_L4E);
231         pkt_flags |= l4_chksum_flags[tmp];
232
233         tmp = !!(statuserr & IGC_RXD_STAT_IPCS);
234         tmp = (tmp << 1) | (uint32_t)!!(statuserr & IGC_RXD_EXT_ERR_IPE);
235         pkt_flags |= l3_chksum_flags[tmp];
236
237         return pkt_flags;
238 }
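/*
 * The lookup tables above are indexed with a 2-bit value: bit 1 is set when
 * the hardware reports that it computed a checksum (L4CS/UDPCS, or IPCS for
 * L3) and bit 0 is set when it reports a checksum error (L4E or IPE). For
 * example, a descriptor with IGC_RXD_STAT_L4CS set and IGC_RXD_EXT_ERR_L4E
 * clear yields index 2 and therefore PKT_RX_L4_CKSUM_GOOD; index 0 or 1
 * (checksum not computed) adds no checksum flag at all.
 */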
239
240 #define IGC_PACKET_TYPE_IPV4              0X01
241 #define IGC_PACKET_TYPE_IPV4_TCP          0X11
242 #define IGC_PACKET_TYPE_IPV4_UDP          0X21
243 #define IGC_PACKET_TYPE_IPV4_SCTP         0X41
244 #define IGC_PACKET_TYPE_IPV4_EXT          0X03
245 #define IGC_PACKET_TYPE_IPV4_EXT_SCTP     0X43
246 #define IGC_PACKET_TYPE_IPV6              0X04
247 #define IGC_PACKET_TYPE_IPV6_TCP          0X14
248 #define IGC_PACKET_TYPE_IPV6_UDP          0X24
249 #define IGC_PACKET_TYPE_IPV6_EXT          0X0C
250 #define IGC_PACKET_TYPE_IPV6_EXT_TCP      0X1C
251 #define IGC_PACKET_TYPE_IPV6_EXT_UDP      0X2C
252 #define IGC_PACKET_TYPE_IPV4_IPV6         0X05
253 #define IGC_PACKET_TYPE_IPV4_IPV6_TCP     0X15
254 #define IGC_PACKET_TYPE_IPV4_IPV6_UDP     0X25
255 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
256 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
257 #define IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
258 #define IGC_PACKET_TYPE_MAX               0X80
259 #define IGC_PACKET_TYPE_MASK              0X7F
260 #define IGC_PACKET_TYPE_SHIFT             0X04
261
262 static inline uint32_t
263 rx_desc_pkt_info_to_pkt_type(uint32_t pkt_info)
264 {
265         static const uint32_t
266                 ptype_table[IGC_PACKET_TYPE_MAX] __rte_cache_aligned = {
267                 [IGC_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
268                         RTE_PTYPE_L3_IPV4,
269                 [IGC_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
270                         RTE_PTYPE_L3_IPV4_EXT,
271                 [IGC_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
272                         RTE_PTYPE_L3_IPV6,
273                 [IGC_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
274                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
275                         RTE_PTYPE_INNER_L3_IPV6,
276                 [IGC_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
277                         RTE_PTYPE_L3_IPV6_EXT,
278                 [IGC_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
279                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
280                         RTE_PTYPE_INNER_L3_IPV6_EXT,
281                 [IGC_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
282                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
283                 [IGC_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
284                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
285                 [IGC_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
286                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
287                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
288                 [IGC_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
289                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
290                 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
291                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
292                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
293                 [IGC_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
294                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
295                 [IGC_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
296                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
297                 [IGC_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
298                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
299                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
300                 [IGC_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
301                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
302                 [IGC_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
303                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
304                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
305                 [IGC_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
306                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
307                 [IGC_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
308                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
309         };
310         if (unlikely(pkt_info & IGC_RXDADV_PKTTYPE_ETQF))
311                 return RTE_PTYPE_UNKNOWN;
312
313         pkt_info = (pkt_info >> IGC_PACKET_TYPE_SHIFT) & IGC_PACKET_TYPE_MASK;
314
315         return ptype_table[pkt_info];
316 }
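/*
 * Decoding example (assuming the IGC_RXDADV_PKTTYPE_ETQF bit is not set in
 * pkt_info): a value whose bits [10:4] equal 0x11 selects
 * IGC_PACKET_TYPE_IPV4_TCP, which the table maps to
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP.
 */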
317
318 static inline void
319 rx_desc_get_pkt_info(struct igc_rx_queue *rxq, struct rte_mbuf *rxm,
320                 union igc_adv_rx_desc *rxd, uint32_t staterr)
321 {
322         uint64_t pkt_flags;
323         uint32_t hlen_type_rss;
324         uint16_t pkt_info;
325
326         /* Prefetch data of first segment, if configured to do so. */
327         rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
328
329         rxm->port = rxq->port_id;
330         hlen_type_rss = rte_le_to_cpu_32(rxd->wb.lower.lo_dword.data);
331         rxm->hash.rss = rte_le_to_cpu_32(rxd->wb.lower.hi_dword.rss);
332         rxm->vlan_tci = rte_le_to_cpu_16(rxd->wb.upper.vlan);
333
334         pkt_flags = (hlen_type_rss & IGC_RXD_RSS_TYPE_MASK) ?
335                         PKT_RX_RSS_HASH : 0;
336
337         if (hlen_type_rss & IGC_RXD_VPKT)
338                 pkt_flags |= PKT_RX_VLAN;
339
340         pkt_flags |= rx_desc_statuserr_to_pkt_flags(staterr);
341
342         rxm->ol_flags = pkt_flags;
343         pkt_info = rte_le_to_cpu_16(rxd->wb.lower.lo_dword.hs_rss.pkt_info);
344         rxm->packet_type = rx_desc_pkt_info_to_pkt_type(pkt_info);
345 }
346
347 static uint16_t
348 igc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
349 {
350         struct igc_rx_queue * const rxq = rx_queue;
351         volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
352         struct igc_rx_entry * const sw_ring = rxq->sw_ring;
353         uint16_t rx_id = rxq->rx_tail;
354         uint16_t nb_rx = 0;
355         uint16_t nb_hold = 0;
356
357         while (nb_rx < nb_pkts) {
358                 volatile union igc_adv_rx_desc *rxdp;
359                 struct igc_rx_entry *rxe;
360                 struct rte_mbuf *rxm;
361                 struct rte_mbuf *nmb;
362                 union igc_adv_rx_desc rxd;
363                 uint32_t staterr;
364                 uint16_t data_len;
365
366                 /*
367                  * The order of operations here is important as the DD status
368                  * bit must not be read after any other descriptor fields.
369                  * rx_ring and rxdp are pointing to volatile data so the order
370                  * of accesses cannot be reordered by the compiler. If they were
371                  * not volatile, they could be reordered which could lead to
372                  * using invalid descriptor fields when read from rxd.
373                  */
374                 rxdp = &rx_ring[rx_id];
375                 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
376                 if (!(staterr & IGC_RXD_STAT_DD))
377                         break;
378                 rxd = *rxdp;
379
380                 /*
381                  * End of packet.
382                  *
383                  * If the IGC_RXD_STAT_EOP flag is not set, the RX packet is
384                  * likely to be invalid and to be dropped by the various
385                  * validation checks performed by the network stack.
386                  *
387                  * Allocate a new mbuf to replenish the RX ring descriptor.
388                  * If the allocation fails:
389                  *    - arrange for that RX descriptor to be the first one
390                  *      being parsed the next time the receive function is
391                  *      invoked [on the same queue].
392                  *
393                  *    - Stop parsing the RX ring and return immediately.
394                  *
395                  * This policy does not drop the packet received in the RX
396                  * descriptor for which the allocation of a new mbuf failed.
397                  * Thus, it allows that packet to be later retrieved if
398                  * Thus, it allows that packet to be retrieved later, once
399                  * mbufs have been freed in the meantime.
400                  * As a side effect, holding RX descriptors instead of
401                  * systematically giving them back to the NIC may lead to
402                  * RX ring exhaustion situations.
403                  * However, the NIC can gracefully prevent such situations
404                  * from happening by sending specific "back-pressure" flow control
405                  */
406                 PMD_RX_LOG(DEBUG,
407                         "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
408                         rxq->port_id, rxq->queue_id, rx_id, staterr,
409                         rte_le_to_cpu_16(rxd.wb.upper.length));
410
411                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
412                 if (nmb == NULL) {
413                         unsigned int id;
414                         PMD_RX_LOG(DEBUG,
415                                 "RX mbuf alloc failed, port_id=%u queue_id=%u",
416                                 rxq->port_id, rxq->queue_id);
417                         id = rxq->port_id;
418                         rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
419                         break;
420                 }
421
422                 nb_hold++;
423                 rxe = &sw_ring[rx_id];
424                 rx_id++;
425                 if (rx_id >= rxq->nb_rx_desc)
426                         rx_id = 0;
427
428                 /* Prefetch next mbuf while processing current one. */
429                 rte_igc_prefetch(sw_ring[rx_id].mbuf);
430
431                 /*
432                  * When next RX descriptor is on a cache-line boundary,
433                  * prefetch the next 4 RX descriptors and the next 8 pointers
434                  * to mbufs.
435                  */
436                 if ((rx_id & 0x3) == 0) {
437                         rte_igc_prefetch(&rx_ring[rx_id]);
438                         rte_igc_prefetch(&sw_ring[rx_id]);
439                 }
440
441                 /*
442                  * Update RX descriptor with the physical address of the new
443                  * data buffer of the new allocated mbuf.
444                  */
445                 rxm = rxe->mbuf;
446                 rxe->mbuf = nmb;
447                 rxdp->read.hdr_addr = 0;
448                 rxdp->read.pkt_addr =
449                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
450                 rxm->next = NULL;
451
452                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
453                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length) - rxq->crc_len;
454                 rxm->data_len = data_len;
455                 rxm->pkt_len = data_len;
456                 rxm->nb_segs = 1;
457
458                 rx_desc_get_pkt_info(rxq, rxm, &rxd, staterr);
459
460                 /*
461                  * Store the mbuf address into the next entry of the array
462                  * of returned packets.
463                  */
464                 rx_pkts[nb_rx++] = rxm;
465         }
466         rxq->rx_tail = rx_id;
467
468         /*
469          * If the number of free RX descriptors is greater than the RX free
470          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
471          * register.
472          * Update the RDT with the value of the last processed RX descriptor
473          * minus 1, to guarantee that the RDT register is never equal to the
474          * RDH register, which creates a "full" ring situation from the
475          * hardware point of view...
476          */
477         nb_hold = nb_hold + rxq->nb_rx_hold;
478         if (nb_hold > rxq->rx_free_thresh) {
479                 PMD_RX_LOG(DEBUG,
480                         "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
481                         rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
482                 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
483                 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
484                 nb_hold = 0;
485         }
486         rxq->nb_rx_hold = nb_hold;
487         return nb_rx;
488 }
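/*
 * Usage sketch (not part of the driver): once igc_rx_init() has installed
 * igc_recv_pkts (or igc_recv_scattered_pkts) as dev->rx_pkt_burst, an
 * application reaches it through the generic burst API. The port and queue
 * ids below are illustrative.
 *
 *      struct rte_mbuf *pkts[32];
 *      uint16_t nb, k;
 *
 *      nb = rte_eth_rx_burst(port_id, 0, pkts, RTE_DIM(pkts));
 *      for (k = 0; k < nb; k++)
 *              rte_pktmbuf_free(pkts[k]);
 */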
489
490 static uint16_t
491 igc_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
492                         uint16_t nb_pkts)
493 {
494         struct igc_rx_queue * const rxq = rx_queue;
495         volatile union igc_adv_rx_desc * const rx_ring = rxq->rx_ring;
496         struct igc_rx_entry * const sw_ring = rxq->sw_ring;
497         struct rte_mbuf *first_seg = rxq->pkt_first_seg;
498         struct rte_mbuf *last_seg = rxq->pkt_last_seg;
499
500         uint16_t rx_id = rxq->rx_tail;
501         uint16_t nb_rx = 0;
502         uint16_t nb_hold = 0;
503
504         while (nb_rx < nb_pkts) {
505                 volatile union igc_adv_rx_desc *rxdp;
506                 struct igc_rx_entry *rxe;
507                 struct rte_mbuf *rxm;
508                 struct rte_mbuf *nmb;
509                 union igc_adv_rx_desc rxd;
510                 uint32_t staterr;
511                 uint16_t data_len;
512
513 next_desc:
514                 /*
515                  * The order of operations here is important as the DD status
516                  * bit must not be read after any other descriptor fields.
517                  * rx_ring and rxdp are pointing to volatile data so the order
518                  * of accesses cannot be reordered by the compiler. If they were
519                  * not volatile, they could be reordered which could lead to
520                  * using invalid descriptor fields when read from rxd.
521                  */
522                 rxdp = &rx_ring[rx_id];
523                 staterr = rte_cpu_to_le_32(rxdp->wb.upper.status_error);
524                 if (!(staterr & IGC_RXD_STAT_DD))
525                         break;
526                 rxd = *rxdp;
527
528                 /*
529                  * Descriptor done.
530                  *
531                  * Allocate a new mbuf to replenish the RX ring descriptor.
532                  * If the allocation fails:
533                  *    - arrange for that RX descriptor to be the first one
534                  *      being parsed the next time the receive function is
535                  *      invoked [on the same queue].
536                  *
537                  *    - Stop parsing the RX ring and return immediately.
538                  *
539                  * This policy does not drop the packet received in the RX
540                  * descriptor for which the allocation of a new mbuf failed.
541                  * Thus, it allows that packet to be retrieved later, once
542                  * mbufs have been freed in the meantime.
543                  * As a side effect, holding RX descriptors instead of
544                  * systematically giving them back to the NIC may lead to
545                  * RX ring exhaustion situations.
546                  * However, the NIC can gracefully prevent such situations
547                  * from happening by sending specific "back-pressure" flow control
548                  * frames to its peer(s).
549                  */
550                 PMD_RX_LOG(DEBUG,
551                         "port_id=%u queue_id=%u rx_id=%u staterr=0x%x data_len=%u",
552                         rxq->port_id, rxq->queue_id, rx_id, staterr,
553                         rte_le_to_cpu_16(rxd.wb.upper.length));
554
555                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
556                 if (nmb == NULL) {
557                         unsigned int id;
558                         PMD_RX_LOG(DEBUG,
559                                 "RX mbuf alloc failed, port_id=%u queue_id=%u",
560                                 rxq->port_id, rxq->queue_id);
561                         id = rxq->port_id;
562                         rte_eth_devices[id].data->rx_mbuf_alloc_failed++;
563                         break;
564                 }
565
566                 nb_hold++;
567                 rxe = &sw_ring[rx_id];
568                 rx_id++;
569                 if (rx_id >= rxq->nb_rx_desc)
570                         rx_id = 0;
571
572                 /* Prefetch next mbuf while processing current one. */
573                 rte_igc_prefetch(sw_ring[rx_id].mbuf);
574
575                 /*
576                  * When next RX descriptor is on a cache-line boundary,
577                  * prefetch the next 4 RX descriptors and the next 8 pointers
578                  * to mbufs.
579                  */
580                 if ((rx_id & 0x3) == 0) {
581                         rte_igc_prefetch(&rx_ring[rx_id]);
582                         rte_igc_prefetch(&sw_ring[rx_id]);
583                 }
584
585                 /*
586                  * Update RX descriptor with the physical address of the new
587                  * data buffer of the new allocated mbuf.
588                  */
589                 rxm = rxe->mbuf;
590                 rxe->mbuf = nmb;
591                 rxdp->read.hdr_addr = 0;
592                 rxdp->read.pkt_addr =
593                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
594                 rxm->next = NULL;
595
596                 /*
597                  * Set data length & data buffer address of mbuf.
598                  */
599                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
600                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
601                 rxm->data_len = data_len;
602
603                 /*
604                  * If this is the first buffer of the received packet,
605                  * set the pointer to the first mbuf of the packet and
606                  * initialize its context.
607                  * Otherwise, update the total length and the number of segments
608                  * of the current scattered packet, and update the pointer to
609                  * the last mbuf of the current packet.
610                  */
611                 if (first_seg == NULL) {
612                         first_seg = rxm;
613                         first_seg->pkt_len = data_len;
614                         first_seg->nb_segs = 1;
615                 } else {
616                         first_seg->pkt_len += data_len;
617                         first_seg->nb_segs++;
618                         last_seg->next = rxm;
619                 }
620
621                 /*
622                  * If this is not the last buffer of the received packet,
623                  * update the pointer to the last mbuf of the current scattered
624                  * packet and continue to parse the RX ring.
625                  */
626                 if (!(staterr & IGC_RXD_STAT_EOP)) {
627                         last_seg = rxm;
628                         goto next_desc;
629                 }
630
631                 /*
632                  * This is the last buffer of the received packet.
633                  * If the CRC is not stripped by the hardware:
634                  *   - Subtract the CRC length from the total packet length.
635                  *   - If the last buffer only contains the whole CRC or a part
636                  *     of it, free the mbuf associated to the last buffer.
637                  *     If part of the CRC is also contained in the previous
638                  *     mbuf, subtract the length of that CRC part from the
639                  *     data length of the previous mbuf.
640                  */
641                 if (unlikely(rxq->crc_len > 0)) {
642                         first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
643                         if (data_len <= RTE_ETHER_CRC_LEN) {
644                                 rte_pktmbuf_free_seg(rxm);
645                                 first_seg->nb_segs--;
646                                 last_seg->data_len = last_seg->data_len -
647                                          (RTE_ETHER_CRC_LEN - data_len);
648                                 last_seg->next = NULL;
649                         } else {
650                                 rxm->data_len = (uint16_t)
651                                         (data_len - RTE_ETHER_CRC_LEN);
652                         }
653                 }
654
655                 rx_desc_get_pkt_info(rxq, first_seg, &rxd, staterr);
656
657                 /*
658                  * Store the mbuf address into the next entry of the array
659                  * of returned packets.
660                  */
661                 rx_pkts[nb_rx++] = first_seg;
662
663                 /* Setup receipt context for a new packet. */
664                 first_seg = NULL;
665         }
666         rxq->rx_tail = rx_id;
667
668         /*
669          * Save receive context.
670          */
671         rxq->pkt_first_seg = first_seg;
672         rxq->pkt_last_seg = last_seg;
673
674         /*
675          * If the number of free RX descriptors is greater than the RX free
676          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
677          * register.
678          * Update the RDT with the value of the last processed RX descriptor
679          * minus 1, to guarantee that the RDT register is never equal to the
680          * RDH register, which creates a "full" ring situation from the
681          * hardware point of view...
682          */
683         nb_hold = nb_hold + rxq->nb_rx_hold;
684         if (nb_hold > rxq->rx_free_thresh) {
685                 PMD_RX_LOG(DEBUG,
686                         "port_id=%u queue_id=%u rx_tail=%u nb_hold=%u nb_rx=%u",
687                         rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
688                 rx_id = (rx_id == 0) ? (rxq->nb_rx_desc - 1) : (rx_id - 1);
689                 IGC_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
690                 nb_hold = 0;
691         }
692         rxq->nb_rx_hold = nb_hold;
693         return nb_rx;
694 }
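/*
 * Segment-chaining example (sizes are illustrative): a 3000-byte frame
 * received into 2048-byte buffers arrives as two descriptors. The first pass
 * creates first_seg with pkt_len = data_len = 2048; the second (EOP) pass
 * appends an mbuf with data_len = 952, bumping pkt_len to 3000 and nb_segs
 * to 2. When DEV_RX_OFFLOAD_KEEP_CRC is not requested, the hardware has
 * already stripped the CRC, so the crc_len trimming branch above is skipped
 * (rxq->crc_len == 0).
 */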
695
696 static void
697 igc_rx_queue_release_mbufs(struct igc_rx_queue *rxq)
698 {
699         unsigned int i;
700
701         if (rxq->sw_ring != NULL) {
702                 for (i = 0; i < rxq->nb_rx_desc; i++) {
703                         if (rxq->sw_ring[i].mbuf != NULL) {
704                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
705                                 rxq->sw_ring[i].mbuf = NULL;
706                         }
707                 }
708         }
709 }
710
711 static void
712 igc_rx_queue_release(struct igc_rx_queue *rxq)
713 {
714         igc_rx_queue_release_mbufs(rxq);
715         rte_free(rxq->sw_ring);
716         rte_free(rxq);
717 }
718
719 void eth_igc_rx_queue_release(void *rxq)
720 {
721         if (rxq)
722                 igc_rx_queue_release(rxq);
723 }
724
725 uint32_t eth_igc_rx_queue_count(struct rte_eth_dev *dev,
726                 uint16_t rx_queue_id)
727 {
728         /**
729          * Check the DD bit of every fourth RX descriptor only,
730          * to avoid checking too frequently and degrading
731          * performance too much.
732          */
733 #define IGC_RXQ_SCAN_INTERVAL 4
734
735         volatile union igc_adv_rx_desc *rxdp;
736         struct igc_rx_queue *rxq;
737         uint16_t desc = 0;
738
739         rxq = dev->data->rx_queues[rx_queue_id];
740         rxdp = &rxq->rx_ring[rxq->rx_tail];
741
742         while (desc < rxq->nb_rx_desc - rxq->rx_tail) {
743                 if (unlikely(!(rxdp->wb.upper.status_error &
744                                 IGC_RXD_STAT_DD)))
745                         return desc;
746                 desc += IGC_RXQ_SCAN_INTERVAL;
747                 rxdp += IGC_RXQ_SCAN_INTERVAL;
748         }
749         rxdp = &rxq->rx_ring[rxq->rx_tail + desc - rxq->nb_rx_desc];
750
751         while (desc < rxq->nb_rx_desc &&
752                 (rxdp->wb.upper.status_error & IGC_RXD_STAT_DD)) {
753                 desc += IGC_RXQ_SCAN_INTERVAL;
754                 rxdp += IGC_RXQ_SCAN_INTERVAL;
755         }
756
757         return desc;
758 }
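/*
 * Because the scan advances IGC_RXQ_SCAN_INTERVAL descriptors at a time, the
 * value returned here is an approximation rounded to a multiple of the scan
 * interval; it is intended for rte_eth_rx_queue_count() style monitoring
 * rather than exact accounting.
 */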
759
760 int eth_igc_rx_descriptor_done(void *rx_queue, uint16_t offset)
761 {
762         volatile union igc_adv_rx_desc *rxdp;
763         struct igc_rx_queue *rxq = rx_queue;
764         uint32_t desc;
765
766         if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
767                 return 0;
768
769         desc = rxq->rx_tail + offset;
770         if (desc >= rxq->nb_rx_desc)
771                 desc -= rxq->nb_rx_desc;
772
773         rxdp = &rxq->rx_ring[desc];
774         return !!(rxdp->wb.upper.status_error &
775                         rte_cpu_to_le_32(IGC_RXD_STAT_DD));
776 }
777
778 int eth_igc_rx_descriptor_status(void *rx_queue, uint16_t offset)
779 {
780         struct igc_rx_queue *rxq = rx_queue;
781         volatile uint32_t *status;
782         uint32_t desc;
783
784         if (unlikely(!rxq || offset >= rxq->nb_rx_desc))
785                 return -EINVAL;
786
787         if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
788                 return RTE_ETH_RX_DESC_UNAVAIL;
789
790         desc = rxq->rx_tail + offset;
791         if (desc >= rxq->nb_rx_desc)
792                 desc -= rxq->nb_rx_desc;
793
794         status = &rxq->rx_ring[desc].wb.upper.status_error;
795         if (*status & rte_cpu_to_le_32(IGC_RXD_STAT_DD))
796                 return RTE_ETH_RX_DESC_DONE;
797
798         return RTE_ETH_RX_DESC_AVAIL;
799 }
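/*
 * Usage sketch (not part of the driver), via the generic API that dispatches
 * here; the port, queue, and offset values are illustrative:
 *
 *      int st = rte_eth_rx_descriptor_status(port_id, 0, 16);
 *
 *      if (st == RTE_ETH_RX_DESC_DONE)
 *              ;  // descriptors 0..16 of queue 0 have completed
 */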
800
801 static int
802 igc_alloc_rx_queue_mbufs(struct igc_rx_queue *rxq)
803 {
804         struct igc_rx_entry *rxe = rxq->sw_ring;
805         uint64_t dma_addr;
806         unsigned int i;
807
808         /* Initialize software ring entries. */
809         for (i = 0; i < rxq->nb_rx_desc; i++) {
810                 volatile union igc_adv_rx_desc *rxd;
811                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
812
813                 if (mbuf == NULL) {
814                         PMD_DRV_LOG(ERR, "RX mbuf alloc failed, queue_id=%hu",
815                                 rxq->queue_id);
816                         return -ENOMEM;
817                 }
818                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
819                 rxd = &rxq->rx_ring[i];
820                 rxd->read.hdr_addr = 0;
821                 rxd->read.pkt_addr = dma_addr;
822                 rxe[i].mbuf = mbuf;
823         }
824
825         return 0;
826 }
827
828 /*
829  * RSS random key supplied in section 7.1.2.9.3 of the Intel I225 datasheet.
830  * Used as the default key.
831  */
832 static uint8_t default_rss_key[40] = {
833         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
834         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
835         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
836         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
837         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
838 };
839
840 void
841 igc_rss_disable(struct rte_eth_dev *dev)
842 {
843         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
844         uint32_t mrqc;
845
846         mrqc = IGC_READ_REG(hw, IGC_MRQC);
847         mrqc &= ~IGC_MRQC_ENABLE_MASK;
848         IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
849 }
850
851 void
852 igc_hw_rss_hash_set(struct igc_hw *hw, struct rte_eth_rss_conf *rss_conf)
853 {
854         uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
855         uint32_t mrqc;
856         uint64_t rss_hf;
857
858         if (hash_key != NULL) {
859                 uint8_t i;
860
861                 /* Fill in RSS hash key */
862                 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
863                         IGC_WRITE_REG_LE_VALUE(hw, IGC_RSSRK(i), hash_key[i]);
864         }
865
866         /* Set configured hashing protocols in MRQC register */
867         rss_hf = rss_conf->rss_hf;
868         mrqc = IGC_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
869         if (rss_hf & ETH_RSS_IPV4)
870                 mrqc |= IGC_MRQC_RSS_FIELD_IPV4;
871         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
872                 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_TCP;
873         if (rss_hf & ETH_RSS_IPV6)
874                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6;
875         if (rss_hf & ETH_RSS_IPV6_EX)
876                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_EX;
877         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
878                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP;
879         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
880                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
881         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
882                 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
883         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
884                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
885         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
886                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP_EX;
887         IGC_WRITE_REG(hw, IGC_MRQC, mrqc);
888 }
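/*
 * Illustrative caller-side setup (values assumed, not mandated by the
 * driver): hash IPv4/TCP traffic with the 40-byte default key.
 *
 *      struct rte_eth_rss_conf conf = {
 *              .rss_key = default_rss_key,
 *              .rss_key_len = sizeof(default_rss_key),
 *              .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *      };
 *      igc_hw_rss_hash_set(hw, &conf);
 */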
889
890 static void
891 igc_rss_configure(struct rte_eth_dev *dev)
892 {
893         struct rte_eth_rss_conf rss_conf;
894         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
895         uint16_t i;
896
897         /* Fill in redirection table. */
898         for (i = 0; i < IGC_RSS_RDT_SIZD; i++) {
899                 union igc_rss_reta_reg reta;
900                 uint16_t q_idx, reta_idx;
901
902                 q_idx = (uint8_t)((dev->data->nb_rx_queues > 1) ?
903                                    i % dev->data->nb_rx_queues : 0);
904                 reta_idx = i % sizeof(reta);
905                 reta.bytes[reta_idx] = q_idx;
906                 if (reta_idx == sizeof(reta) - 1)
907                         IGC_WRITE_REG_LE_VALUE(hw,
908                                 IGC_RETA(i / sizeof(reta)), reta.dword);
909         }
910
911         /*
912          * Configure the RSS key and the RSS protocols used to compute
913          * the RSS hash of input packets.
914          */
915         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
916         if (rss_conf.rss_key == NULL)
917                 rss_conf.rss_key = default_rss_key;
918         igc_hw_rss_hash_set(hw, &rss_conf);
919 }
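/*
 * RETA example: with 4 RX queues configured, the loop above fills the
 * IGC_RSS_RDT_SIZD-entry redirection table as 0,1,2,3,0,1,2,3,... so RSS
 * hash results spread packets evenly across the queues; with a single RX
 * queue every entry is 0.
 */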
920
921 int
922 igc_del_rss_filter(struct rte_eth_dev *dev)
923 {
924         struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
925
926         if (rss_filter->enable) {
927                 /* recover default RSS configuration */
928                 igc_rss_configure(dev);
929
930                 /* disable RSS logic and clear filter data */
931                 igc_rss_disable(dev);
932                 memset(rss_filter, 0, sizeof(*rss_filter));
933                 return 0;
934         }
935         PMD_DRV_LOG(ERR, "RSS filter does not exist!");
936         return -ENOENT;
937 }
938
939 /* Initialize the filter structure from a struct rte_flow_action_rss */
940 void
941 igc_rss_conf_set(struct igc_rss_filter *out,
942                 const struct rte_flow_action_rss *rss)
943 {
944         out->conf.func = rss->func;
945         out->conf.level = rss->level;
946         out->conf.types = rss->types;
947
948         if (rss->key_len == sizeof(out->key)) {
949                 memcpy(out->key, rss->key, rss->key_len);
950                 out->conf.key = out->key;
951                 out->conf.key_len = rss->key_len;
952         } else {
953                 out->conf.key = NULL;
954                 out->conf.key_len = 0;
955         }
956
957         if (rss->queue_num <= IGC_RSS_RDT_SIZD) {
958                 memcpy(out->queue, rss->queue,
959                         sizeof(*out->queue) * rss->queue_num);
960                 out->conf.queue = out->queue;
961                 out->conf.queue_num = rss->queue_num;
962         } else {
963                 out->conf.queue = NULL;
964                 out->conf.queue_num = 0;
965         }
966 }
967
968 int
969 igc_add_rss_filter(struct rte_eth_dev *dev, struct igc_rss_filter *rss)
970 {
971         struct rte_eth_rss_conf rss_conf = {
972                 .rss_key = rss->conf.key_len ?
973                         (void *)(uintptr_t)rss->conf.key : NULL,
974                 .rss_key_len = rss->conf.key_len,
975                 .rss_hf = rss->conf.types,
976         };
977         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
978         struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
979         uint32_t i, j;
980
981         /* check RSS type is valid */
982         if ((rss_conf.rss_hf & IGC_RSS_OFFLOAD_ALL) == 0) {
983                 PMD_DRV_LOG(ERR,
984                         "RSS type (0x%" PRIx64 ") is invalid, only 0x%" PRIx64
985                         " is supported", rss_conf.rss_hf,
986                         (uint64_t)IGC_RSS_OFFLOAD_ALL);
987                 return -EINVAL;
988         }
989
990         /* check queue count is not zero */
991         if (!rss->conf.queue_num) {
992                 PMD_DRV_LOG(ERR, "Queue number should not be 0!");
993                 return -EINVAL;
994         }
995
996         /* check queue id is valid */
997         for (i = 0; i < rss->conf.queue_num; i++)
998                 if (rss->conf.queue[i] >= dev->data->nb_rx_queues) {
999                         PMD_DRV_LOG(ERR, "Queue id %u is invalid!",
1000                                         rss->conf.queue[i]);
1001                         return -EINVAL;
1002                 }
1003
1004         /* only support one filter */
1005         if (rss_filter->enable) {
1006                 PMD_DRV_LOG(ERR, "Only one RSS filter is supported!");
1007                 return -ENOTSUP;
1008         }
1009         rss_filter->enable = 1;
1010
1011         igc_rss_conf_set(rss_filter, &rss->conf);
1012
1013         /* Fill in redirection table. */
1014         for (i = 0, j = 0; i < IGC_RSS_RDT_SIZD; i++, j++) {
1015                 union igc_rss_reta_reg reta;
1016                 uint16_t q_idx, reta_idx;
1017
1018                 if (j == rss->conf.queue_num)
1019                         j = 0;
1020                 q_idx = rss->conf.queue[j];
1021                 reta_idx = i % sizeof(reta);
1022                 reta.bytes[reta_idx] = q_idx;
1023                 if (reta_idx == sizeof(reta) - 1)
1024                         IGC_WRITE_REG_LE_VALUE(hw,
1025                                 IGC_RETA(i / sizeof(reta)), reta.dword);
1026         }
1027
1028         if (rss_conf.rss_key == NULL)
1029                 rss_conf.rss_key = default_rss_key;
1030         igc_hw_rss_hash_set(hw, &rss_conf);
1031         return 0;
1032 }
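/*
 * Illustrative rte_flow-side input (field values assumed): an RSS action
 * spreading matched traffic over queues 0-3 with the default key would reach
 * this function roughly as
 *
 *      uint16_t queues[4] = {0, 1, 2, 3};
 *      struct rte_flow_action_rss act = {
 *              .types = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP,
 *              .key_len = 0,           // fall back to default_rss_key
 *              .queue_num = 4,
 *              .queue = queues,
 *      };
 *
 * after being copied into struct igc_rss_filter by igc_rss_conf_set().
 */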
1033
1034 void
1035 igc_clear_rss_filter(struct rte_eth_dev *dev)
1036 {
1037         struct igc_rss_filter *rss_filter = IGC_DEV_PRIVATE_RSS_FILTER(dev);
1038
1039         if (!rss_filter->enable)
1040                 return;
1041
1042         /* recover default RSS configuration */
1043         igc_rss_configure(dev);
1044
1045         /* disable RSS logic and clear filter data */
1046         igc_rss_disable(dev);
1047         memset(rss_filter, 0, sizeof(*rss_filter));
1048 }
1049
1050 static int
1051 igc_dev_mq_rx_configure(struct rte_eth_dev *dev)
1052 {
1053         if (RTE_ETH_DEV_SRIOV(dev).active) {
1054                 PMD_DRV_LOG(ERR, "SRIOV unsupported!");
1055                 return -EINVAL;
1056         }
1057
1058         switch (dev->data->dev_conf.rxmode.mq_mode) {
1059         case ETH_MQ_RX_RSS:
1060                 igc_rss_configure(dev);
1061                 break;
1062         case ETH_MQ_RX_NONE:
1063                 /*
1064                  * Program the RSS registers with the default configuration,
1065                  * then disable the RSS logic.
1066                  */
1067                 igc_rss_configure(dev);
1068                 igc_rss_disable(dev);
1069                 break;
1070         default:
1071                 PMD_DRV_LOG(ERR, "rx mode(%d) not supported!",
1072                         dev->data->dev_conf.rxmode.mq_mode);
1073                 return -EINVAL;
1074         }
1075         return 0;
1076 }
1077
1078 int
1079 igc_rx_init(struct rte_eth_dev *dev)
1080 {
1081         struct igc_rx_queue *rxq;
1082         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1083         uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
1084         uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
1085         uint32_t rctl;
1086         uint32_t rxcsum;
1087         uint16_t buf_size;
1088         uint16_t rctl_bsize;
1089         uint16_t i;
1090         int ret;
1091
1092         dev->rx_pkt_burst = igc_recv_pkts;
1093
1094         /*
1095          * Make sure receives are disabled while setting
1096          * up the descriptor ring.
1097          */
1098         rctl = IGC_READ_REG(hw, IGC_RCTL);
1099         IGC_WRITE_REG(hw, IGC_RCTL, rctl & ~IGC_RCTL_EN);
1100
1101         /* Configure support of jumbo frames, if any. */
1102         if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
1103                 rctl |= IGC_RCTL_LPE;
1104
1105                 /*
1106                  * Set the maximum packet length by default; it may be updated
1107                  * later when dual VLAN is enabled or disabled.
1108                  */
1109                 IGC_WRITE_REG(hw, IGC_RLPML, max_rx_pkt_len);
1110         } else {
1111                 rctl &= ~IGC_RCTL_LPE;
1112         }
1113
1114         /* Configure and enable each RX queue. */
1115         rctl_bsize = 0;
1116         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1117                 uint64_t bus_addr;
1118                 uint32_t rxdctl;
1119                 uint32_t srrctl;
1120
1121                 rxq = dev->data->rx_queues[i];
1122                 rxq->flags = 0;
1123
1124                 /* Allocate buffers for descriptor rings and set up queue */
1125                 ret = igc_alloc_rx_queue_mbufs(rxq);
1126                 if (ret)
1127                         return ret;
1128
1129                 /*
1130                  * Reset crc_len in case it was changed after queue setup by a
1131                  * call to configure
1132                  */
1133                 rxq->crc_len = (offloads & DEV_RX_OFFLOAD_KEEP_CRC) ?
1134                                 RTE_ETHER_CRC_LEN : 0;
1135
1136                 bus_addr = rxq->rx_ring_phys_addr;
1137                 IGC_WRITE_REG(hw, IGC_RDLEN(rxq->reg_idx),
1138                                 rxq->nb_rx_desc *
1139                                 sizeof(union igc_adv_rx_desc));
1140                 IGC_WRITE_REG(hw, IGC_RDBAH(rxq->reg_idx),
1141                                 (uint32_t)(bus_addr >> 32));
1142                 IGC_WRITE_REG(hw, IGC_RDBAL(rxq->reg_idx),
1143                                 (uint32_t)bus_addr);
1144
1145                 /* set descriptor configuration */
1146                 srrctl = IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
1147
1148                 srrctl |= (uint32_t)(RTE_PKTMBUF_HEADROOM / 64) <<
1149                                 IGC_SRRCTL_BSIZEHEADER_SHIFT;
1150                 /*
1151                  * Configure RX buffer size.
1152                  */
1153                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
1154                         RTE_PKTMBUF_HEADROOM);
1155                 if (buf_size >= 1024) {
1156                         /*
1157                          * Configure the BSIZEPACKET field of the SRRCTL
1158                          * register of the queue.
1159                          * Value is in 1 KB resolution, from 1 KB to 16 KB.
1160                          * If this field is equal to 0b, then RCTL.BSIZE
1161                          * determines the RX packet buffer size.
1162                          */
1163
1164                         srrctl |= ((buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT) &
1165                                    IGC_SRRCTL_BSIZEPKT_MASK);
1166                         buf_size = (uint16_t)((srrctl &
1167                                         IGC_SRRCTL_BSIZEPKT_MASK) <<
1168                                         IGC_SRRCTL_BSIZEPKT_SHIFT);
1169
1170                         /* Account for dual VLAN header length when checking buffer size */
1171                         if (max_rx_pkt_len + 2 * VLAN_TAG_SIZE > buf_size)
1172                                 dev->data->scattered_rx = 1;
1173                 } else {
1174                         /*
1175                          * Use BSIZE field of the device RCTL register.
1176                          */
1177                         if (rctl_bsize == 0 || rctl_bsize > buf_size)
1178                                 rctl_bsize = buf_size;
1179                         dev->data->scattered_rx = 1;
1180                 }
1181
1182                 /* Set whether packets are dropped when no descriptors are available */
1183                 if (rxq->drop_en)
1184                         srrctl |= IGC_SRRCTL_DROP_EN;
1185
1186                 IGC_WRITE_REG(hw, IGC_SRRCTL(rxq->reg_idx), srrctl);
1187
1188                 /* Enable this RX queue. */
1189                 rxdctl = IGC_RXDCTL_QUEUE_ENABLE;
1190                 rxdctl |= ((uint32_t)rxq->pthresh << IGC_RXDCTL_PTHRESH_SHIFT) &
1191                                 IGC_RXDCTL_PTHRESH_MSK;
1192                 rxdctl |= ((uint32_t)rxq->hthresh << IGC_RXDCTL_HTHRESH_SHIFT) &
1193                                 IGC_RXDCTL_HTHRESH_MSK;
1194                 rxdctl |= ((uint32_t)rxq->wthresh << IGC_RXDCTL_WTHRESH_SHIFT) &
1195                                 IGC_RXDCTL_WTHRESH_MSK;
1196                 IGC_WRITE_REG(hw, IGC_RXDCTL(rxq->reg_idx), rxdctl);
1197         }
1198
1199         if (offloads & DEV_RX_OFFLOAD_SCATTER)
1200                 dev->data->scattered_rx = 1;
1201
1202         if (dev->data->scattered_rx) {
1203                 PMD_DRV_LOG(DEBUG, "forcing scatter mode");
1204                 dev->rx_pkt_burst = igc_recv_scattered_pkts;
1205         }
1206         /*
1207          * Setup BSIZE field of RCTL register, if needed.
1208          * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
1209          * register, since the code above configures the SRRCTL register of
1210          * the RX queue in such a case.
1211          * All configurable sizes are:
1212          * 16384: rctl |= (IGC_RCTL_SZ_16384 | IGC_RCTL_BSEX);
1213          *  8192: rctl |= (IGC_RCTL_SZ_8192  | IGC_RCTL_BSEX);
1214          *  4096: rctl |= (IGC_RCTL_SZ_4096  | IGC_RCTL_BSEX);
1215          *  2048: rctl |= IGC_RCTL_SZ_2048;
1216          *  1024: rctl |= IGC_RCTL_SZ_1024;
1217          *   512: rctl |= IGC_RCTL_SZ_512;
1218          *   256: rctl |= IGC_RCTL_SZ_256;
1219          */
1220         if (rctl_bsize > 0) {
1221                 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
1222                         rctl |= IGC_RCTL_SZ_512;
1223                 else /* 256 <= buf_size < 512 - use 256 */
1224                         rctl |= IGC_RCTL_SZ_256;
1225         }
1226
1227         /*
1228          * Configure RSS if device configured with multiple RX queues.
1229          */
1230         igc_dev_mq_rx_configure(dev);
1231
1232         /* Update the rctl since igc_dev_mq_rx_configure may change its value */
1233         rctl |= IGC_READ_REG(hw, IGC_RCTL);
1234
1235         /*
1236          * Setup the Checksum Register.
1237          * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
1238          */
1239         rxcsum = IGC_READ_REG(hw, IGC_RXCSUM);
1240         rxcsum |= IGC_RXCSUM_PCSD;
1241
1242         /* Enable both L3/L4 rx checksum offload */
1243         if (offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
1244                 rxcsum |= IGC_RXCSUM_IPOFL;
1245         else
1246                 rxcsum &= ~IGC_RXCSUM_IPOFL;
1247
1248         if (offloads &
1249                 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)) {
1250                 rxcsum |= IGC_RXCSUM_TUOFL;
1251                 offloads |= DEV_RX_OFFLOAD_SCTP_CKSUM;
1252         } else {
1253                 rxcsum &= ~IGC_RXCSUM_TUOFL;
1254         }
1255
1256         if (offloads & DEV_RX_OFFLOAD_SCTP_CKSUM)
1257                 rxcsum |= IGC_RXCSUM_CRCOFL;
1258         else
1259                 rxcsum &= ~IGC_RXCSUM_CRCOFL;
1260
1261         IGC_WRITE_REG(hw, IGC_RXCSUM, rxcsum);
1262
1263         /* Setup the Receive Control Register. */
1264         if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1265                 rctl &= ~IGC_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
1266         else
1267                 rctl |= IGC_RCTL_SECRC; /* Strip Ethernet CRC. */
1268
1269         rctl &= ~IGC_RCTL_MO_MSK;
1270         rctl &= ~IGC_RCTL_LBM_MSK;
1271         rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_LBM_NO |
1272                         IGC_RCTL_DPF |
1273                         (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
1274
1275         if (dev->data->dev_conf.lpbk_mode == 1)
1276                 rctl |= IGC_RCTL_LBM_MAC;
1277
1278         rctl &= ~(IGC_RCTL_HSEL_MSK | IGC_RCTL_CFIEN | IGC_RCTL_CFI |
1279                         IGC_RCTL_PSP | IGC_RCTL_PMCF);
1280
1281         /* Make sure VLAN Filters are off. */
1282         rctl &= ~IGC_RCTL_VFE;
1283         /* Don't store bad packets. */
1284         rctl &= ~IGC_RCTL_SBP;
1285
1286         /* Enable Receives. */
1287         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1288
1289         /*
1290          * Setup the HW Rx Head and Tail Descriptor Pointers.
1291          * This needs to be done after enable.
1292          */
1293         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1294                 rxq = dev->data->rx_queues[i];
1295                 IGC_WRITE_REG(hw, IGC_RDH(rxq->reg_idx), 0);
1296                 IGC_WRITE_REG(hw, IGC_RDT(rxq->reg_idx),
1297                                 rxq->nb_rx_desc - 1);
1298
1299                 /* Per-queue VLAN strip offload */
1300                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP) {
1301                         uint32_t dvmolr;
1302                         dvmolr = IGC_READ_REG(hw, IGC_DVMOLR(rxq->reg_idx));
1303
1304                         /* If the VLAN has been stripped off, the CRC is meaningless. */
1305                         dvmolr |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
1306                         IGC_WRITE_REG(hw, IGC_DVMOLR(rxq->reg_idx), dvmolr);
1307                 }
1308         }
1309
1310         return 0;
1311 }
1312
1313 static void
1314 igc_reset_rx_queue(struct igc_rx_queue *rxq)
1315 {
1316         static const union igc_adv_rx_desc zeroed_desc = { {0} };
1317         unsigned int i;
1318
1319         /* Zero out HW ring memory */
1320         for (i = 0; i < rxq->nb_rx_desc; i++)
1321                 rxq->rx_ring[i] = zeroed_desc;
1322
1323         rxq->rx_tail = 0;
1324         rxq->pkt_first_seg = NULL;
1325         rxq->pkt_last_seg = NULL;
1326 }
1327
1328 int
1329 eth_igc_rx_queue_setup(struct rte_eth_dev *dev,
1330                          uint16_t queue_idx,
1331                          uint16_t nb_desc,
1332                          unsigned int socket_id,
1333                          const struct rte_eth_rxconf *rx_conf,
1334                          struct rte_mempool *mp)
1335 {
1336         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1337         const struct rte_memzone *rz;
1338         struct igc_rx_queue *rxq;
1339         unsigned int size;
1340
1341         /*
1342          * Validate number of receive descriptors.
1343          * It must not exceed the hardware maximum and must be a multiple
1344          * of IGC_RX_DESCRIPTOR_MULTIPLE.
1345          */
1346         if (nb_desc % IGC_RX_DESCRIPTOR_MULTIPLE != 0 ||
1347                 nb_desc > IGC_MAX_RXD || nb_desc < IGC_MIN_RXD) {
1348                 PMD_DRV_LOG(ERR,
1349                         "RX descriptors must be a multiple of %u (cur: %u) and between %u and %u",
1350                         IGC_RX_DESCRIPTOR_MULTIPLE, nb_desc,
1351                         IGC_MIN_RXD, IGC_MAX_RXD);
1352                 return -EINVAL;
1353         }
1354
1355         /* Free memory prior to re-allocation if needed */
1356         if (dev->data->rx_queues[queue_idx] != NULL) {
1357                 igc_rx_queue_release(dev->data->rx_queues[queue_idx]);
1358                 dev->data->rx_queues[queue_idx] = NULL;
1359         }
1360
1361         /* First allocate the RX queue data structure. */
1362         rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igc_rx_queue),
1363                           RTE_CACHE_LINE_SIZE);
1364         if (rxq == NULL)
1365                 return -ENOMEM;
1366         rxq->offloads = rx_conf->offloads;
1367         rxq->mb_pool = mp;
1368         rxq->nb_rx_desc = nb_desc;
1369         rxq->pthresh = rx_conf->rx_thresh.pthresh;
1370         rxq->hthresh = rx_conf->rx_thresh.hthresh;
1371         rxq->wthresh = rx_conf->rx_thresh.wthresh;
1372         rxq->drop_en = rx_conf->rx_drop_en;
1373         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1374         rxq->queue_id = queue_idx;
1375         rxq->reg_idx = queue_idx;
1376         rxq->port_id = dev->data->port_id;
1377
1378         /*
1379          *  Allocate RX ring hardware descriptors. A memzone large enough to
1380          *  handle the maximum ring size is allocated in order to allow for
1381          *  resizing in later calls to the queue setup function.
1382          */
1383         size = sizeof(union igc_adv_rx_desc) * IGC_MAX_RXD;
1384         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1385                                       IGC_ALIGN, socket_id);
1386         if (rz == NULL) {
1387                 igc_rx_queue_release(rxq);
1388                 return -ENOMEM;
1389         }
1390         rxq->rdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDT(rxq->reg_idx));
1391         rxq->rdh_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_RDH(rxq->reg_idx));
1392         rxq->rx_ring_phys_addr = rz->iova;
1393         rxq->rx_ring = (union igc_adv_rx_desc *)rz->addr;
1394
1395         /* Allocate software ring. */
1396         rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1397                                    sizeof(struct igc_rx_entry) * nb_desc,
1398                                    RTE_CACHE_LINE_SIZE);
1399         if (rxq->sw_ring == NULL) {
1400                 igc_rx_queue_release(rxq);
1401                 return -ENOMEM;
1402         }
1403
1404         PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
1405                 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1406
1407         dev->data->rx_queues[queue_idx] = rxq;
1408         igc_reset_rx_queue(rxq);
1409
1410         return 0;
1411 }
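/*
 * Note: applications reach this function through rte_eth_rx_queue_setup().
 * A minimal, illustrative call (parameter values are examples only):
 *
 *   struct rte_mempool *mb_pool = rte_pktmbuf_pool_create("rx_pool", 8192,
 *                   256, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *   ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                   NULL, mb_pool);
 *
 * Passing NULL for rx_conf uses the defaults reported by
 * rte_eth_dev_info_get().
 */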
1412
1413 /* prepare packets for transmit */
1414 static uint16_t
1415 eth_igc_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1416                 uint16_t nb_pkts)
1417 {
1418         int i, ret;
1419         struct rte_mbuf *m;
1420
1421         for (i = 0; i < nb_pkts; i++) {
1422                 m = tx_pkts[i];
1423
1424                 /* Check some limitations for TSO in hardware */
1425                 if (m->ol_flags & IGC_TX_OFFLOAD_SEG)
1426                         if (m->tso_segsz > IGC_TSO_MAX_MSS ||
1427                                 m->l2_len + m->l3_len + m->l4_len >
1428                                 IGC_TSO_MAX_HDRLEN) {
1429                                 rte_errno = EINVAL;
1430                                 return i;
1431                         }
1432
1433                 if (m->ol_flags & IGC_TX_OFFLOAD_NOTSUP_MASK) {
1434                         rte_errno = ENOTSUP;
1435                         return i;
1436                 }
1437
1438 #ifdef RTE_ETHDEV_DEBUG_TX
1439                 ret = rte_validate_tx_offload(m);
1440                 if (ret != 0) {
1441                         rte_errno = -ret;
1442                         return i;
1443                 }
1444 #endif
1445                 ret = rte_net_intel_cksum_prepare(m);
1446                 if (ret != 0) {
1447                         rte_errno = -ret;
1448                         return i;
1449                 }
1450         }
1451
1452         return i;
1453 }
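/*
 * This callback is registered as dev->tx_pkt_prepare (see
 * eth_igc_tx_queue_setup below), so an application would typically invoke it
 * via rte_eth_tx_prepare() before rte_eth_tx_burst(), e.g.:
 *
 *   nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *   nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 *
 * On error it returns the index of the offending mbuf and sets rte_errno.
 */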
1454
1455 /*
1456  * There are some hardware limitations for TCP segmentation offload (TSO);
1457  * check whether the parameters are valid before using it.
1458  */
1459 static inline uint64_t
1460 check_tso_para(uint64_t ol_req, union igc_tx_offload ol_para)
1461 {
1462         if (!(ol_req & IGC_TX_OFFLOAD_SEG))
1463                 return ol_req;
1464         if (ol_para.tso_segsz > IGC_TSO_MAX_MSS || ol_para.l2_len +
1465                 ol_para.l3_len + ol_para.l4_len > IGC_TSO_MAX_HDRLEN) {
1466                 ol_req &= ~IGC_TX_OFFLOAD_SEG;
1467                 ol_req |= PKT_TX_TCP_CKSUM;
1468         }
1469         return ol_req;
1470 }
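/*
 * For example: a TSO request with tso_segsz above IGC_TSO_MAX_MSS, or with
 * l2_len + l3_len + l4_len above IGC_TSO_MAX_HDRLEN, is silently downgraded
 * here to a plain TCP checksum offload rather than being rejected.
 */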
1471
1472 /*
1473  * Check which hardware context can be used. Use the existing match
1474  * or create a new context descriptor.
1475  */
1476 static inline uint32_t
1477 what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
1478                 union igc_tx_offload tx_offload)
1479 {
1480         uint32_t curr = txq->ctx_curr;
1481
1482         /* Check for a match with the current context. */
1483         if (likely(txq->ctx_cache[curr].flags == flags &&
1484                 txq->ctx_cache[curr].tx_offload.data ==
1485                 (txq->ctx_cache[curr].tx_offload_mask.data &
1486                 tx_offload.data))) {
1487                 return curr;
1488         }
1489
1490         /* There are only two contexts in total; check for a match with the other one. */
1491         curr ^= 1;
1492         if (likely(txq->ctx_cache[curr].flags == flags &&
1493                 txq->ctx_cache[curr].tx_offload.data ==
1494                 (txq->ctx_cache[curr].tx_offload_mask.data &
1495                 tx_offload.data))) {
1496                 txq->ctx_curr = curr;
1497                 return curr;
1498         }
1499
1500         /* Mismatch; a new context descriptor must be built. */
1501         return IGC_CTX_NUM;
1502 }
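/*
 * The queue keeps a two-entry context cache (ctx_cache[0] and ctx_cache[1])
 * mirroring the two hardware context slots. For example, if slot 0 holds an
 * IPv4/TCP checksum context and slot 1 holds a TSO context, packets that
 * alternate between those two offload profiles can keep reusing the cached
 * contexts without building a new context descriptor.
 */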
1503
1504 /*
1505  * This is a separate function; there may be optimization opportunities here.
1506  * Rework is required to go with the pre-defined values.
1507  */
1508 static inline void
1509 igc_set_xmit_ctx(struct igc_tx_queue *txq,
1510                 volatile struct igc_adv_tx_context_desc *ctx_txd,
1511                 uint64_t ol_flags, union igc_tx_offload tx_offload)
1512 {
1513         uint32_t type_tucmd_mlhl;
1514         uint32_t mss_l4len_idx;
1515         uint32_t ctx_curr;
1516         uint32_t vlan_macip_lens;
1517         union igc_tx_offload tx_offload_mask;
1518
1519         /* Switch to the other context slot (the least recently used one). */
1520         txq->ctx_curr ^= 1;
1521         ctx_curr = txq->ctx_curr;
1522
1523         tx_offload_mask.data = 0;
1524         type_tucmd_mlhl = 0;
1525
1526         /* Specify which HW CTX to upload. */
1527         mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1528
1529         if (ol_flags & PKT_TX_VLAN_PKT)
1530                 tx_offload_mask.vlan_tci = 0xffff;
1531
1532         /* Check if TCP/UDP segmentation is required for this packet. */
1533         if (ol_flags & IGC_TX_OFFLOAD_SEG) {
1534                 /* implies IP cksum in IPv4 */
1535                 if (ol_flags & PKT_TX_IP_CKSUM)
1536                         type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4 |
1537                                 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1538                 else
1539                         type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV6 |
1540                                 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1541
1542                 if (ol_flags & PKT_TX_TCP_SEG)
1543                         type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP;
1544                 else
1545                         type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP;
1546
1547                 tx_offload_mask.data |= TX_TSO_CMP_MASK;
1548                 mss_l4len_idx |= (uint32_t)tx_offload.tso_segsz <<
1549                                 IGC_ADVTXD_MSS_SHIFT;
1550                 mss_l4len_idx |= (uint32_t)tx_offload.l4_len <<
1551                                 IGC_ADVTXD_L4LEN_SHIFT;
1552         } else { /* no TSO, check if hardware checksum is needed */
1553                 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
1554                         tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
1555
1556                 if (ol_flags & PKT_TX_IP_CKSUM)
1557                         type_tucmd_mlhl = IGC_ADVTXD_TUCMD_IPV4;
1558
1559                 switch (ol_flags & PKT_TX_L4_MASK) {
1560                 case PKT_TX_TCP_CKSUM:
1561                         type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_TCP |
1562                                 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1563                         mss_l4len_idx |= (uint32_t)sizeof(struct rte_tcp_hdr)
1564                                 << IGC_ADVTXD_L4LEN_SHIFT;
1565                         break;
1566                 case PKT_TX_UDP_CKSUM:
1567                         type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_UDP |
1568                                 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1569                         mss_l4len_idx |= (uint32_t)sizeof(struct rte_udp_hdr)
1570                                 << IGC_ADVTXD_L4LEN_SHIFT;
1571                         break;
1572                 case PKT_TX_SCTP_CKSUM:
1573                         type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_SCTP |
1574                                 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1575                         mss_l4len_idx |= (uint32_t)sizeof(struct rte_sctp_hdr)
1576                                 << IGC_ADVTXD_L4LEN_SHIFT;
1577                         break;
1578                 default:
1579                         type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_L4T_RSV |
1580                                 IGC_ADVTXD_DTYP_CTXT | IGC_ADVTXD_DCMD_DEXT;
1581                         break;
1582                 }
1583         }
1584
1585         txq->ctx_cache[ctx_curr].flags = ol_flags;
1586         txq->ctx_cache[ctx_curr].tx_offload.data =
1587                 tx_offload_mask.data & tx_offload.data;
1588         txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
1589
1590         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
1591         vlan_macip_lens = (uint32_t)tx_offload.data;
1592         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
1593         ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1594         ctx_txd->u.launch_time = 0;
1595 }
1596
1597 static inline uint32_t
1598 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
1599 {
1600         uint32_t cmdtype;
1601         static uint32_t vlan_cmd[2] = {0, IGC_ADVTXD_DCMD_VLE};
1602         static uint32_t tso_cmd[2] = {0, IGC_ADVTXD_DCMD_TSE};
1603         cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
1604         cmdtype |= tso_cmd[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1605         return cmdtype;
1606 }
1607
1608 static inline uint32_t
1609 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
1610 {
1611         static const uint32_t l4_olinfo[2] = {0, IGC_ADVTXD_POPTS_TXSM};
1612         static const uint32_t l3_olinfo[2] = {0, IGC_ADVTXD_POPTS_IXSM};
1613         uint32_t tmp;
1614
1615         tmp  = l4_olinfo[(ol_flags & PKT_TX_L4_MASK)  != PKT_TX_L4_NO_CKSUM];
1616         tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
1617         tmp |= l4_olinfo[(ol_flags & IGC_TX_OFFLOAD_SEG) != 0];
1618         return tmp;
1619 }
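/*
 * Both helpers above are branch-free: each flag test yields 0 or 1, which
 * indexes a two-entry lookup table. For example,
 * l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0] selects IGC_ADVTXD_POPTS_IXSM
 * only when the IP checksum offload flag is set.
 */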
1620
1621 static uint16_t
1622 igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1623 {
1624         struct igc_tx_queue * const txq = tx_queue;
1625         struct igc_tx_entry * const sw_ring = txq->sw_ring;
1626         struct igc_tx_entry *txe, *txn;
1627         volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1628         volatile union igc_adv_tx_desc *txd;
1629         struct rte_mbuf *tx_pkt;
1630         struct rte_mbuf *m_seg;
1631         uint64_t buf_dma_addr;
1632         uint32_t olinfo_status;
1633         uint32_t cmd_type_len;
1634         uint32_t pkt_len;
1635         uint16_t slen;
1636         uint64_t ol_flags;
1637         uint16_t tx_end;
1638         uint16_t tx_id;
1639         uint16_t tx_last;
1640         uint16_t nb_tx;
1641         uint64_t tx_ol_req;
1642         uint32_t new_ctx = 0;
1643         union igc_tx_offload tx_offload = {0};
1644
1645         tx_id = txq->tx_tail;
1646         txe = &sw_ring[tx_id];
1647
1648         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1649                 tx_pkt = *tx_pkts++;
1650                 pkt_len = tx_pkt->pkt_len;
1651
1652                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1653
1654                 /*
1655                  * The number of descriptors that must be allocated for a
1656                  * packet is the number of segments of that packet, plus 1
1657                  * Context Descriptor for the VLAN Tag Identifier, if any.
1658                  * Determine the last TX descriptor to allocate in the TX ring
1659                  * for the packet, starting from the current position (tx_id)
1660                  * in the ring.
1661                  */
1662                 tx_last = (uint16_t)(tx_id + tx_pkt->nb_segs - 1);
1663
1664                 ol_flags = tx_pkt->ol_flags;
1665                 tx_ol_req = ol_flags & IGC_TX_OFFLOAD_MASK;
1666
1667                 /* Check if a Context Descriptor needs to be built. */
1668                 if (tx_ol_req) {
1669                         tx_offload.l2_len = tx_pkt->l2_len;
1670                         tx_offload.l3_len = tx_pkt->l3_len;
1671                         tx_offload.l4_len = tx_pkt->l4_len;
1672                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
1673                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
1674                         tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
1675
1676                         new_ctx = what_advctx_update(txq, tx_ol_req,
1677                                         tx_offload);
1678                         /* Only allocate a context descriptor if required. */
1679                         new_ctx = (new_ctx >= IGC_CTX_NUM);
1680                         tx_last = (uint16_t)(tx_last + new_ctx);
1681                 }
1682                 if (tx_last >= txq->nb_tx_desc)
1683                         tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1684
1685                 PMD_TX_LOG(DEBUG,
1686                         "port_id=%u queue_id=%u pktlen=%u tx_first=%u tx_last=%u",
1687                         txq->port_id, txq->queue_id, pkt_len, tx_id, tx_last);
1688
1689                 /*
1690                  * Check if there are enough free descriptors in the TX ring
1691                  * to transmit the next packet.
1692                  * This operation is based on the two following rules:
1693                  *
1694                  *   1- Only check that the last needed TX descriptor can be
1695                  *      allocated (by construction, if that descriptor is free,
1696                  *      all intermediate ones are also free).
1697                  *
1698                  *      For this purpose, the index of the last TX descriptor
1699                  *      used for a packet (the "last descriptor" of a packet)
1700                  *      is recorded in the TX entries (the last one included)
1701                  *      that are associated with all TX descriptors allocated
1702                  *      for that packet.
1703                  *
1704                  *   2- Avoid allocating the last free TX descriptor of the
1705                  *      ring, in order to never set the TDT register with the
1706                  *      same value stored in parallel by the NIC in the TDH
1707                  *      register, which would make the TX engine of the NIC
1708                  *      enter a deadlock situation.
1709                  *
1710                  *      By extension, avoid allocating a free descriptor that
1711                  *      belongs to the last set of free descriptors allocated
1712                  *      to the same packet previously transmitted.
1713                  */
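                /*
                 * Illustrative example of the three lookups below (indexes
                 * are made up): with a 512-entry ring and tx_last = 100,
                 * sw_ring[100].last_id is the tail of the packet that
                 * previously occupied slot 100; the entry after it begins
                 * the next previously-sent packet, and that packet's last
                 * descriptor is the one whose DD bit must already be set
                 * before slot 100 (and everything before it) may be reused.
                 */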
1714
1715                 /*
1716                  * The "last descriptor" of the previously sent packet, if any,
1717                  * that used the last descriptor we need to allocate.
1718                  */
1719                 tx_end = sw_ring[tx_last].last_id;
1720
1721                 /*
1722                  * The next descriptor following that "last descriptor" in the
1723                  * ring.
1724                  */
1725                 tx_end = sw_ring[tx_end].next_id;
1726
1727                 /*
1728                  * The "last descriptor" associated with that next descriptor.
1729                  */
1730                 tx_end = sw_ring[tx_end].last_id;
1731
1732                 /*
1733                  * Check that this descriptor is free.
1734                  */
1735                 if (!(txr[tx_end].wb.status & IGC_TXD_STAT_DD)) {
1736                         if (nb_tx == 0)
1737                                 return 0;
1738                         goto end_of_tx;
1739                 }
1740
1741                 /*
1742                  * Set common flags of all TX Data Descriptors.
1743                  *
1744                  * The following bits must be set in all Data Descriptors:
1745                  *   - IGC_ADVTXD_DTYP_DATA
1746                  *   - IGC_ADVTXD_DCMD_DEXT
1747                  *
1748                  * The following bits must be set in the first Data Descriptor
1749                  * and are ignored in the other ones:
1750                  *   - IGC_ADVTXD_DCMD_IFCS
1751                  *   - IGC_ADVTXD_MAC_1588
1752                  *   - IGC_ADVTXD_DCMD_VLE
1753                  *
1754                  * The following bits must only be set in the last Data
1755                  * Descriptor:
1756                  *   - IGC_TXD_CMD_EOP
1757                  *
1758                  * The following bits can be set in any Data Descriptor, but
1759                  * are only set in the last Data Descriptor:
1760                  *   - IGC_TXD_CMD_RS
1761                  */
1762                 cmd_type_len = txq->txd_type |
1763                         IGC_ADVTXD_DCMD_IFCS | IGC_ADVTXD_DCMD_DEXT;
1764                 if (tx_ol_req & IGC_TX_OFFLOAD_SEG)
1765                         pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len +
1766                                         tx_pkt->l4_len);
1767                 olinfo_status = (pkt_len << IGC_ADVTXD_PAYLEN_SHIFT);
1768
1769                 /*
1770                  * Timer 0 should be used for packet timestamping;
1771                  * sample the packet timestamp into register 0.
1772                  */
1773                 if (ol_flags & PKT_TX_IEEE1588_TMST)
1774                         cmd_type_len |= IGC_ADVTXD_MAC_TSTAMP;
1775
1776                 if (tx_ol_req) {
1777                         /* Setup TX Advanced context descriptor if required */
1778                         if (new_ctx) {
1779                                 volatile struct igc_adv_tx_context_desc *
1780                                         ctx_txd = (volatile struct
1781                                         igc_adv_tx_context_desc *)&txr[tx_id];
1782
1783                                 txn = &sw_ring[txe->next_id];
1784                                 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1785
1786                                 if (txe->mbuf != NULL) {
1787                                         rte_pktmbuf_free_seg(txe->mbuf);
1788                                         txe->mbuf = NULL;
1789                                 }
1790
1791                                 igc_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
1792                                                 tx_offload);
1793
1794                                 txe->last_id = tx_last;
1795                                 tx_id = txe->next_id;
1796                                 txe = txn;
1797                         }
1798
1799                         /* Setup the TX Advanced Data Descriptor */
1800                         cmd_type_len |=
1801                                 tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
1802                         olinfo_status |=
1803                                 tx_desc_cksum_flags_to_olinfo(tx_ol_req);
1804                         olinfo_status |= (uint32_t)txq->ctx_curr <<
1805                                         IGC_ADVTXD_IDX_SHIFT;
1806                 }
1807
1808                 m_seg = tx_pkt;
1809                 do {
1810                         txn = &sw_ring[txe->next_id];
1811                         RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1812
1813                         txd = &txr[tx_id];
1814
1815                         if (txe->mbuf != NULL)
1816                                 rte_pktmbuf_free_seg(txe->mbuf);
1817                         txe->mbuf = m_seg;
1818
1819                         /* Set up transmit descriptor */
1820                         slen = (uint16_t)m_seg->data_len;
1821                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
1822                         txd->read.buffer_addr =
1823                                 rte_cpu_to_le_64(buf_dma_addr);
1824                         txd->read.cmd_type_len =
1825                                 rte_cpu_to_le_32(cmd_type_len | slen);
1826                         txd->read.olinfo_status =
1827                                 rte_cpu_to_le_32(olinfo_status);
1828                         txe->last_id = tx_last;
1829                         tx_id = txe->next_id;
1830                         txe = txn;
1831                         m_seg = m_seg->next;
1832                 } while (m_seg != NULL);
1833
1834                 /*
1835                  * The last packet data descriptor needs End Of Packet (EOP)
1836                  * and Report Status (RS).
1837                  */
1838                 txd->read.cmd_type_len |=
1839                         rte_cpu_to_le_32(IGC_TXD_CMD_EOP | IGC_TXD_CMD_RS);
1840         }
1841 end_of_tx:
1842         rte_wmb();
1843
1844         /*
1845          * Set the Transmit Descriptor Tail (TDT).
1846          */
1847         IGC_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
1848         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1849                 txq->port_id, txq->queue_id, tx_id, nb_tx);
1850         txq->tx_tail = tx_id;
1851
1852         return nb_tx;
1853 }
1854
1855 int eth_igc_tx_descriptor_status(void *tx_queue, uint16_t offset)
1856 {
1857         struct igc_tx_queue *txq = tx_queue;
1858         volatile uint32_t *status;
1859         uint32_t desc;
1860
1861         if (unlikely(!txq || offset >= txq->nb_tx_desc))
1862                 return -EINVAL;
1863
1864         desc = txq->tx_tail + offset;
1865         if (desc >= txq->nb_tx_desc)
1866                 desc -= txq->nb_tx_desc;
1867
1868         status = &txq->tx_ring[desc].wb.status;
1869         if (*status & rte_cpu_to_le_32(IGC_TXD_STAT_DD))
1870                 return RTE_ETH_TX_DESC_DONE;
1871
1872         return RTE_ETH_TX_DESC_FULL;
1873 }
1874
1875 static void
1876 igc_tx_queue_release_mbufs(struct igc_tx_queue *txq)
1877 {
1878         unsigned int i;
1879
1880         if (txq->sw_ring != NULL) {
1881                 for (i = 0; i < txq->nb_tx_desc; i++) {
1882                         if (txq->sw_ring[i].mbuf != NULL) {
1883                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1884                                 txq->sw_ring[i].mbuf = NULL;
1885                         }
1886                 }
1887         }
1888 }
1889
1890 static void
1891 igc_tx_queue_release(struct igc_tx_queue *txq)
1892 {
1893         igc_tx_queue_release_mbufs(txq);
1894         rte_free(txq->sw_ring);
1895         rte_free(txq);
1896 }
1897
1898 void eth_igc_tx_queue_release(void *txq)
1899 {
1900         if (txq)
1901                 igc_tx_queue_release(txq);
1902 }
1903
1904 static void
1905 igc_reset_tx_queue_stat(struct igc_tx_queue *txq)
1906 {
1907         txq->tx_head = 0;
1908         txq->tx_tail = 0;
1909         txq->ctx_curr = 0;
1910         memset((void *)&txq->ctx_cache, 0,
1911                 IGC_CTX_NUM * sizeof(struct igc_advctx_info));
1912 }
1913
1914 static void
1915 igc_reset_tx_queue(struct igc_tx_queue *txq)
1916 {
1917         struct igc_tx_entry *txe = txq->sw_ring;
1918         uint16_t i, prev;
1919
1920         /* Initialize ring entries */
1921         prev = (uint16_t)(txq->nb_tx_desc - 1);
1922         for (i = 0; i < txq->nb_tx_desc; i++) {
1923                 volatile union igc_adv_tx_desc *txd = &txq->tx_ring[i];
1924
1925                 txd->wb.status = IGC_TXD_STAT_DD;
1926                 txe[i].mbuf = NULL;
1927                 txe[i].last_id = i;
1928                 txe[prev].next_id = i;
1929                 prev = i;
1930         }
1931
1932         txq->txd_type = IGC_ADVTXD_DTYP_DATA;
1933         igc_reset_tx_queue_stat(txq);
1934 }
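/*
 * Note: presetting IGC_TXD_STAT_DD on every descriptor above makes the whole
 * ring appear free to the descriptor-availability check in igc_xmit_pkts(),
 * so the full ring can be used immediately after a reset.
 */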
1935
1936 /*
1937  * Clear all RX/TX queues.
1938  */
1939 void
1940 igc_dev_clear_queues(struct rte_eth_dev *dev)
1941 {
1942         uint16_t i;
1943         struct igc_tx_queue *txq;
1944         struct igc_rx_queue *rxq;
1945
1946         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1947                 txq = dev->data->tx_queues[i];
1948                 if (txq != NULL) {
1949                         igc_tx_queue_release_mbufs(txq);
1950                         igc_reset_tx_queue(txq);
1951                 }
1952         }
1953
1954         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1955                 rxq = dev->data->rx_queues[i];
1956                 if (rxq != NULL) {
1957                         igc_rx_queue_release_mbufs(rxq);
1958                         igc_reset_rx_queue(rxq);
1959                 }
1960         }
1961 }
1962
1963 int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
1964                 uint16_t nb_desc, unsigned int socket_id,
1965                 const struct rte_eth_txconf *tx_conf)
1966 {
1967         const struct rte_memzone *tz;
1968         struct igc_tx_queue *txq;
1969         struct igc_hw *hw;
1970         uint32_t size;
1971
1972         if (nb_desc % IGC_TX_DESCRIPTOR_MULTIPLE != 0 ||
1973                 nb_desc > IGC_MAX_TXD || nb_desc < IGC_MIN_TXD) {
1974                 PMD_DRV_LOG(ERR,
1975                         "TX descriptors must be a multiple of %u and between %u and %u, cur: %u",
1976                         IGC_TX_DESCRIPTOR_MULTIPLE,
1977                         IGC_MIN_TXD, IGC_MAX_TXD, nb_desc);
1978                 return -EINVAL;
1979         }
1980
1981         hw = IGC_DEV_PRIVATE_HW(dev);
1982
1983         /*
1984          * The tx_free_thresh and tx_rs_thresh values are not used in the 2.5G
1985          * driver.
1986          */
1987         if (tx_conf->tx_free_thresh != 0)
1988                 PMD_DRV_LOG(INFO,
1989                         "The tx_free_thresh parameter is not used for the 2.5G driver");
1990         if (tx_conf->tx_rs_thresh != 0)
1991                 PMD_DRV_LOG(INFO,
1992                         "The tx_rs_thresh parameter is not used for the 2.5G driver");
1993         if (tx_conf->tx_thresh.wthresh == 0)
1994                 PMD_DRV_LOG(INFO,
1995                         "To improve 2.5G driver performance, consider setting the TX WTHRESH value to 4, 8, or 16.");
1996
1997         /* Free memory prior to re-allocation if needed */
1998         if (dev->data->tx_queues[queue_idx] != NULL) {
1999                 igc_tx_queue_release(dev->data->tx_queues[queue_idx]);
2000                 dev->data->tx_queues[queue_idx] = NULL;
2001         }
2002
2003         /* First allocate the tx queue data structure */
2004         txq = rte_zmalloc("ethdev TX queue", sizeof(struct igc_tx_queue),
2005                                                 RTE_CACHE_LINE_SIZE);
2006         if (txq == NULL)
2007                 return -ENOMEM;
2008
2009         /*
2010          * Allocate TX ring hardware descriptors. A memzone large enough to
2011          * handle the maximum ring size is allocated in order to allow for
2012          * resizing in later calls to the queue setup function.
2013          */
2014         size = sizeof(union igc_adv_tx_desc) * IGC_MAX_TXD;
2015         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
2016                                       IGC_ALIGN, socket_id);
2017         if (tz == NULL) {
2018                 igc_tx_queue_release(txq);
2019                 return -ENOMEM;
2020         }
2021
2022         txq->nb_tx_desc = nb_desc;
2023         txq->pthresh = tx_conf->tx_thresh.pthresh;
2024         txq->hthresh = tx_conf->tx_thresh.hthresh;
2025         txq->wthresh = tx_conf->tx_thresh.wthresh;
2026
2027         txq->queue_id = queue_idx;
2028         txq->reg_idx = queue_idx;
2029         txq->port_id = dev->data->port_id;
2030
2031         txq->tdt_reg_addr = IGC_PCI_REG_ADDR(hw, IGC_TDT(txq->reg_idx));
2032         txq->tx_ring_phys_addr = tz->iova;
2033
2034         txq->tx_ring = (union igc_adv_tx_desc *)tz->addr;
2035         /* Allocate software ring */
2036         txq->sw_ring = rte_zmalloc("txq->sw_ring",
2037                                    sizeof(struct igc_tx_entry) * nb_desc,
2038                                    RTE_CACHE_LINE_SIZE);
2039         if (txq->sw_ring == NULL) {
2040                 igc_tx_queue_release(txq);
2041                 return -ENOMEM;
2042         }
2043         PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
2044                 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2045
2046         igc_reset_tx_queue(txq);
2047         dev->tx_pkt_burst = igc_xmit_pkts;
2048         dev->tx_pkt_prepare = &eth_igc_prep_pkts;
2049         dev->data->tx_queues[queue_idx] = txq;
2050         txq->offloads = tx_conf->offloads;
2051
2052         return 0;
2053 }
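/*
 * Note: applications reach this function through rte_eth_tx_queue_setup().
 * A minimal, illustrative call (parameter values are examples only):
 *
 *   struct rte_eth_txconf txconf = dev_info.default_txconf;
 *   txconf.tx_thresh.wthresh = 4;   // one of the values suggested above
 *   ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 */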
2054
2055 int
2056 eth_igc_tx_done_cleanup(void *txqueue, uint32_t free_cnt)
2057 {
2058         struct igc_tx_queue *txq = txqueue;
2059         struct igc_tx_entry *sw_ring;
2060         volatile union igc_adv_tx_desc *txr;
2061         uint16_t tx_first; /* First segment analyzed. */
2062         uint16_t tx_id;    /* Current segment being processed. */
2063         uint16_t tx_last;  /* Last segment in the current packet. */
2064         uint16_t tx_next;  /* First segment of the next packet. */
2065         uint32_t count;
2066
2067         if (txq == NULL)
2068                 return -ENODEV;
2069
2070         count = 0;
2071         sw_ring = txq->sw_ring;
2072         txr = txq->tx_ring;
2073
2074         /*
2075          * tx_tail is the last sent packet on the sw_ring. Go to the end
2076          * of that packet (the last segment in the packet chain) and
2077          * then the next segment will be the first segment of the oldest
2078          * packet in the sw_ring. This is the first packet that will be
2079          * attempted to be freed.
2080          */
2081
2082         /* Get last segment in most recently added packet. */
2083         tx_first = sw_ring[txq->tx_tail].last_id;
2084
2085         /* Get the next segment, which is the oldest segment in ring. */
2086         tx_first = sw_ring[tx_first].next_id;
2087
2088         /* Set the current index to the first. */
2089         tx_id = tx_first;
2090
2091         /*
2092          * Loop through each packet. For each packet, verify that an
2093          * mbuf exists and that the last segment is free. If so, free
2094          * it and move on.
2095          */
2096         while (1) {
2097                 tx_last = sw_ring[tx_id].last_id;
2098
2099                 if (sw_ring[tx_last].mbuf) {
2100                         if (!(txr[tx_last].wb.status &
2101                                         rte_cpu_to_le_32(IGC_TXD_STAT_DD)))
2102                                 break;
2103
2104                         /* Get the start of the next packet. */
2105                         tx_next = sw_ring[tx_last].next_id;
2106
2107                         /*
2108                          * Loop through all segments in a
2109                          * packet.
2110                          */
2111                         do {
2112                                 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
2113                                 sw_ring[tx_id].mbuf = NULL;
2114                                 sw_ring[tx_id].last_id = tx_id;
2115
2116                                 /* Move to the next segment. */
2117                                 tx_id = sw_ring[tx_id].next_id;
2118                         } while (tx_id != tx_next);
2119
2120                         /*
2121                          * Increment the number of packets
2122                          * freed.
2123                          */
2124                         count++;
2125                         if (unlikely(count == free_cnt))
2126                                 break;
2127                 } else {
2128                         /*
2129                          * There are multiple reasons to be here:
2130                          * 1) All the packets on the ring have been
2131                          *    freed - tx_id is equal to tx_first
2132                          *    and some packets have been freed.
2133                          *    - Done, exit
2134                          * 2) The interface has not sent a ring's worth of
2135                          *    packets yet, so the segment after tail is
2136                          *    still empty. Or a previous call to this
2137                          *    function freed some of the segments but
2138                          *    not all, so there is a hole in the list.
2139                          *    Hopefully this is a rare case.
2140                          *    - Walk the list and find the next mbuf. If
2141                          *      there isn't one, then done.
2142                          */
2143                         if (likely(tx_id == tx_first && count != 0))
2144                                 break;
2145
2146                         /*
2147                          * Walk the list and find the next mbuf, if any.
2148                          */
2149                         do {
2150                                 /* Move to the next segment. */
2151                                 tx_id = sw_ring[tx_id].next_id;
2152
2153                                 if (sw_ring[tx_id].mbuf)
2154                                         break;
2155
2156                         } while (tx_id != tx_first);
2157
2158                         /*
2159                          * Determine why the previous loop bailed. If there
2160                          * is no mbuf, we are done.
2161                          */
2162                         if (sw_ring[tx_id].mbuf == NULL)
2163                                 break;
2164                 }
2165         }
2166
2167         return count;
2168 }
2169
2170 void
2171 igc_tx_init(struct rte_eth_dev *dev)
2172 {
2173         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2174         uint32_t tctl;
2175         uint32_t txdctl;
2176         uint16_t i;
2177
2178         /* Setup the Base and Length of the Tx Descriptor Rings. */
2179         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2180                 struct igc_tx_queue *txq = dev->data->tx_queues[i];
2181                 uint64_t bus_addr = txq->tx_ring_phys_addr;
2182
2183                 IGC_WRITE_REG(hw, IGC_TDLEN(txq->reg_idx),
2184                                 txq->nb_tx_desc *
2185                                 sizeof(union igc_adv_tx_desc));
2186                 IGC_WRITE_REG(hw, IGC_TDBAH(txq->reg_idx),
2187                                 (uint32_t)(bus_addr >> 32));
2188                 IGC_WRITE_REG(hw, IGC_TDBAL(txq->reg_idx),
2189                                 (uint32_t)bus_addr);
2190
2191                 /* Setup the HW Tx Head and Tail descriptor pointers. */
2192                 IGC_WRITE_REG(hw, IGC_TDT(txq->reg_idx), 0);
2193                 IGC_WRITE_REG(hw, IGC_TDH(txq->reg_idx), 0);
2194
2195                 /* Setup Transmit threshold registers. */
2196                 txdctl = ((uint32_t)txq->pthresh << IGC_TXDCTL_PTHRESH_SHIFT) &
2197                                 IGC_TXDCTL_PTHRESH_MSK;
2198                 txdctl |= ((uint32_t)txq->hthresh << IGC_TXDCTL_HTHRESH_SHIFT) &
2199                                 IGC_TXDCTL_HTHRESH_MSK;
2200                 txdctl |= ((uint32_t)txq->wthresh << IGC_TXDCTL_WTHRESH_SHIFT) &
2201                                 IGC_TXDCTL_WTHRESH_MSK;
2202                 txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
2203                 IGC_WRITE_REG(hw, IGC_TXDCTL(txq->reg_idx), txdctl);
2204         }
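        /*
         * Example of the TXDCTL packing above (values are illustrative):
         * pthresh = 8, hthresh = 1, wthresh = 16 gives
         * txdctl = 0x00100108 | IGC_TXDCTL_QUEUE_ENABLE.
         */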
2205
2206         igc_config_collision_dist(hw);
2207
2208         /* Program the Transmit Control Register. */
2209         tctl = IGC_READ_REG(hw, IGC_TCTL);
2210         tctl &= ~IGC_TCTL_CT;
2211         tctl |= (IGC_TCTL_PSP | IGC_TCTL_RTLC | IGC_TCTL_EN |
2212                  ((uint32_t)IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT));
2213
2214         /* This write will effectively turn on the transmit unit. */
2215         IGC_WRITE_REG(hw, IGC_TCTL, tctl);
2216 }
2217
2218 void
2219 eth_igc_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2220         struct rte_eth_rxq_info *qinfo)
2221 {
2222         struct igc_rx_queue *rxq;
2223
2224         rxq = dev->data->rx_queues[queue_id];
2225
2226         qinfo->mp = rxq->mb_pool;
2227         qinfo->scattered_rx = dev->data->scattered_rx;
2228         qinfo->nb_desc = rxq->nb_rx_desc;
2229
2230         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2231         qinfo->conf.rx_drop_en = rxq->drop_en;
2232         qinfo->conf.offloads = rxq->offloads;
2233         qinfo->conf.rx_thresh.hthresh = rxq->hthresh;
2234         qinfo->conf.rx_thresh.pthresh = rxq->pthresh;
2235         qinfo->conf.rx_thresh.wthresh = rxq->wthresh;
2236 }
2237
2238 void
2239 eth_igc_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2240         struct rte_eth_txq_info *qinfo)
2241 {
2242         struct igc_tx_queue *txq;
2243
2244         txq = dev->data->tx_queues[queue_id];
2245
2246         qinfo->nb_desc = txq->nb_tx_desc;
2247
2248         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2249         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2250         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2251         qinfo->conf.offloads = txq->offloads;
2252 }
2253
2254 void
2255 eth_igc_vlan_strip_queue_set(struct rte_eth_dev *dev,
2256                         uint16_t rx_queue_id, int on)
2257 {
2258         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2259         struct igc_rx_queue *rxq = dev->data->rx_queues[rx_queue_id];
2260         uint32_t reg_val;
2261
2262         if (rx_queue_id >= IGC_QUEUE_PAIRS_NUM) {
2263                 PMD_DRV_LOG(ERR, "Queue index (%u) is illegal, max is %u",
2264                         rx_queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2265                 return;
2266         }
2267
2268         reg_val = IGC_READ_REG(hw, IGC_DVMOLR(rx_queue_id));
2269         if (on) {
2270                 /* If the VLAN tag has been stripped off, the CRC is meaningless. */
2271                 reg_val |= IGC_DVMOLR_STRVLAN | IGC_DVMOLR_STRCRC;
2272                 rxq->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2273         } else {
2274                 reg_val &= ~(IGC_DVMOLR_STRVLAN | IGC_DVMOLR_HIDVLAN |
2275                                 IGC_DVMOLR_STRCRC);
2276                 rxq->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
2277         }
2278
2279         IGC_WRITE_REG(hw, IGC_DVMOLR(rx_queue_id), reg_val);
2280 }