net/igc: support RSS
[dpdk.git] drivers/net/igc/igc_ethdev.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019-2020 Intel Corporation
 */

#include <stdint.h>
#include <string.h>

#include <rte_string_fns.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <rte_malloc.h>
#include <rte_alarm.h>

#include "igc_logs.h"
#include "igc_txrx.h"

#define IGC_INTEL_VENDOR_ID		0x8086

/*
 * The overhead from MTU to max frame size.
 * Considering VLAN, one tag needs to be counted.
 */
#define IGC_ETH_OVERHEAD		(RTE_ETHER_HDR_LEN + \
					RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE)
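
/*
 * For example: with the standard values RTE_ETHER_HDR_LEN = 14,
 * RTE_ETHER_CRC_LEN = 4 and VLAN_TAG_SIZE = 4, a 1500-byte MTU
 * corresponds to a max frame size of 1500 + 14 + 4 + 4 = 1522 bytes.
 */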

#define IGC_FC_PAUSE_TIME		0x0680
#define IGC_LINK_UPDATE_CHECK_TIMEOUT	90  /* 9s */
#define IGC_LINK_UPDATE_CHECK_INTERVAL	100 /* ms */

#define IGC_MISC_VEC_ID			RTE_INTR_VEC_ZERO_OFFSET
#define IGC_RX_VEC_START		RTE_INTR_VEC_RXTX_OFFSET
#define IGC_MSIX_OTHER_INTR_VEC		0   /* MSI-X other interrupt vector */
#define IGC_FLAG_NEED_LINK_UPDATE	(1u << 0)	/* need to update the link */

#define IGC_DEFAULT_RX_FREE_THRESH	32

#define IGC_DEFAULT_RX_PTHRESH		8
#define IGC_DEFAULT_RX_HTHRESH		8
#define IGC_DEFAULT_RX_WTHRESH		4

#define IGC_DEFAULT_TX_PTHRESH		8
#define IGC_DEFAULT_TX_HTHRESH		1
#define IGC_DEFAULT_TX_WTHRESH		16

/* External VLAN Enable bit mask */
#define IGC_CTRL_EXT_EXT_VLAN		(1u << 26)

/* Per Queue Good Packets Received Count */
#define IGC_PQGPRC(idx)		(0x10010 + 0x100 * (idx))
/* Per Queue Good Octets Received Count */
#define IGC_PQGORC(idx)		(0x10018 + 0x100 * (idx))
/* Per Queue Good Octets Transmitted Count */
#define IGC_PQGOTC(idx)		(0x10034 + 0x100 * (idx))
/* Per Queue Multicast Packets Received Count */
#define IGC_PQMPRC(idx)		(0x10038 + 0x100 * (idx))
/* Transmit Queue Drop Packet Count */
#define IGC_TQDPC(idx)		(0xe030 + 0x40 * (idx))
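
/*
 * For example: the per-queue RX counter registers are laid out with a
 * 0x100 stride, so IGC_PQGPRC(0) = 0x10010 and IGC_PQGPRC(1) = 0x10110,
 * while the TX drop counters use a 0x40 stride: IGC_TQDPC(1) = 0xe070.
 */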

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
#define U32_0_IN_U64		0	/* lower bytes of u64 */
#define U32_1_IN_U64		1	/* higher bytes of u64 */
#else
#define U32_0_IN_U64		1
#define U32_1_IN_U64		0
#endif
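
/*
 * A minimal sketch (names hypothetical) of how these indices are used:
 * the per-queue counter registers are only 32 bits wide, so the halves
 * of an accumulating 64-bit software counter can be addressed portably:
 *
 *	uint32_t *halves = (uint32_t *)&counter64;
 *
 *	halves[U32_0_IN_U64] = IGC_READ_REG(hw, IGC_PQGPRC(i));
 *
 * without a byte-order #ifdef at each read site.
 */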

#define IGC_ALARM_INTERVAL	8000000u
/* microseconds; some per-queue registers wrap around back to 0 after about 13.6s */
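/*
 * Rough sanity check for the 8s interval: a 32-bit byte counter at the
 * maximum 2.5Gbps line rate wraps after 2^32 / (2.5e9 / 8) ~= 13.7s,
 * roughly matching the ~13.6s noted above, so reading every 8s
 * guarantees no wrap is missed between two reads.
 */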

static const struct rte_eth_desc_lim rx_desc_lim = {
	.nb_max = IGC_MAX_RXD,
	.nb_min = IGC_MIN_RXD,
	.nb_align = IGC_RXD_ALIGN,
};

static const struct rte_eth_desc_lim tx_desc_lim = {
	.nb_max = IGC_MAX_TXD,
	.nb_min = IGC_MIN_TXD,
	.nb_align = IGC_TXD_ALIGN,
	.nb_seg_max = IGC_TX_MAX_SEG,
	.nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG,
};

static const struct rte_pci_id pci_id_igc_map[] = {
	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) },
	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V)  },
	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_I)  },
	{ RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_K)  },
	{ .vendor_id = 0, /* sentinel */ },
};

/* store statistics names and their offsets in the stats structure */
struct rte_igc_xstats_name_off {
	char name[RTE_ETH_XSTATS_NAME_SIZE];
	unsigned int offset;
};

static const struct rte_igc_xstats_name_off rte_igc_stats_strings[] = {
	{"rx_crc_errors", offsetof(struct igc_hw_stats, crcerrs)},
	{"rx_align_errors", offsetof(struct igc_hw_stats, algnerrc)},
	{"rx_errors", offsetof(struct igc_hw_stats, rxerrc)},
	{"rx_missed_packets", offsetof(struct igc_hw_stats, mpc)},
	{"tx_single_collision_packets", offsetof(struct igc_hw_stats, scc)},
	{"tx_multiple_collision_packets", offsetof(struct igc_hw_stats, mcc)},
	{"tx_excessive_collision_packets", offsetof(struct igc_hw_stats,
		ecol)},
	{"tx_late_collisions", offsetof(struct igc_hw_stats, latecol)},
	{"tx_total_collisions", offsetof(struct igc_hw_stats, colc)},
	{"tx_deferred_packets", offsetof(struct igc_hw_stats, dc)},
	{"tx_no_carrier_sense_packets", offsetof(struct igc_hw_stats, tncrs)},
	{"tx_discarded_packets", offsetof(struct igc_hw_stats, htdpmc)},
	{"rx_length_errors", offsetof(struct igc_hw_stats, rlec)},
	{"rx_xon_packets", offsetof(struct igc_hw_stats, xonrxc)},
	{"tx_xon_packets", offsetof(struct igc_hw_stats, xontxc)},
	{"rx_xoff_packets", offsetof(struct igc_hw_stats, xoffrxc)},
	{"tx_xoff_packets", offsetof(struct igc_hw_stats, xofftxc)},
	{"rx_flow_control_unsupported_packets", offsetof(struct igc_hw_stats,
		fcruc)},
	{"rx_size_64_packets", offsetof(struct igc_hw_stats, prc64)},
	{"rx_size_65_to_127_packets", offsetof(struct igc_hw_stats, prc127)},
	{"rx_size_128_to_255_packets", offsetof(struct igc_hw_stats, prc255)},
	{"rx_size_256_to_511_packets", offsetof(struct igc_hw_stats, prc511)},
	{"rx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
		prc1023)},
	{"rx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
		prc1522)},
	{"rx_broadcast_packets", offsetof(struct igc_hw_stats, bprc)},
	{"rx_multicast_packets", offsetof(struct igc_hw_stats, mprc)},
	{"rx_undersize_errors", offsetof(struct igc_hw_stats, ruc)},
	{"rx_fragment_errors", offsetof(struct igc_hw_stats, rfc)},
	{"rx_oversize_errors", offsetof(struct igc_hw_stats, roc)},
	{"rx_jabber_errors", offsetof(struct igc_hw_stats, rjc)},
	{"rx_no_buffers", offsetof(struct igc_hw_stats, rnbc)},
	{"rx_management_packets", offsetof(struct igc_hw_stats, mgprc)},
	{"rx_management_dropped", offsetof(struct igc_hw_stats, mgpdc)},
	{"tx_management_packets", offsetof(struct igc_hw_stats, mgptc)},
	{"rx_total_packets", offsetof(struct igc_hw_stats, tpr)},
	{"tx_total_packets", offsetof(struct igc_hw_stats, tpt)},
	{"rx_total_bytes", offsetof(struct igc_hw_stats, tor)},
	{"tx_total_bytes", offsetof(struct igc_hw_stats, tot)},
	{"tx_size_64_packets", offsetof(struct igc_hw_stats, ptc64)},
	{"tx_size_65_to_127_packets", offsetof(struct igc_hw_stats, ptc127)},
	{"tx_size_128_to_255_packets", offsetof(struct igc_hw_stats, ptc255)},
	{"tx_size_256_to_511_packets", offsetof(struct igc_hw_stats, ptc511)},
	{"tx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
		ptc1023)},
	{"tx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
		ptc1522)},
	{"tx_multicast_packets", offsetof(struct igc_hw_stats, mptc)},
	{"tx_broadcast_packets", offsetof(struct igc_hw_stats, bptc)},
	{"tx_tso_packets", offsetof(struct igc_hw_stats, tsctc)},
	{"rx_sent_to_host_packets", offsetof(struct igc_hw_stats, rpthc)},
	{"tx_sent_by_host_packets", offsetof(struct igc_hw_stats, hgptc)},
	{"interrupt_assert_count", offsetof(struct igc_hw_stats, iac)},
	{"rx_descriptor_lower_threshold",
		offsetof(struct igc_hw_stats, icrxdmtc)},
};

#define IGC_NB_XSTATS (sizeof(rte_igc_stats_strings) / \
		sizeof(rte_igc_stats_strings[0]))
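
/*
 * The table above is what applications see through the xstats API; a
 * minimal usage sketch (error handling omitted):
 *
 *	struct rte_eth_xstat xstats[IGC_NB_XSTATS];
 *	int n = rte_eth_xstats_get(port_id, xstats, IGC_NB_XSTATS);
 *
 * eth_igc_xstats_get(), declared below, is expected to fill one entry
 * per row of this table.
 */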

static int eth_igc_configure(struct rte_eth_dev *dev);
static int eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static void eth_igc_stop(struct rte_eth_dev *dev);
static int eth_igc_start(struct rte_eth_dev *dev);
static int eth_igc_set_link_up(struct rte_eth_dev *dev);
static int eth_igc_set_link_down(struct rte_eth_dev *dev);
static void eth_igc_close(struct rte_eth_dev *dev);
static int eth_igc_reset(struct rte_eth_dev *dev);
static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev);
static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev);
static int eth_igc_fw_version_get(struct rte_eth_dev *dev,
				char *fw_version, size_t fw_size);
static int eth_igc_infos_get(struct rte_eth_dev *dev,
			struct rte_eth_dev_info *dev_info);
static int eth_igc_led_on(struct rte_eth_dev *dev);
static int eth_igc_led_off(struct rte_eth_dev *dev);
static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev);
static int eth_igc_rar_set(struct rte_eth_dev *dev,
		struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool);
static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index);
static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
			struct rte_ether_addr *addr);
static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
			struct rte_ether_addr *mc_addr_set,
			uint32_t nb_mc_addr);
static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev);
static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev);
static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int eth_igc_stats_get(struct rte_eth_dev *dev,
			struct rte_eth_stats *rte_stats);
static int eth_igc_xstats_get(struct rte_eth_dev *dev,
			struct rte_eth_xstat *xstats, unsigned int n);
static int eth_igc_xstats_get_by_id(struct rte_eth_dev *dev,
				const uint64_t *ids,
				uint64_t *values, unsigned int n);
static int eth_igc_xstats_get_names(struct rte_eth_dev *dev,
				struct rte_eth_xstat_name *xstats_names,
				unsigned int size);
static int eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
		struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
		unsigned int limit);
static int eth_igc_xstats_reset(struct rte_eth_dev *dev);
static int
eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
	uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx);
static int
eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
static int
eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
static int
eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
static int
eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
static int eth_igc_rss_reta_update(struct rte_eth_dev *dev,
			struct rte_eth_rss_reta_entry64 *reta_conf,
			uint16_t reta_size);
static int eth_igc_rss_reta_query(struct rte_eth_dev *dev,
			struct rte_eth_rss_reta_entry64 *reta_conf,
			uint16_t reta_size);
static int eth_igc_rss_hash_update(struct rte_eth_dev *dev,
			struct rte_eth_rss_conf *rss_conf);
static int eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
			struct rte_eth_rss_conf *rss_conf);

static const struct eth_dev_ops eth_igc_ops = {
	.dev_configure		= eth_igc_configure,
	.link_update		= eth_igc_link_update,
	.dev_stop		= eth_igc_stop,
	.dev_start		= eth_igc_start,
	.dev_close		= eth_igc_close,
	.dev_reset		= eth_igc_reset,
	.dev_set_link_up	= eth_igc_set_link_up,
	.dev_set_link_down	= eth_igc_set_link_down,
	.promiscuous_enable	= eth_igc_promiscuous_enable,
	.promiscuous_disable	= eth_igc_promiscuous_disable,
	.allmulticast_enable	= eth_igc_allmulticast_enable,
	.allmulticast_disable	= eth_igc_allmulticast_disable,
	.fw_version_get		= eth_igc_fw_version_get,
	.dev_infos_get		= eth_igc_infos_get,
	.dev_led_on		= eth_igc_led_on,
	.dev_led_off		= eth_igc_led_off,
	.dev_supported_ptypes_get = eth_igc_supported_ptypes_get,
	.mtu_set		= eth_igc_mtu_set,
	.mac_addr_add		= eth_igc_rar_set,
	.mac_addr_remove	= eth_igc_rar_clear,
	.mac_addr_set		= eth_igc_default_mac_addr_set,
	.set_mc_addr_list	= eth_igc_set_mc_addr_list,

	.rx_queue_setup		= eth_igc_rx_queue_setup,
	.rx_queue_release	= eth_igc_rx_queue_release,
	.rx_queue_count		= eth_igc_rx_queue_count,
	.rx_descriptor_done	= eth_igc_rx_descriptor_done,
	.rx_descriptor_status	= eth_igc_rx_descriptor_status,
	.tx_descriptor_status	= eth_igc_tx_descriptor_status,
	.tx_queue_setup		= eth_igc_tx_queue_setup,
	.tx_queue_release	= eth_igc_tx_queue_release,
	.tx_done_cleanup	= eth_igc_tx_done_cleanup,
	.rxq_info_get		= eth_igc_rxq_info_get,
	.txq_info_get		= eth_igc_txq_info_get,
	.stats_get		= eth_igc_stats_get,
	.xstats_get		= eth_igc_xstats_get,
	.xstats_get_by_id	= eth_igc_xstats_get_by_id,
	.xstats_get_names_by_id	= eth_igc_xstats_get_names_by_id,
	.xstats_get_names	= eth_igc_xstats_get_names,
	.stats_reset		= eth_igc_xstats_reset,
	.xstats_reset		= eth_igc_xstats_reset,
	.queue_stats_mapping_set = eth_igc_queue_stats_mapping_set,
	.rx_queue_intr_enable	= eth_igc_rx_queue_intr_enable,
	.rx_queue_intr_disable	= eth_igc_rx_queue_intr_disable,
	.flow_ctrl_get		= eth_igc_flow_ctrl_get,
	.flow_ctrl_set		= eth_igc_flow_ctrl_set,
	.reta_update		= eth_igc_rss_reta_update,
	.reta_query		= eth_igc_rss_reta_query,
	.rss_hash_update	= eth_igc_rss_hash_update,
	.rss_hash_conf_get	= eth_igc_rss_hash_conf_get,
};
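
/*
 * The four RSS callbacks registered above back the generic ethdev RSS
 * API. A minimal application-side sketch (hash types illustrative only):
 *
 *	struct rte_eth_rss_conf conf = {
 *		.rss_key = NULL,	// keep the driver's current key
 *		.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *	};
 *	rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * ends up in eth_igc_rss_hash_update(); likewise
 * rte_eth_dev_rss_reta_update() ends up in eth_igc_rss_reta_update().
 */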

/*
 * multiple queue mode checking
 */
static int
igc_check_mq_mode(struct rte_eth_dev *dev)
{
	enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode;
	enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode;

	if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
		PMD_INIT_LOG(ERR, "SRIOV is not supported.");
		return -EINVAL;
	}

	if (rx_mq_mode != ETH_MQ_RX_NONE &&
		rx_mq_mode != ETH_MQ_RX_RSS) {
		/* RSS together with VMDq not supported */
		PMD_INIT_LOG(ERR, "RX mode %d is not supported.",
				rx_mq_mode);
		return -EINVAL;
	}

	/* To not break software that sets an invalid mode, only display
	 * a warning if an invalid mode is used.
	 */
	if (tx_mq_mode != ETH_MQ_TX_NONE)
		PMD_INIT_LOG(WARNING,
			"TX mode %d is not supported. It is meaningless in this driver, so it is ignored",
			tx_mq_mode);

	return 0;
}

static int
eth_igc_configure(struct rte_eth_dev *dev)
{
	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
	int ret;

	PMD_INIT_FUNC_TRACE();

	ret = igc_check_mq_mode(dev);
	if (ret != 0)
		return ret;

	intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
	return 0;
}

static int
eth_igc_set_link_up(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

	if (hw->phy.media_type == igc_media_type_copper)
		igc_power_up_phy(hw);
	else
		igc_power_up_fiber_serdes_link(hw);
	return 0;
}

static int
eth_igc_set_link_down(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

	if (hw->phy.media_type == igc_media_type_copper)
		igc_power_down_phy(hw);
	else
		igc_shutdown_fiber_serdes_link(hw);
	return 0;
}

/*
 * disable other interrupts
 */
static void
igc_intr_other_disable(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

	if (rte_intr_allow_others(intr_handle) &&
		dev->data->dev_conf.intr_conf.lsc) {
		IGC_WRITE_REG(hw, IGC_EIMC, 1u << IGC_MSIX_OTHER_INTR_VEC);
	}

	IGC_WRITE_REG(hw, IGC_IMC, ~0);
	IGC_WRITE_FLUSH(hw);
}

/*
 * enable other interrupts
 */
static inline void
igc_intr_other_enable(struct rte_eth_dev *dev)
{
	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

	if (rte_intr_allow_others(intr_handle) &&
		dev->data->dev_conf.intr_conf.lsc) {
		IGC_WRITE_REG(hw, IGC_EIMS, 1u << IGC_MSIX_OTHER_INTR_VEC);
	}

	IGC_WRITE_REG(hw, IGC_IMS, intr->mask);
	IGC_WRITE_FLUSH(hw);
}

/*
 * Read the ICR register to get the interrupt causes, check them, and set
 * a flag bit to request a link status update.
 */
static void
eth_igc_interrupt_get_status(struct rte_eth_dev *dev)
{
	uint32_t icr;
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);

	/* read-on-clear nic registers here */
	icr = IGC_READ_REG(hw, IGC_ICR);

	intr->flags = 0;
	if (icr & IGC_ICR_LSC)
		intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
}

/* return 0 means link status changed, -1 means not changed */
static int
eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct rte_eth_link link;
	int link_check, count;

	link_check = 0;
	hw->mac.get_link_status = 1;

	/* possible wait-to-complete of up to 9 seconds */
	for (count = 0; count < IGC_LINK_UPDATE_CHECK_TIMEOUT; count++) {
		/* Read the real link status */
		switch (hw->phy.media_type) {
		case igc_media_type_copper:
			/* Do the work to read phy */
			igc_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			break;

		case igc_media_type_fiber:
			igc_check_for_link(hw);
			link_check = (IGC_READ_REG(hw, IGC_STATUS) &
				      IGC_STATUS_LU);
			break;

		case igc_media_type_internal_serdes:
			igc_check_for_link(hw);
			link_check = hw->mac.serdes_has_link;
			break;

		default:
			break;
		}
		if (link_check || wait_to_complete == 0)
			break;
		rte_delay_ms(IGC_LINK_UPDATE_CHECK_INTERVAL);
	}
	memset(&link, 0, sizeof(link));

	/* Now we check if a transition has happened */
	if (link_check) {
		uint16_t duplex, speed;
		hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
		link.link_duplex = (duplex == FULL_DUPLEX) ?
				ETH_LINK_FULL_DUPLEX :
				ETH_LINK_HALF_DUPLEX;
		link.link_speed = speed;
		link.link_status = ETH_LINK_UP;
		link.link_autoneg = !(dev->data->dev_conf.link_speeds &
				ETH_LINK_SPEED_FIXED);

		if (speed == SPEED_2500) {
			uint32_t tipg = IGC_READ_REG(hw, IGC_TIPG);
			if ((tipg & IGC_TIPG_IPGT_MASK) != 0x0b) {
				tipg &= ~IGC_TIPG_IPGT_MASK;
				tipg |= 0x0b;
				IGC_WRITE_REG(hw, IGC_TIPG, tipg);
			}
		}
	} else {
		link.link_speed = 0;
		link.link_duplex = ETH_LINK_HALF_DUPLEX;
		link.link_status = ETH_LINK_DOWN;
		link.link_autoneg = ETH_LINK_FIXED;
	}

	return rte_eth_linkstatus_set(dev, &link);
}

/*
 * Execute link_update once an interrupt has indicated that one is needed.
 */
static void
eth_igc_interrupt_action(struct rte_eth_dev *dev)
{
	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_eth_link link;
	int ret;

	if (intr->flags & IGC_FLAG_NEED_LINK_UPDATE) {
		intr->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;

		/* set get_link_status to check register later */
		ret = eth_igc_link_update(dev, 0);

		/* check if link has changed */
		if (ret < 0)
			return;

		rte_eth_linkstatus_get(dev, &link);
		if (link.link_status)
			PMD_DRV_LOG(INFO,
				" Port %d: Link Up - speed %u Mbps - %s",
				dev->data->port_id,
				(unsigned int)link.link_speed,
				link.link_duplex == ETH_LINK_FULL_DUPLEX ?
				"full-duplex" : "half-duplex");
		else
			PMD_DRV_LOG(INFO, " Port %d: Link Down",
				dev->data->port_id);

		PMD_DRV_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
				pci_dev->addr.domain,
				pci_dev->addr.bus,
				pci_dev->addr.devid,
				pci_dev->addr.function);
		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
				NULL);
	}
}

/*
 * Interrupt handler which shall be registered first.
 *
 * @handle
 *  Pointer to interrupt handle.
 * @param
 *  The address of the parameter (struct rte_eth_dev *) registered before.
 */
static void
eth_igc_interrupt_handler(void *param)
{
	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;

	eth_igc_interrupt_get_status(dev);
	eth_igc_interrupt_action(dev);
}

static void igc_read_queue_stats_register(struct rte_eth_dev *dev);

/*
 * Update the queue statistics every IGC_ALARM_INTERVAL microseconds.
 * @param
 *  The address of the parameter (struct rte_eth_dev *) registered before.
 */
static void
igc_update_queue_stats_handler(void *param)
{
	struct rte_eth_dev *dev = param;
	igc_read_queue_stats_register(dev);
	rte_eal_alarm_set(IGC_ALARM_INTERVAL,
			igc_update_queue_stats_handler, dev);
}

/*
 * rx,tx enable/disable
 */
static void
eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	uint32_t tctl, rctl;

	tctl = IGC_READ_REG(hw, IGC_TCTL);
	rctl = IGC_READ_REG(hw, IGC_RCTL);

	if (enable) {
		/* enable Tx/Rx */
		tctl |= IGC_TCTL_EN;
		rctl |= IGC_RCTL_EN;
	} else {
		/* disable Tx/Rx */
		tctl &= ~IGC_TCTL_EN;
		rctl &= ~IGC_RCTL_EN;
	}
	IGC_WRITE_REG(hw, IGC_TCTL, tctl);
	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
	IGC_WRITE_FLUSH(hw);
}

/*
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC.
 */
static void
eth_igc_stop(struct rte_eth_dev *dev)
{
	struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
	struct rte_eth_link link;

	adapter->stopped = 1;

	/* disable receive and transmit */
	eth_igc_rxtx_control(dev, false);

	/* disable all MSI-X interrupts */
	IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
	IGC_WRITE_FLUSH(hw);

	/* clear all MSI-X interrupts */
	IGC_WRITE_REG(hw, IGC_EICR, 0x1f);

	igc_intr_other_disable(dev);

	rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);

	/* disable intr eventfd mapping */
	rte_intr_disable(intr_handle);

	igc_reset_hw(hw);

	/* disable all wake up */
	IGC_WRITE_REG(hw, IGC_WUC, 0);

	/* Set bit for Go Link disconnect */
	igc_read_reg_check_set_bits(hw, IGC_82580_PHY_POWER_MGMT,
			IGC_82580_PM_GO_LINKD);

	/* Power down the phy. Needed to make the link go down */
	eth_igc_set_link_down(dev);

	igc_dev_clear_queues(dev);

	/* clear the recorded link status */
	memset(&link, 0, sizeof(link));
	rte_eth_linkstatus_set(dev, &link);

	if (!rte_intr_allow_others(intr_handle))
		/* resume to the default handler */
		rte_intr_callback_register(intr_handle,
					   eth_igc_interrupt_handler,
					   (void *)dev);

	/* Clean datapath event and queue/vec mapping */
	rte_intr_efd_disable(intr_handle);
	if (intr_handle->intr_vec != NULL) {
		rte_free(intr_handle->intr_vec);
		intr_handle->intr_vec = NULL;
	}
}

/*
 * write interrupt vector allocation register
 * @hw
 *  board private structure
 * @queue_index
 *  queue index, valid 0,1,2,3
 * @tx
 *  tx:1, rx:0
 * @msix_vector
 *  msix-vector, valid 0,1,2,3,4
 */
static void
igc_write_ivar(struct igc_hw *hw, uint8_t queue_index,
		bool tx, uint8_t msix_vector)
{
	uint8_t offset = 0;
	uint8_t reg_index = queue_index >> 1;
	uint32_t val;

	/*
	 * IVAR(0)
	 * bit31...24	bit23...16	bit15...8	bit7...0
	 * TX1		RX1		TX0		RX0
	 *
	 * IVAR(1)
	 * bit31...24	bit23...16	bit15...8	bit7...0
	 * TX3		RX3		TX2		RX2
	 */

	if (tx)
		offset = 8;

	if (queue_index & 1)
		offset += 16;

	val = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, reg_index);

	/* clear bits */
	val &= ~((uint32_t)0xFF << offset);

	/* write vector and valid bit */
	val |= (uint32_t)(msix_vector | IGC_IVAR_VALID) << offset;

	IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, reg_index, val);
}
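
/*
 * Worked example for the layout above: mapping RX queue 1 to MSI-X
 * vector 2 gives queue_index = 1, tx = false, so reg_index = 0 and
 * offset = 16; bits 23..16 of IVAR(0) become 2 | IGC_IVAR_VALID.
 */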

/* Sets up the hardware to generate MSI-X interrupts properly
 * @hw
 *  board private structure
 */
static void
igc_configure_msix_intr(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

	uint32_t intr_mask;
	uint32_t vec = IGC_MISC_VEC_ID;
	uint32_t base = IGC_MISC_VEC_ID;
	uint32_t misc_shift = 0;
	int i;

	/* won't configure msix register if no mapping is done
	 * between intr vector and event fd
	 */
	if (!rte_intr_dp_is_en(intr_handle))
		return;

	if (rte_intr_allow_others(intr_handle)) {
		base = IGC_RX_VEC_START;
		vec = base;
		misc_shift = 1;
	}

	/* turn on MSI-X capability first */
	IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE |
				IGC_GPIE_PBA | IGC_GPIE_EIAME |
				IGC_GPIE_NSICR);
	intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
		misc_shift;

	if (dev->data->dev_conf.intr_conf.lsc)
		intr_mask |= (1u << IGC_MSIX_OTHER_INTR_VEC);

	/* enable msix auto-clear */
	igc_read_reg_check_set_bits(hw, IGC_EIAC, intr_mask);

	/* set other cause interrupt vector */
	igc_read_reg_check_set_bits(hw, IGC_IVAR_MISC,
		(uint32_t)(IGC_MSIX_OTHER_INTR_VEC | IGC_IVAR_VALID) << 8);

	/* enable auto-mask */
	igc_read_reg_check_set_bits(hw, IGC_EIAM, intr_mask);

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		igc_write_ivar(hw, i, 0, vec);
		intr_handle->intr_vec[i] = vec;
		if (vec < base + intr_handle->nb_efd - 1)
			vec++;
	}

	IGC_WRITE_FLUSH(hw);
}
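
/*
 * Worked example: with 4 RX queues (nb_efd = 4) and lsc enabled,
 * misc_shift is 1, so intr_mask = 0xf << 1 = 0x1e; adding bit 0 for the
 * other-cause vector gives 0x1f, the same mask written to EIMC/EICR in
 * eth_igc_stop() and eth_igc_start() to cover all five vectors.
 */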

/**
 * Set or clear the LSC bit in the interrupt mask; the mask is written
 * to the hardware by igc_intr_other_enable().
 *
 * @dev
 *  Pointer to struct rte_eth_dev.
 * @on
 *  Enable or Disable
 */
static void
igc_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on)
{
	struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);

	if (on)
		intr->mask |= IGC_ICR_LSC;
	else
		intr->mask &= ~IGC_ICR_LSC;
}

/*
 * Enable the receive queue interrupts.
 * It will be called only once, during NIC initialization.
 */
static void
igc_rxq_interrupt_setup(struct rte_eth_dev *dev)
{
	uint32_t mask;
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
	int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0;

	/* won't configure msix register if no mapping is done
	 * between intr vector and event fd
	 */
	if (!rte_intr_dp_is_en(intr_handle))
		return;

	mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift;
	IGC_WRITE_REG(hw, IGC_EIMS, mask);
}

/*
 *  Get hardware rx-buffer size.
 */
static inline int
igc_get_rx_buffer_size(struct igc_hw *hw)
{
	return (IGC_READ_REG(hw, IGC_RXPBS) & 0x3f) << 10;
}
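
/*
 * For example: if the size field (bits 5:0) of RXPBS reads 0x20, the RX
 * packet buffer is 0x20 << 10 = 32768 bytes (32KB).
 */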

/*
 * igc_hw_control_acquire sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded.
 */
static void
igc_hw_control_acquire(struct igc_hw *hw)
{
	uint32_t ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
	IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
}

/*
 * igc_hw_control_release resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void
igc_hw_control_release(struct igc_hw *hw)
{
	uint32_t ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
	IGC_WRITE_REG(hw, IGC_CTRL_EXT,
			ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
}

static int
igc_hardware_init(struct igc_hw *hw)
{
	uint32_t rx_buf_size;
	int diag;

	/* Let the firmware know the OS is in control */
	igc_hw_control_acquire(hw);

	/* Issue a global reset */
	igc_reset_hw(hw);

	/* disable all wake up */
	IGC_WRITE_REG(hw, IGC_WUC, 0);

	/*
	 * Hardware flow control
	 * - High water mark should allow for at least two standard size (1518)
	 *   frames to be received after sending an XOFF.
	 * - Low water mark works best when it is very near the high water mark.
	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
	 *   restart after one full frame is pulled from the buffer. There
	 *   could be several smaller frames in the buffer and if so they will
	 *   not trigger the XON until their total number reduces the buffer
	 *   by 1500.
	 */
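	/*
	 * Worked example (buffer size varies by device/configuration):
	 * with a 32KB RX buffer, high_water = 32768 - 2 * 1518 = 29732
	 * bytes and low_water = 29732 - 1500 = 28232 bytes.
	 */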
	rx_buf_size = igc_get_rx_buffer_size(hw);
	hw->fc.high_water = rx_buf_size - (RTE_ETHER_MAX_LEN * 2);
	hw->fc.low_water = hw->fc.high_water - 1500;
	hw->fc.pause_time = IGC_FC_PAUSE_TIME;
	hw->fc.send_xon = 1;
	hw->fc.requested_mode = igc_fc_full;

	diag = igc_init_hw(hw);
	if (diag < 0)
		return diag;

	igc_get_phy_info(hw);
	igc_check_for_link(hw);

	return 0;
}

static int
eth_igc_start(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
	uint32_t *speeds;
	int ret;

	PMD_INIT_FUNC_TRACE();

	/* disable all MSI-X interrupts */
	IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
	IGC_WRITE_FLUSH(hw);

	/* clear all MSI-X interrupts */
	IGC_WRITE_REG(hw, IGC_EICR, 0x1f);

	/* disable uio/vfio intr/eventfd mapping */
	if (!adapter->stopped)
		rte_intr_disable(intr_handle);

	/* Power up the phy. Needed to make the link go up */
	eth_igc_set_link_up(dev);

	/* Put the address into the Receive Address Array */
	igc_rar_set(hw, hw->mac.addr, 0);

	/* Initialize the hardware */
	if (igc_hardware_init(hw)) {
		PMD_DRV_LOG(ERR, "Unable to initialize the hardware");
		return -EIO;
	}
	adapter->stopped = 0;

	/* check and configure queue intr-vector mapping */
	if (rte_intr_cap_multiple(intr_handle) &&
		dev->data->dev_conf.intr_conf.rxq) {
		uint32_t intr_vector = dev->data->nb_rx_queues;
		if (rte_intr_efd_enable(intr_handle, intr_vector))
			return -1;
	}

	if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
		intr_handle->intr_vec = rte_zmalloc("intr_vec",
			dev->data->nb_rx_queues * sizeof(int), 0);
		if (intr_handle->intr_vec == NULL) {
			PMD_DRV_LOG(ERR,
				"Failed to allocate %d rx_queues intr_vec",
				dev->data->nb_rx_queues);
			return -ENOMEM;
		}
	}

	/* configure msix for rx interrupt */
	igc_configure_msix_intr(dev);

	igc_tx_init(dev);

	/* This can fail when allocating mbufs for descriptor rings */
	ret = igc_rx_init(dev);
	if (ret) {
		PMD_DRV_LOG(ERR, "Unable to initialize RX hardware");
		igc_dev_clear_queues(dev);
		return ret;
	}

	igc_clear_hw_cntrs_base_generic(hw);

	/* Setup link speed and duplex */
	speeds = &dev->data->dev_conf.link_speeds;
	if (*speeds == ETH_LINK_SPEED_AUTONEG) {
		hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
		hw->mac.autoneg = 1;
	} else {
		int num_speeds = 0;
		bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;

		/* Reset */
		hw->phy.autoneg_advertised = 0;

		if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
				ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
				ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
				ETH_LINK_SPEED_FIXED)) {
			num_speeds = -1;
			goto error_invalid_config;
		}
		if (*speeds & ETH_LINK_SPEED_10M_HD) {
			hw->phy.autoneg_advertised |= ADVERTISE_10_HALF;
			num_speeds++;
		}
		if (*speeds & ETH_LINK_SPEED_10M) {
			hw->phy.autoneg_advertised |= ADVERTISE_10_FULL;
			num_speeds++;
		}
		if (*speeds & ETH_LINK_SPEED_100M_HD) {
			hw->phy.autoneg_advertised |= ADVERTISE_100_HALF;
			num_speeds++;
		}
		if (*speeds & ETH_LINK_SPEED_100M) {
			hw->phy.autoneg_advertised |= ADVERTISE_100_FULL;
			num_speeds++;
		}
		if (*speeds & ETH_LINK_SPEED_1G) {
			hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL;
			num_speeds++;
		}
		if (*speeds & ETH_LINK_SPEED_2_5G) {
			hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL;
			num_speeds++;
		}
		if (num_speeds == 0 || (!autoneg && num_speeds > 1))
			goto error_invalid_config;

		/* Set/reset the mac.autoneg based on the link speed,
		 * fixed or not
		 */
		if (!autoneg) {
			hw->mac.autoneg = 0;
			hw->mac.forced_speed_duplex =
					hw->phy.autoneg_advertised;
		} else {
			hw->mac.autoneg = 1;
		}
	}

	igc_setup_link(hw);

	if (rte_intr_allow_others(intr_handle)) {
		/* check if lsc interrupt is enabled */
		if (dev->data->dev_conf.intr_conf.lsc)
			igc_lsc_interrupt_setup(dev, 1);
		else
			igc_lsc_interrupt_setup(dev, 0);
	} else {
		rte_intr_callback_unregister(intr_handle,
					     eth_igc_interrupt_handler,
					     (void *)dev);
		if (dev->data->dev_conf.intr_conf.lsc)
			PMD_DRV_LOG(INFO,
				"LSC won't be enabled because of no intr multiplex");
	}

	/* enable uio/vfio intr/eventfd mapping */
	rte_intr_enable(intr_handle);

	rte_eal_alarm_set(IGC_ALARM_INTERVAL,
			igc_update_queue_stats_handler, dev);

	/* check if rxq interrupt is enabled */
	if (dev->data->dev_conf.intr_conf.rxq &&
			rte_intr_dp_is_en(intr_handle))
		igc_rxq_interrupt_setup(dev);

	/* resume enabled intr since hw reset */
	igc_intr_other_enable(dev);

	eth_igc_rxtx_control(dev, true);
	eth_igc_link_update(dev, 0);

	return 0;

error_invalid_config:
	PMD_DRV_LOG(ERR, "Invalid advertised speeds (%u) for port %u",
		     dev->data->dev_conf.link_speeds, dev->data->port_id);
	igc_dev_clear_queues(dev);
	return -EINVAL;
}

static int
igc_reset_swfw_lock(struct igc_hw *hw)
{
	int ret_val;

	/*
	 * Do mac ops initialization manually here, since we will need
	 * some function pointers set by this call.
	 */
	ret_val = igc_init_mac_params(hw);
	if (ret_val)
		return ret_val;

	/*
	 * SMBI lock should not fail in this early stage. If this is the case,
	 * it is due to an improper exit of the application.
	 * So force the release of the faulty lock.
	 */
	if (igc_get_hw_semaphore_generic(hw) < 0)
		PMD_DRV_LOG(DEBUG, "SMBI lock released");

	igc_put_hw_semaphore_generic(hw);

	if (hw->mac.ops.acquire_swfw_sync != NULL) {
		uint16_t mask;

		/*
		 * Phy lock should not fail in this early stage.
		 * If this is the case, it is due to an improper exit of the
		 * application. So force the release of the faulty lock.
		 */
		mask = IGC_SWFW_PHY0_SM;
		if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) {
			PMD_DRV_LOG(DEBUG, "SWFW phy%d lock released",
				    hw->bus.func);
		}
		hw->mac.ops.release_swfw_sync(hw, mask);

		/*
		 * This one is more tricky since it is common to all ports; but
		 * swfw_sync retries last long enough (1s) to be almost sure
		 * that if the lock cannot be taken it is due to an improper
		 * lock of the semaphore.
		 */
		mask = IGC_SWFW_EEP_SM;
		if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0)
			PMD_DRV_LOG(DEBUG, "SWFW common locks released");

		hw->mac.ops.release_swfw_sync(hw, mask);
	}

	return IGC_SUCCESS;
}

/*
 * free all rx/tx queues.
 */
static void
igc_dev_free_queues(struct rte_eth_dev *dev)
{
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		eth_igc_rx_queue_release(dev->data->rx_queues[i]);
		dev->data->rx_queues[i] = NULL;
	}
	dev->data->nb_rx_queues = 0;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		eth_igc_tx_queue_release(dev->data->tx_queues[i]);
		dev->data->tx_queues[i] = NULL;
	}
	dev->data->nb_tx_queues = 0;
}

static void
eth_igc_close(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
	int retry = 0;

	PMD_INIT_FUNC_TRACE();

	if (!adapter->stopped)
		eth_igc_stop(dev);

	igc_intr_other_disable(dev);
	do {
		int ret = rte_intr_callback_unregister(intr_handle,
				eth_igc_interrupt_handler, dev);
		if (ret >= 0 || ret == -ENOENT || ret == -EINVAL)
			break;

		PMD_DRV_LOG(ERR, "intr callback unregister failed: %d", ret);
		DELAY(200 * 1000); /* delay 200ms */
	} while (retry++ < 5);

	igc_phy_hw_reset(hw);
	igc_hw_control_release(hw);
	igc_dev_free_queues(dev);

	/* Reset any pending lock */
	igc_reset_swfw_lock(hw);
}

static void
igc_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

	hw->vendor_id = pci_dev->id.vendor_id;
	hw->device_id = pci_dev->id.device_id;
	hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
	hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
}

static int
eth_igc_dev_init(struct rte_eth_dev *dev)
{
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
	struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	int i, error = 0;

	PMD_INIT_FUNC_TRACE();
	dev->dev_ops = &eth_igc_ops;

	/*
	 * for secondary processes, we don't initialize any further as primary
	 * has already done this work. Only check we don't need a different
	 * RX function.
	 */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	rte_eth_copy_pci_info(dev, pci_dev);

	hw->back = pci_dev;
	hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;

	igc_identify_hardware(dev, pci_dev);
	if (igc_setup_init_funcs(hw, false) != IGC_SUCCESS) {
		error = -EIO;
		goto err_late;
	}

	igc_get_bus_info(hw);

	/* Reset any pending lock */
	if (igc_reset_swfw_lock(hw) != IGC_SUCCESS) {
		error = -EIO;
		goto err_late;
	}

	/* Finish initialization */
	if (igc_setup_init_funcs(hw, true) != IGC_SUCCESS) {
		error = -EIO;
		goto err_late;
	}

	hw->mac.autoneg = 1;
	hw->phy.autoneg_wait_to_complete = 0;
	hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;

	/* Copper options */
	if (hw->phy.media_type == igc_media_type_copper) {
		hw->phy.mdix = 0; /* AUTO_ALL_MODES */
		hw->phy.disable_polarity_correction = 0;
		hw->phy.ms_type = igc_ms_hw_default;
	}

	/*
	 * Start from a known state; this is important for reading the NVM
	 * and MAC address from it.
	 */
	igc_reset_hw(hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (igc_validate_nvm_checksum(hw) < 0) {
		/*
		 * Some PCI-E parts fail the first check due to
		 * the link being in sleep state, call it again,
		 * if it fails a second time it's a real issue.
		 */
		if (igc_validate_nvm_checksum(hw) < 0) {
			PMD_INIT_LOG(ERR, "EEPROM checksum invalid");
			error = -EIO;
			goto err_late;
		}
	}

	/* Read the permanent MAC address out of the EEPROM */
	if (igc_read_mac_addr(hw) != 0) {
		PMD_INIT_LOG(ERR, "EEPROM error while reading MAC address");
		error = -EIO;
		goto err_late;
	}

	/* Allocate memory for storing MAC addresses */
	dev->data->mac_addrs = rte_zmalloc("igc",
		RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0);
	if (dev->data->mac_addrs == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate %d bytes for storing MAC",
				RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count);
		error = -ENOMEM;
		goto err_late;
	}

	/* Copy the permanent MAC address */
	rte_ether_addr_copy((struct rte_ether_addr *)hw->mac.addr,
			&dev->data->mac_addrs[0]);

	/* Now initialize the hardware */
	if (igc_hardware_init(hw) != 0) {
		PMD_INIT_LOG(ERR, "Hardware initialization failed");
		rte_free(dev->data->mac_addrs);
		dev->data->mac_addrs = NULL;
		error = -ENODEV;
		goto err_late;
	}

	/* Pass the information to rte_eth_dev_close() that it should also
	 * release the private port resources.
	 */
	dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;

	hw->mac.get_link_status = 1;
	igc->stopped = 0;

	/* Indicate SOL/IDER usage */
	if (igc_check_reset_block(hw) < 0)
		PMD_INIT_LOG(ERR,
			"PHY reset is blocked due to a SOL/IDER session.");

	PMD_INIT_LOG(DEBUG, "port_id %d vendorID=0x%x deviceID=0x%x",
			dev->data->port_id, pci_dev->id.vendor_id,
			pci_dev->id.device_id);

	rte_intr_callback_register(&pci_dev->intr_handle,
			eth_igc_interrupt_handler, (void *)dev);

	/* enable uio/vfio intr/eventfd mapping */
	rte_intr_enable(&pci_dev->intr_handle);

	/* enable support intr */
	igc_intr_other_enable(dev);

	/* initialize the queue stats mapping */
	for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
		igc->txq_stats_map[i] = -1;
		igc->rxq_stats_map[i] = -1;
	}

	return 0;

err_late:
	igc_hw_control_release(hw);
	return error;
}

static int
eth_igc_dev_uninit(__rte_unused struct rte_eth_dev *eth_dev)
{
	PMD_INIT_FUNC_TRACE();

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	eth_igc_close(eth_dev);
	return 0;
}

static int
eth_igc_reset(struct rte_eth_dev *dev)
{
	int ret;

	PMD_INIT_FUNC_TRACE();

	ret = eth_igc_dev_uninit(dev);
	if (ret)
		return ret;

	return eth_igc_dev_init(dev);
}

static int
eth_igc_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	uint32_t rctl;

	rctl = IGC_READ_REG(hw, IGC_RCTL);
	rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE);
	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
	return 0;
}

static int
eth_igc_promiscuous_disable(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	uint32_t rctl;

	rctl = IGC_READ_REG(hw, IGC_RCTL);
	rctl &= (~IGC_RCTL_UPE);
	if (dev->data->all_multicast == 1)
		rctl |= IGC_RCTL_MPE;
	else
		rctl &= (~IGC_RCTL_MPE);
	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
	return 0;
}

static int
eth_igc_allmulticast_enable(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	uint32_t rctl;

	rctl = IGC_READ_REG(hw, IGC_RCTL);
	rctl |= IGC_RCTL_MPE;
	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
	return 0;
}

static int
eth_igc_allmulticast_disable(struct rte_eth_dev *dev)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	uint32_t rctl;

	if (dev->data->promiscuous == 1)
		return 0;	/* must remain in all_multicast mode */

	rctl = IGC_READ_REG(hw, IGC_RCTL);
	rctl &= (~IGC_RCTL_MPE);
	IGC_WRITE_REG(hw, IGC_RCTL, rctl);
	return 0;
}

static int
eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
		       size_t fw_size)
{
	struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
	struct igc_fw_version fw;
	int ret;

	igc_get_fw_version(hw, &fw);

	/* if option rom is valid, display its version too */
	if (fw.or_valid) {
		ret = snprintf(fw_version, fw_size,
			 "%d.%d, 0x%08x, %d.%d.%d",
			 fw.eep_major, fw.eep_minor, fw.etrack_id,
			 fw.or_major, fw.or_build, fw.or_patch);
	/* no option rom */
	} else {
		if (fw.etrack_id != 0x0000) {
			ret = snprintf(fw_version, fw_size,
				 "%d.%d, 0x%08x",
				 fw.eep_major, fw.eep_minor,
				 fw.etrack_id);
		} else {
			ret = snprintf(fw_version, fw_size,
				 "%d.%d.%d",
				 fw.eep_major, fw.eep_minor,
				 fw.eep_build);
		}
	}

	ret += 1; /* add the size of '\0' */
	if (fw_size < (u32)ret)
		return ret;
	else
		return 0;
}
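
/*
 * Note: per the rte_eth_dev_fw_version_get() contract, a positive return
 * value is the buffer size the caller must provide, including the
 * terminating '\0'; hence the "ret += 1" above.
 */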
1435
1436 static int
1437 eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1438 {
1439         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1440
1441         dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
1442         dev_info->max_rx_pktlen = MAX_RX_JUMBO_FRAME_SIZE;
1443         dev_info->max_mac_addrs = hw->mac.rar_entry_count;
1444         dev_info->rx_offload_capa = IGC_RX_OFFLOAD_ALL;
1445         dev_info->tx_offload_capa = IGC_TX_OFFLOAD_ALL;
1446
1447         dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM;
1448         dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM;
1449         dev_info->max_vmdq_pools = 0;
1450
1451         dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t);
1452         dev_info->reta_size = ETH_RSS_RETA_SIZE_128;
1453         dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL;
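        /*
         * The hash key is exposed as IGC_HKEY_MAX_INDEX 32-bit RSSRK
         * registers (40 bytes if IGC_HKEY_MAX_INDEX is 10, as on other
         * igb-family devices) and the redirection table holds 128 entries.
         */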
1454
1455         dev_info->default_rxconf = (struct rte_eth_rxconf) {
1456                 .rx_thresh = {
1457                         .pthresh = IGC_DEFAULT_RX_PTHRESH,
1458                         .hthresh = IGC_DEFAULT_RX_HTHRESH,
1459                         .wthresh = IGC_DEFAULT_RX_WTHRESH,
1460                 },
1461                 .rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH,
1462                 .rx_drop_en = 0,
1463                 .offloads = 0,
1464         };
1465
1466         dev_info->default_txconf = (struct rte_eth_txconf) {
1467                 .tx_thresh = {
1468                         .pthresh = IGC_DEFAULT_TX_PTHRESH,
1469                         .hthresh = IGC_DEFAULT_TX_HTHRESH,
1470                         .wthresh = IGC_DEFAULT_TX_WTHRESH,
1471                 },
1472                 .offloads = 0,
1473         };
1474
1475         dev_info->rx_desc_lim = rx_desc_lim;
1476         dev_info->tx_desc_lim = tx_desc_lim;
1477
1478         dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
1479                         ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
1480                         ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G;
1481
1482         dev_info->max_mtu = dev_info->max_rx_pktlen - IGC_ETH_OVERHEAD;
1483         dev_info->min_mtu = RTE_ETHER_MIN_MTU;
1484         return 0;
1485 }
1486
1487 static int
1488 eth_igc_led_on(struct rte_eth_dev *dev)
1489 {
1490         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1491
1492         return igc_led_on(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1493 }
1494
1495 static int
1496 eth_igc_led_off(struct rte_eth_dev *dev)
1497 {
1498         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1499
1500         return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1501 }
1502
1503 static const uint32_t *
1504 eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev)
1505 {
1506         static const uint32_t ptypes[] = {
1507                 /* refers to rx_desc_pkt_info_to_pkt_type() */
1508                 RTE_PTYPE_L2_ETHER,
1509                 RTE_PTYPE_L3_IPV4,
1510                 RTE_PTYPE_L3_IPV4_EXT,
1511                 RTE_PTYPE_L3_IPV6,
1512                 RTE_PTYPE_L3_IPV6_EXT,
1513                 RTE_PTYPE_L4_TCP,
1514                 RTE_PTYPE_L4_UDP,
1515                 RTE_PTYPE_L4_SCTP,
1516                 RTE_PTYPE_TUNNEL_IP,
1517                 RTE_PTYPE_INNER_L3_IPV6,
1518                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1519                 RTE_PTYPE_INNER_L4_TCP,
1520                 RTE_PTYPE_INNER_L4_UDP,
1521                 RTE_PTYPE_UNKNOWN
1522         };
1523
1524         return ptypes;
1525 }
1526
1527 static int
1528 eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1529 {
1530         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1531         uint32_t frame_size = mtu + IGC_ETH_OVERHEAD;
1532         uint32_t rctl;
1533
1534         /* if extended VLAN has been enabled */
1535         if (IGC_READ_REG(hw, IGC_CTRL_EXT) & IGC_CTRL_EXT_EXT_VLAN)
1536                 frame_size += VLAN_TAG_SIZE;
1537
1538         /* check that mtu is within the allowed range */
1539         if (mtu < RTE_ETHER_MIN_MTU ||
1540                 frame_size > MAX_RX_JUMBO_FRAME_SIZE)
1541                 return -EINVAL;
1542
1543         /*
1544          * Refuse an MTU that requires scattered-packet support when that
1545          * feature has not been enabled beforehand.
1546          */
1547         if (!dev->data->scattered_rx &&
1548             frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM)
1549                 return -EINVAL;
1550
1551         rctl = IGC_READ_REG(hw, IGC_RCTL);
1552
1553         /* switch to jumbo mode if needed */
1554         if (mtu > RTE_ETHER_MTU) {
1555                 dev->data->dev_conf.rxmode.offloads |=
1556                         DEV_RX_OFFLOAD_JUMBO_FRAME;
1557                 rctl |= IGC_RCTL_LPE;
1558         } else {
1559                 dev->data->dev_conf.rxmode.offloads &=
1560                         ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1561                 rctl &= ~IGC_RCTL_LPE;
1562         }
1563         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1564
1565         /* update max frame size */
1566         dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
1567
1568         IGC_WRITE_REG(hw, IGC_RLPML,
1569                         dev->data->dev_conf.rxmode.max_rx_pkt_len);
1570
1571         return 0;
1572 }
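
/*
 * Worked example of the frame-size arithmetic above: with
 * RTE_ETHER_HDR_LEN = 14, RTE_ETHER_CRC_LEN = 4 and VLAN_TAG_SIZE = 4,
 * a standard 1500-byte MTU gives frame_size = 1500 + 22 = 1522; with
 * extended (double) VLAN enabled one more tag is counted, i.e. 1526.
 * Any MTU above RTE_ETHER_MTU (1500) switches the port into jumbo mode.
 */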
1573
1574 static int
1575 eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1576                 uint32_t index, uint32_t pool)
1577 {
1578         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1579
1580         igc_rar_set(hw, mac_addr->addr_bytes, index);
1581         RTE_SET_USED(pool);
1582         return 0;
1583 }
1584
1585 static void
1586 eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index)
1587 {
1588         uint8_t addr[RTE_ETHER_ADDR_LEN];
1589         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1590
1591         memset(addr, 0, sizeof(addr));
1592         igc_rar_set(hw, addr, index);
1593 }
1594
1595 static int
1596 eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
1597                         struct rte_ether_addr *addr)
1598 {
1599         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1600         igc_rar_set(hw, addr->addr_bytes, 0);
1601         return 0;
1602 }
1603
1604 static int
1605 eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
1606                          struct rte_ether_addr *mc_addr_set,
1607                          uint32_t nb_mc_addr)
1608 {
1609         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1610         igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr);
1611         return 0;
1612 }
1613
1614 /*
1615  * Read hardware registers
1616  */
1617 static void
1618 igc_read_stats_registers(struct igc_hw *hw, struct igc_hw_stats *stats)
1619 {
1620         int pause_frames;
1621
1622         uint64_t old_gprc  = stats->gprc;
1623         uint64_t old_gptc  = stats->gptc;
1624         uint64_t old_tpr   = stats->tpr;
1625         uint64_t old_tpt   = stats->tpt;
1626         uint64_t old_rpthc = stats->rpthc;
1627         uint64_t old_hgptc = stats->hgptc;
1628
1629         stats->crcerrs += IGC_READ_REG(hw, IGC_CRCERRS);
1630         stats->algnerrc += IGC_READ_REG(hw, IGC_ALGNERRC);
1631         stats->rxerrc += IGC_READ_REG(hw, IGC_RXERRC);
1632         stats->mpc += IGC_READ_REG(hw, IGC_MPC);
1633         stats->scc += IGC_READ_REG(hw, IGC_SCC);
1634         stats->ecol += IGC_READ_REG(hw, IGC_ECOL);
1635
1636         stats->mcc += IGC_READ_REG(hw, IGC_MCC);
1637         stats->latecol += IGC_READ_REG(hw, IGC_LATECOL);
1638         stats->colc += IGC_READ_REG(hw, IGC_COLC);
1639
1640         stats->dc += IGC_READ_REG(hw, IGC_DC);
1641         stats->tncrs += IGC_READ_REG(hw, IGC_TNCRS);
1642         stats->htdpmc += IGC_READ_REG(hw, IGC_HTDPMC);
1643         stats->rlec += IGC_READ_REG(hw, IGC_RLEC);
1644         stats->xonrxc += IGC_READ_REG(hw, IGC_XONRXC);
1645         stats->xontxc += IGC_READ_REG(hw, IGC_XONTXC);
1646
1647         /*
1648          * For watchdog management we need to know if we have been
1649          * paused during the last interval, so capture that here.
1650          */
1651         pause_frames = IGC_READ_REG(hw, IGC_XOFFRXC);
1652         stats->xoffrxc += pause_frames;
1653         stats->xofftxc += IGC_READ_REG(hw, IGC_XOFFTXC);
1654         stats->fcruc += IGC_READ_REG(hw, IGC_FCRUC);
1655         stats->prc64 += IGC_READ_REG(hw, IGC_PRC64);
1656         stats->prc127 += IGC_READ_REG(hw, IGC_PRC127);
1657         stats->prc255 += IGC_READ_REG(hw, IGC_PRC255);
1658         stats->prc511 += IGC_READ_REG(hw, IGC_PRC511);
1659         stats->prc1023 += IGC_READ_REG(hw, IGC_PRC1023);
1660         stats->prc1522 += IGC_READ_REG(hw, IGC_PRC1522);
1661         stats->gprc += IGC_READ_REG(hw, IGC_GPRC);
1662         stats->bprc += IGC_READ_REG(hw, IGC_BPRC);
1663         stats->mprc += IGC_READ_REG(hw, IGC_MPRC);
1664         stats->gptc += IGC_READ_REG(hw, IGC_GPTC);
1665
1666         /* For the 64-bit byte counters the low dword must be read first. */
1667         /* Both registers clear on the read of the high dword */
1668
1669         /* Workaround CRC bytes included in size, take away 4 bytes/packet */
1670         stats->gorc += IGC_READ_REG(hw, IGC_GORCL);
1671         stats->gorc += ((uint64_t)IGC_READ_REG(hw, IGC_GORCH) << 32);
1672         stats->gorc -= (stats->gprc - old_gprc) * RTE_ETHER_CRC_LEN;
1673         stats->gotc += IGC_READ_REG(hw, IGC_GOTCL);
1674         stats->gotc += ((uint64_t)IGC_READ_REG(hw, IGC_GOTCH) << 32);
1675         stats->gotc -= (stats->gptc - old_gptc) * RTE_ETHER_CRC_LEN;
1676
1677         stats->rnbc += IGC_READ_REG(hw, IGC_RNBC);
1678         stats->ruc += IGC_READ_REG(hw, IGC_RUC);
1679         stats->rfc += IGC_READ_REG(hw, IGC_RFC);
1680         stats->roc += IGC_READ_REG(hw, IGC_ROC);
1681         stats->rjc += IGC_READ_REG(hw, IGC_RJC);
1682
1683         stats->mgprc += IGC_READ_REG(hw, IGC_MGTPRC);
1684         stats->mgpdc += IGC_READ_REG(hw, IGC_MGTPDC);
1685         stats->mgptc += IGC_READ_REG(hw, IGC_MGTPTC);
1686         stats->b2ospc += IGC_READ_REG(hw, IGC_B2OSPC);
1687         stats->b2ogprc += IGC_READ_REG(hw, IGC_B2OGPRC);
1688         stats->o2bgptc += IGC_READ_REG(hw, IGC_O2BGPTC);
1689         stats->o2bspc += IGC_READ_REG(hw, IGC_O2BSPC);
1690
1691         stats->tpr += IGC_READ_REG(hw, IGC_TPR);
1692         stats->tpt += IGC_READ_REG(hw, IGC_TPT);
1693
1694         stats->tor += IGC_READ_REG(hw, IGC_TORL);
1695         stats->tor += ((uint64_t)IGC_READ_REG(hw, IGC_TORH) << 32);
1696         stats->tor -= (stats->tpr - old_tpr) * RTE_ETHER_CRC_LEN;
1697         stats->tot += IGC_READ_REG(hw, IGC_TOTL);
1698         stats->tot += ((uint64_t)IGC_READ_REG(hw, IGC_TOTH) << 32);
1699         stats->tot -= (stats->tpt - old_tpt) * RTE_ETHER_CRC_LEN;
1700
1701         stats->ptc64 += IGC_READ_REG(hw, IGC_PTC64);
1702         stats->ptc127 += IGC_READ_REG(hw, IGC_PTC127);
1703         stats->ptc255 += IGC_READ_REG(hw, IGC_PTC255);
1704         stats->ptc511 += IGC_READ_REG(hw, IGC_PTC511);
1705         stats->ptc1023 += IGC_READ_REG(hw, IGC_PTC1023);
1706         stats->ptc1522 += IGC_READ_REG(hw, IGC_PTC1522);
1707         stats->mptc += IGC_READ_REG(hw, IGC_MPTC);
1708         stats->bptc += IGC_READ_REG(hw, IGC_BPTC);
1709         stats->tsctc += IGC_READ_REG(hw, IGC_TSCTC);
1710
1711         stats->iac += IGC_READ_REG(hw, IGC_IAC);
1712         stats->rpthc += IGC_READ_REG(hw, IGC_RPTHC);
1713         stats->hgptc += IGC_READ_REG(hw, IGC_HGPTC);
1714         stats->icrxdmtc += IGC_READ_REG(hw, IGC_ICRXDMTC);
1715
1716         /* Host to Card Statistics */
1717         stats->hgorc += IGC_READ_REG(hw, IGC_HGORCL);
1718         stats->hgorc += ((uint64_t)IGC_READ_REG(hw, IGC_HGORCH) << 32);
1719         stats->hgorc -= (stats->rpthc - old_rpthc) * RTE_ETHER_CRC_LEN;
1720         stats->hgotc += IGC_READ_REG(hw, IGC_HGOTCL);
1721         stats->hgotc += ((uint64_t)IGC_READ_REG(hw, IGC_HGOTCH) << 32);
1722         stats->hgotc -= (stats->hgptc - old_hgptc) * RTE_ETHER_CRC_LEN;
1723         stats->lenerrs += IGC_READ_REG(hw, IGC_LENERRS);
1724 }
1725
1726 /*
1727  * Write 0 to all per-queue statistics registers
1728  */
1729 static void
1730 igc_reset_queue_stats_register(struct igc_hw *hw)
1731 {
1732         int i;
1733
1734         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1735                 IGC_WRITE_REG(hw, IGC_PQGPRC(i), 0);
1736                 IGC_WRITE_REG(hw, IGC_PQGPTC(i), 0);
1737                 IGC_WRITE_REG(hw, IGC_PQGORC(i), 0);
1738                 IGC_WRITE_REG(hw, IGC_PQGOTC(i), 0);
1739                 IGC_WRITE_REG(hw, IGC_PQMPRC(i), 0);
1740                 IGC_WRITE_REG(hw, IGC_RQDPC(i), 0);
1741                 IGC_WRITE_REG(hw, IGC_TQDPC(i), 0);
1742         }
1743 }
1744
1745 /*
1746  * Read all hardware per-queue statistics registers
1747  */
1748 static void
1749 igc_read_queue_stats_register(struct rte_eth_dev *dev)
1750 {
1751         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1752         struct igc_hw_queue_stats *queue_stats =
1753                                 IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1754         int i;
1755
1756         /*
1757          * These registers are not cleared on read. Each one wraps back to
1758          * 0x00000000 on the increment after reaching 0xFFFFFFFF and then
1759          * continues counting normally.
1760          */
1761         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1762                 union {
1763                         u64 ddword;
1764                         u32 dword[2];
1765                 } value;
1766                 u32 tmp;
1767
1768                 /*
1769                  * Read the register first; if the value is smaller than the
1770                  * previous reading, the register has wrapped around, so
1771                  * increment the high 4 bytes by 1 and replace the low 4
1772                  * bytes with the new value.
1773                  */
1774                 tmp = IGC_READ_REG(hw, IGC_PQGPRC(i));
1775                 value.ddword = queue_stats->pqgprc[i];
1776                 if (value.dword[U32_0_IN_U64] > tmp)
1777                         value.dword[U32_1_IN_U64]++;
1778                 value.dword[U32_0_IN_U64] = tmp;
1779                 queue_stats->pqgprc[i] = value.ddword;
1780
1781                 tmp = IGC_READ_REG(hw, IGC_PQGPTC(i));
1782                 value.ddword = queue_stats->pqgptc[i];
1783                 if (value.dword[U32_0_IN_U64] > tmp)
1784                         value.dword[U32_1_IN_U64]++;
1785                 value.dword[U32_0_IN_U64] = tmp;
1786                 queue_stats->pqgptc[i] = value.ddword;
1787
1788                 tmp = IGC_READ_REG(hw, IGC_PQGORC(i));
1789                 value.ddword = queue_stats->pqgorc[i];
1790                 if (value.dword[U32_0_IN_U64] > tmp)
1791                         value.dword[U32_1_IN_U64]++;
1792                 value.dword[U32_0_IN_U64] = tmp;
1793                 queue_stats->pqgorc[i] = value.ddword;
1794
1795                 tmp = IGC_READ_REG(hw, IGC_PQGOTC(i));
1796                 value.ddword = queue_stats->pqgotc[i];
1797                 if (value.dword[U32_0_IN_U64] > tmp)
1798                         value.dword[U32_1_IN_U64]++;
1799                 value.dword[U32_0_IN_U64] = tmp;
1800                 queue_stats->pqgotc[i] = value.ddword;
1801
1802                 tmp = IGC_READ_REG(hw, IGC_PQMPRC(i));
1803                 value.ddword = queue_stats->pqmprc[i];
1804                 if (value.dword[U32_0_IN_U64] > tmp)
1805                         value.dword[U32_1_IN_U64]++;
1806                 value.dword[U32_0_IN_U64] = tmp;
1807                 queue_stats->pqmprc[i] = value.ddword;
1808
1809                 tmp = IGC_READ_REG(hw, IGC_RQDPC(i));
1810                 value.ddword = queue_stats->rqdpc[i];
1811                 if (value.dword[U32_0_IN_U64] > tmp)
1812                         value.dword[U32_1_IN_U64]++;
1813                 value.dword[U32_0_IN_U64] = tmp;
1814                 queue_stats->rqdpc[i] = value.ddword;
1815
1816                 tmp = IGC_READ_REG(hw, IGC_TQDPC(i));
1817                 value.ddword = queue_stats->tqdpc[i];
1818                 if (value.dword[U32_0_IN_U64] > tmp)
1819                         value.dword[U32_1_IN_U64]++;
1820                 value.dword[U32_0_IN_U64] = tmp;
1821                 queue_stats->tqdpc[i] = value.ddword;
1822         }
1823 }
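
/*
 * The loop above is the standard technique for extending a free-running
 * 32-bit hardware counter to 64 bits in software. A minimal standalone
 * sketch of the same logic (hypothetical helper, not used by the driver):
 *
 *     static uint64_t
 *     extend_counter32(uint64_t prev, uint32_t cur)
 *     {
 *             uint64_t high = prev & 0xffffffff00000000ULL;
 *
 *             if ((uint32_t)prev > cur)   // counter wrapped since last read
 *                     high += 1ULL << 32;
 *             return high | cur;
 *     }
 *
 * This is only correct if each register is sampled at least once per wrap
 * period, hence the periodic alarm that re-invokes this function.
 */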
1824
1825 static int
1826 eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
1827 {
1828         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1829         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1830         struct igc_hw_stats *stats = IGC_DEV_PRIVATE_STATS(dev);
1831         struct igc_hw_queue_stats *queue_stats =
1832                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1833         int i;
1834
1835         /*
1836          * Cancel the alarm handler since it will read the queue statistics registers
1837          */
1838         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1839
1840         /* Read the statistics registers */
1841         igc_read_queue_stats_register(dev);
1842         igc_read_stats_registers(hw, stats);
1843
1844         if (rte_stats == NULL) {
1845                 /* Restart the queue statistics handler */
1846                 rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1847                                 igc_update_queue_stats_handler, dev);
1848                 return -EINVAL;
1849         }
1850
1851         /* Rx Errors */
1852         rte_stats->imissed = stats->mpc;
1853         rte_stats->ierrors = stats->crcerrs +
1854                         stats->rlec + stats->ruc + stats->roc +
1855                         stats->rxerrc + stats->algnerrc;
1856
1857         /* Tx Errors */
1858         rte_stats->oerrors = stats->ecol + stats->latecol;
1859
1860         rte_stats->ipackets = stats->gprc;
1861         rte_stats->opackets = stats->gptc;
1862         rte_stats->ibytes   = stats->gorc;
1863         rte_stats->obytes   = stats->gotc;
1864
1865         /* Get per-queue statistics */
1866         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1867                 /* Get TX queue statistics */
1868                 int map_id = igc->txq_stats_map[i];
1869                 if (map_id >= 0) {
1870                         rte_stats->q_opackets[map_id] += queue_stats->pqgptc[i];
1871                         rte_stats->q_obytes[map_id] += queue_stats->pqgotc[i];
1872                 }
1873                 /* Get RX queue statistics */
1874                 map_id = igc->rxq_stats_map[i];
1875                 if (map_id >= 0) {
1876                         rte_stats->q_ipackets[map_id] += queue_stats->pqgprc[i];
1877                         rte_stats->q_ibytes[map_id] += queue_stats->pqgorc[i];
1878                         rte_stats->q_errors[map_id] += queue_stats->rqdpc[i];
1879                 }
1880         }
1881
1882         /* Restart the queue statistics handler */
1883         rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1884                         igc_update_queue_stats_handler, dev);
1885         return 0;
1886 }
1887
1888 static int
1889 eth_igc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1890                    unsigned int n)
1891 {
1892         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1893         struct igc_hw_stats *hw_stats =
1894                         IGC_DEV_PRIVATE_STATS(dev);
1895         unsigned int i;
1896
1897         igc_read_stats_registers(hw, hw_stats);
1898
1899         if (n < IGC_NB_XSTATS)
1900                 return IGC_NB_XSTATS;
1901
1902         /* If this is a reset, xstats is NULL and we have already cleared
1903          * the registers by reading them.
1904          */
1905         if (!xstats)
1906                 return 0;
1907
1908         /* Extended stats */
1909         for (i = 0; i < IGC_NB_XSTATS; i++) {
1910                 xstats[i].id = i;
1911                 xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
1912                         rte_igc_stats_strings[i].offset);
1913         }
1914
1915         return IGC_NB_XSTATS;
1916 }
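
/*
 * The return convention above follows the ethdev xstats protocol:
 * applications usually probe for the required array size first, e.g.
 * (sketch using the public API):
 *
 *     int nb = rte_eth_xstats_get(port_id, NULL, 0);
 *     struct rte_eth_xstat *xs = malloc(nb * sizeof(*xs));
 *     if (xs != NULL)
 *             rte_eth_xstats_get(port_id, xs, nb);
 */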
1917
1918 static int
1919 eth_igc_xstats_reset(struct rte_eth_dev *dev)
1920 {
1921         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1922         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1923         struct igc_hw_queue_stats *queue_stats =
1924                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1925
1926         /* Cancel the queue statistics handler to avoid conflicts */
1927         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1928
1929         /* HW registers are cleared on read */
1930         igc_reset_queue_stats_register(hw);
1931         igc_read_stats_registers(hw, hw_stats);
1932
1933         /* Reset software totals */
1934         memset(hw_stats, 0, sizeof(*hw_stats));
1935         memset(queue_stats, 0, sizeof(*queue_stats));
1936
1937         /* Restart the queue statistics handler */
1938         rte_eal_alarm_set(IGC_ALARM_INTERVAL, igc_update_queue_stats_handler,
1939                         dev);
1940
1941         return 0;
1942 }
1943
1944 static int
1945 eth_igc_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
1946         struct rte_eth_xstat_name *xstats_names, unsigned int size)
1947 {
1948         unsigned int i;
1949
1950         if (xstats_names == NULL)
1951                 return IGC_NB_XSTATS;
1952
1953         if (size < IGC_NB_XSTATS) {
1954                 PMD_DRV_LOG(ERR, "xstats names buffer is too small!");
1955                 return IGC_NB_XSTATS;
1956         }
1957
1958         for (i = 0; i < IGC_NB_XSTATS; i++)
1959                 strlcpy(xstats_names[i].name, rte_igc_stats_strings[i].name,
1960                         sizeof(xstats_names[i].name));
1961
1962         return IGC_NB_XSTATS;
1963 }
1964
1965 static int
1966 eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
1967                 struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
1968                 unsigned int limit)
1969 {
1970         unsigned int i;
1971
1972         if (!ids)
1973                 return eth_igc_xstats_get_names(dev, xstats_names, limit);
1974
1975         for (i = 0; i < limit; i++) {
1976                 if (ids[i] >= IGC_NB_XSTATS) {
1977                         PMD_DRV_LOG(ERR, "id value isn't valid");
1978                         return -EINVAL;
1979                 }
1980                 strlcpy(xstats_names[i].name,
1981                         rte_igc_stats_strings[ids[i]].name,
1982                         sizeof(xstats_names[i].name));
1983         }
1984         return limit;
1985 }
1986
1987 static int
1988 eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
1989                 uint64_t *values, unsigned int n)
1990 {
1991         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1992         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1993         unsigned int i;
1994
1995         igc_read_stats_registers(hw, hw_stats);
1996
1997         if (!ids) {
1998                 if (n < IGC_NB_XSTATS)
1999                         return IGC_NB_XSTATS;
2000
2001                 /* If this is a reset, values is NULL and we have already
2002                  * cleared the registers by reading them.
2003                  */
2004                 if (!values)
2005                         return 0;
2006
2007                 /* Extended stats */
2008                 for (i = 0; i < IGC_NB_XSTATS; i++)
2009                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2010                                         rte_igc_stats_strings[i].offset);
2011
2012                 return IGC_NB_XSTATS;
2013
2014         } else {
2015                 for (i = 0; i < n; i++) {
2016                         if (ids[i] >= IGC_NB_XSTATS) {
2017                                 PMD_DRV_LOG(ERR, "id value isn't valid");
2018                                 return -EINVAL;
2019                         }
2020                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2021                                         rte_igc_stats_strings[ids[i]].offset);
2022                 }
2023                 return n;
2024         }
2025 }
2026
2027 static int
2028 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
2029                 uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx)
2030 {
2031         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
2032
2033         /* check queue id is valid */
2034         if (queue_id >= IGC_QUEUE_PAIRS_NUM) {
2035                 PMD_DRV_LOG(ERR, "invalid queue id (%u), max is %u",
2036                         queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2037                 return -EINVAL;
2038         }
2039
2040         /* store the statistics mapping id for the queue */
2041         if (is_rx)
2042                 igc->rxq_stats_map[queue_id] = stat_idx;
2043         else
2044                 igc->txq_stats_map[queue_id] = stat_idx;
2045
2046         return 0;
2047 }
2048
2049 static int
2050 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
2051 {
2052         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2053         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2054         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2055         uint32_t vec = IGC_MISC_VEC_ID;
2056
2057         if (rte_intr_allow_others(intr_handle))
2058                 vec = IGC_RX_VEC_START;
2059
2060         uint32_t mask = 1u << (queue_id + vec);
2061
2062         IGC_WRITE_REG(hw, IGC_EIMC, mask);
2063         IGC_WRITE_FLUSH(hw);
2064
2065         return 0;
2066 }
2067
2068 static int
2069 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
2070 {
2071         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2072         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2073         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2074         uint32_t vec = IGC_MISC_VEC_ID;
2075
2076         if (rte_intr_allow_others(intr_handle))
2077                 vec = IGC_RX_VEC_START;
2078
2079         uint32_t mask = 1u << (queue_id + vec);
2080
2081         IGC_WRITE_REG(hw, IGC_EIMS, mask);
2082         IGC_WRITE_FLUSH(hw);
2083
2084         rte_intr_enable(intr_handle);
2085
2086         return 0;
2087 }
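
/*
 * In both interrupt helpers above, the queue's MSI-X vector is queue_id
 * plus an offset: when the misc (link) interrupt owns a dedicated vector,
 * RX vectors start at IGC_RX_VEC_START, otherwise everything shares
 * vector IGC_MISC_VEC_ID. Writing the resulting bit to EIMS unmasks the
 * interrupt, writing it to EIMC masks it.
 */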
2088
2089 static int
2090 eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2091 {
2092         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2093         uint32_t ctrl;
2094         int tx_pause;
2095         int rx_pause;
2096
2097         fc_conf->pause_time = hw->fc.pause_time;
2098         fc_conf->high_water = hw->fc.high_water;
2099         fc_conf->low_water = hw->fc.low_water;
2100         fc_conf->send_xon = hw->fc.send_xon;
2101         fc_conf->autoneg = hw->mac.autoneg;
2102
2103         /*
2104          * Return rx_pause and tx_pause status according to actual setting of
2105          * the TFCE and RFCE bits in the CTRL register.
2106          */
2107         ctrl = IGC_READ_REG(hw, IGC_CTRL);
2108         if (ctrl & IGC_CTRL_TFCE)
2109                 tx_pause = 1;
2110         else
2111                 tx_pause = 0;
2112
2113         if (ctrl & IGC_CTRL_RFCE)
2114                 rx_pause = 1;
2115         else
2116                 rx_pause = 0;
2117
2118         if (rx_pause && tx_pause)
2119                 fc_conf->mode = RTE_FC_FULL;
2120         else if (rx_pause)
2121                 fc_conf->mode = RTE_FC_RX_PAUSE;
2122         else if (tx_pause)
2123                 fc_conf->mode = RTE_FC_TX_PAUSE;
2124         else
2125                 fc_conf->mode = RTE_FC_NONE;
2126
2127         return 0;
2128 }
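
/*
 * Note that CTRL.RFCE/TFCE reflect the flow-control state actually in
 * effect (after any autonegotiation), which may differ from the mode most
 * recently requested through eth_igc_flow_ctrl_set() below.
 */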
2129
2130 static int
2131 eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2132 {
2133         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2134         uint32_t rx_buf_size;
2135         uint32_t max_high_water;
2136         uint32_t rctl;
2137         int err;
2138
2139         if (fc_conf->autoneg != hw->mac.autoneg)
2140                 return -ENOTSUP;
2141
2142         rx_buf_size = igc_get_rx_buffer_size(hw);
2143         PMD_DRV_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size);
2144
2145         /* Reserve at least one Ethernet frame for the watermark */
2146         max_high_water = rx_buf_size - RTE_ETHER_MAX_LEN;
2147         if (fc_conf->high_water > max_high_water ||
2148                 fc_conf->high_water < fc_conf->low_water) {
2149                 PMD_DRV_LOG(ERR,
2150                         "Incorrect high(%u)/low(%u) water value, max is %u",
2151                         fc_conf->high_water, fc_conf->low_water,
2152                         max_high_water);
2153                 return -EINVAL;
2154         }
2155
2156         switch (fc_conf->mode) {
2157         case RTE_FC_NONE:
2158                 hw->fc.requested_mode = igc_fc_none;
2159                 break;
2160         case RTE_FC_RX_PAUSE:
2161                 hw->fc.requested_mode = igc_fc_rx_pause;
2162                 break;
2163         case RTE_FC_TX_PAUSE:
2164                 hw->fc.requested_mode = igc_fc_tx_pause;
2165                 break;
2166         case RTE_FC_FULL:
2167                 hw->fc.requested_mode = igc_fc_full;
2168                 break;
2169         default:
2170                 PMD_DRV_LOG(ERR, "unsupported fc mode: %u", fc_conf->mode);
2171                 return -EINVAL;
2172         }
2173
2174         hw->fc.pause_time     = fc_conf->pause_time;
2175         hw->fc.high_water     = fc_conf->high_water;
2176         hw->fc.low_water      = fc_conf->low_water;
2177         hw->fc.send_xon       = fc_conf->send_xon;
2178
2179         err = igc_setup_link_generic(hw);
2180         if (err == IGC_SUCCESS) {
2181                 /*
2182                  * Check if we want to forward MAC control frames; the base
2183                  * driver has no native capability to do that, so we write
2184                  * the register ourselves.
2185                  */
2186                 rctl = IGC_READ_REG(hw, IGC_RCTL);
2187
2188                 /* set or clear RCTL.PMCF bit depending on configuration */
2189                 if (fc_conf->mac_ctrl_frame_fwd != 0)
2190                         rctl |= IGC_RCTL_PMCF;
2191                 else
2192                         rctl &= ~IGC_RCTL_PMCF;
2193
2194                 IGC_WRITE_REG(hw, IGC_RCTL, rctl);
2195                 IGC_WRITE_FLUSH(hw);
2196
2197                 return 0;
2198         }
2199
2200         PMD_DRV_LOG(ERR, "igc_setup_link_generic = 0x%x", err);
2201         return -EIO;
2202 }
2203
2204 static int
2205 eth_igc_rss_reta_update(struct rte_eth_dev *dev,
2206                         struct rte_eth_rss_reta_entry64 *reta_conf,
2207                         uint16_t reta_size)
2208 {
2209         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2210         uint16_t i;
2211
2212         if (reta_size != ETH_RSS_RETA_SIZE_128) {
2213                 PMD_DRV_LOG(ERR,
2214                         "The size of the configured RSS redirection table (%d) doesn't match the number supported by hardware (%d)",
2215                         reta_size, ETH_RSS_RETA_SIZE_128);
2216                 return -EINVAL;
2217         }
2218
2219         /* set redirection table */
2220         for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2221                 union igc_rss_reta_reg reta, reg;
2222                 uint16_t idx, shift;
2223                 uint8_t j, mask;
2224
2225                 idx = i / RTE_RETA_GROUP_SIZE;
2226                 shift = i % RTE_RETA_GROUP_SIZE;
2227                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2228                                 IGC_RSS_RDT_REG_SIZE_MASK);
2229
2230                 /* skip if there is no need to update this register */
2231                 if (!mask)
2232                         continue;
2233
2234                 /* check the mask to see whether the old value must be read first */
2235                 if (mask == IGC_RSS_RDT_REG_SIZE_MASK)
2236                         reg.dword = 0;
2237                 else
2238                         reg.dword = IGC_READ_REG_LE_VALUE(hw,
2239                                         IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2240
2241                 /* update the register */
2242                 for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2243                         if (mask & (1u << j))
2244                                 reta.bytes[j] =
2245                                         (uint8_t)reta_conf[idx].reta[shift + j];
2246                         else
2247                                 reta.bytes[j] = reg.bytes[j];
2248                 }
2249                 IGC_WRITE_REG_LE_VALUE(hw,
2250                         IGC_RETA(i / IGC_RSS_RDT_REG_SIZE), reta.dword);
2251         }
2252
2253         return 0;
2254 }
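
/*
 * Layout recap for the loop above: the 128-entry redirection table is
 * packed four one-byte queue indexes per 32-bit RETA register, so entry
 * i lives in byte (i % IGC_RSS_RDT_REG_SIZE) of register
 * (i / IGC_RSS_RDT_REG_SIZE); e.g. entry 13 is byte 1 of RETA register 3,
 * assuming IGC_RSS_RDT_REG_SIZE is 4.
 */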
2255
2256 static int
2257 eth_igc_rss_reta_query(struct rte_eth_dev *dev,
2258                        struct rte_eth_rss_reta_entry64 *reta_conf,
2259                        uint16_t reta_size)
2260 {
2261         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2262         uint16_t i;
2263
2264         if (reta_size != ETH_RSS_RETA_SIZE_128) {
2265                 PMD_DRV_LOG(ERR,
2266                         "The size of the configured RSS redirection table (%d) doesn't match the number supported by hardware (%d)",
2267                         reta_size, ETH_RSS_RETA_SIZE_128);
2268                 return -EINVAL;
2269         }
2270
2271         /* read redirection table */
2272         for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2273                 union igc_rss_reta_reg reta;
2274                 uint16_t idx, shift;
2275                 uint8_t j, mask;
2276
2277                 idx = i / RTE_RETA_GROUP_SIZE;
2278                 shift = i % RTE_RETA_GROUP_SIZE;
2279                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2280                                 IGC_RSS_RDT_REG_SIZE_MASK);
2281
2282                 /* skip if there is no need to read this register */
2283                 if (!mask)
2284                         continue;
2285
2286                 /* read register and get the queue index */
2287                 reta.dword = IGC_READ_REG_LE_VALUE(hw,
2288                                 IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2289                 for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2290                         if (mask & (1u << j))
2291                                 reta_conf[idx].reta[shift + j] = reta.bytes[j];
2292                 }
2293         }
2294
2295         return 0;
2296 }
2297
2298 static int
2299 eth_igc_rss_hash_update(struct rte_eth_dev *dev,
2300                         struct rte_eth_rss_conf *rss_conf)
2301 {
2302         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2303         igc_hw_rss_hash_set(hw, rss_conf);
2304         return 0;
2305 }
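
/*
 * Application-side sketch for the two RSS hash callbacks (assuming the
 * 40-byte key implied by IGC_HKEY_SIZE; the key material below is purely
 * illustrative):
 *
 *     static uint8_t key[40] = { 0x6d, 0x5a, ... };
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = key,
 *             .rss_key_len = sizeof(key),
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(port_id, &conf);
 */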
2306
2307 static int
2308 eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
2309                         struct rte_eth_rss_conf *rss_conf)
2310 {
2311         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2312         uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
2313         uint32_t mrqc;
2314         uint64_t rss_hf;
2315
2316         if (hash_key != NULL) {
2317                 int i;
2318
2319                 /* if there is not enough space to store the hash key */
2320                 if (rss_conf->rss_key_len != IGC_HKEY_SIZE) {
2321                         PMD_DRV_LOG(ERR,
2322                                 "RSS hash key size %u in parameter doesn't match the hardware hash key size %u",
2323                                 rss_conf->rss_key_len, IGC_HKEY_SIZE);
2324                         return -EINVAL;
2325                 }
2326
2327                 /* read RSS key from register */
2328                 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
2329                         hash_key[i] = IGC_READ_REG_LE_VALUE(hw, IGC_RSSRK(i));
2330         }
2331
2332         /* get RSS functions configured in MRQC register */
2333         mrqc = IGC_READ_REG(hw, IGC_MRQC);
2334         if ((mrqc & IGC_MRQC_ENABLE_RSS_4Q) == 0)
2335                 return 0;
2336
2337         rss_hf = 0;
2338         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4)
2339                 rss_hf |= ETH_RSS_IPV4;
2340         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_TCP)
2341                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2342         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6)
2343                 rss_hf |= ETH_RSS_IPV6;
2344         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_EX)
2345                 rss_hf |= ETH_RSS_IPV6_EX;
2346         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP)
2347                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2348         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP_EX)
2349                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2350         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_UDP)
2351                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2352         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP)
2353                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2354         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP_EX)
2355                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2356
2357         rss_conf->rss_hf |= rss_hf;
2358         return 0;
2359 }
2360
2361 static int
2362 eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2363         struct rte_pci_device *pci_dev)
2364 {
2365         PMD_INIT_FUNC_TRACE();
2366         return rte_eth_dev_pci_generic_probe(pci_dev,
2367                 sizeof(struct igc_adapter), eth_igc_dev_init);
2368 }
2369
2370 static int
2371 eth_igc_pci_remove(struct rte_pci_device *pci_dev)
2372 {
2373         PMD_INIT_FUNC_TRACE();
2374         return rte_eth_dev_pci_generic_remove(pci_dev, eth_igc_dev_uninit);
2375 }
2376
2377 static struct rte_pci_driver rte_igc_pmd = {
2378         .id_table = pci_id_igc_map,
2379         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2380         .probe = eth_igc_pci_probe,
2381         .remove = eth_igc_pci_remove,
2382 };
2383
2384 RTE_PMD_REGISTER_PCI(net_igc, rte_igc_pmd);
2385 RTE_PMD_REGISTER_PCI_TABLE(net_igc, pci_id_igc_map);
2386 RTE_PMD_REGISTER_KMOD_DEP(net_igc, "* igb_uio | uio_pci_generic | vfio-pci");