net/igc: enable Rx queue interrupts
drivers/net/igc/igc_ethdev.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019-2020 Intel Corporation
 */

#include <stdint.h>
#include <string.h>

#include <rte_string_fns.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <rte_malloc.h>
#include <rte_alarm.h>

#include "igc_logs.h"
#include "igc_txrx.h"

#define IGC_INTEL_VENDOR_ID             0x8086

/*
 * The overhead from MTU to max frame size.
 * A single VLAN tag is taken into account as well.
 */
#define IGC_ETH_OVERHEAD                (RTE_ETHER_HDR_LEN + \
                                        RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE)
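/*
 * Worked example (assuming VLAN_TAG_SIZE is 4): with the standard
 * 1500-byte MTU, the max frame size is 1500 + 14 (Ethernet header) +
 * 4 (CRC) + 4 (VLAN tag) = 1522 bytes.
 */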

#define IGC_FC_PAUSE_TIME               0x0680
#define IGC_LINK_UPDATE_CHECK_TIMEOUT   90  /* 9s */
#define IGC_LINK_UPDATE_CHECK_INTERVAL  100 /* ms */

#define IGC_MISC_VEC_ID                 RTE_INTR_VEC_ZERO_OFFSET
#define IGC_RX_VEC_START                RTE_INTR_VEC_RXTX_OFFSET
#define IGC_MSIX_OTHER_INTR_VEC         0   /* MSI-X other interrupt vector */
#define IGC_FLAG_NEED_LINK_UPDATE       (1u << 0)       /* need update link */

#define IGC_DEFAULT_RX_FREE_THRESH      32

#define IGC_DEFAULT_RX_PTHRESH          8
#define IGC_DEFAULT_RX_HTHRESH          8
#define IGC_DEFAULT_RX_WTHRESH          4

#define IGC_DEFAULT_TX_PTHRESH          8
#define IGC_DEFAULT_TX_HTHRESH          1
#define IGC_DEFAULT_TX_WTHRESH          16

/* External VLAN Enable bit mask */
#define IGC_CTRL_EXT_EXT_VLAN           (1u << 26)

/* Per Queue Good Packets Received Count */
#define IGC_PQGPRC(idx)         (0x10010 + 0x100 * (idx))
/* Per Queue Good Octets Received Count */
#define IGC_PQGORC(idx)         (0x10018 + 0x100 * (idx))
/* Per Queue Good Octets Transmitted Count */
#define IGC_PQGOTC(idx)         (0x10034 + 0x100 * (idx))
/* Per Queue Multicast Packets Received Count */
#define IGC_PQMPRC(idx)         (0x10038 + 0x100 * (idx))
/* Transmit Queue Drop Packet Count */
#define IGC_TQDPC(idx)          (0xe030 + 0x40 * (idx))
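
/*
 * Usage sketch (illustrative only, not driver code): each macro expands
 * to a per-queue register offset, so a counter is read as e.g.
 *
 *      uint32_t pkts = IGC_READ_REG(hw, IGC_PQGPRC(queue_id));
 *
 * These registers are polled periodically (see
 * igc_update_queue_stats_handler() below) because the 32-bit hardware
 * counters wrap around.
 */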

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
#define U32_0_IN_U64            0       /* lower bytes of u64 */
#define U32_1_IN_U64            1       /* higher bytes of u64 */
#else
#define U32_0_IN_U64            1
#define U32_1_IN_U64            0
#endif
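
/*
 * Usage sketch (illustrative only): these indexes let a 64-bit software
 * counter be updated through its two 32-bit halves regardless of host
 * endianness:
 *
 *      uint64_t counter;
 *      uint32_t *half = (uint32_t *)&counter;
 *
 *      half[U32_0_IN_U64] = low_32_bits;
 *      half[U32_1_IN_U64] = high_32_bits;
 */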

/*
 * Alarm period in microseconds. About every 13.6s some per-queue
 * registers wrap around back to 0, so they must be read out before
 * that happens.
 */
#define IGC_ALARM_INTERVAL      8000000u

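/*
 * Back-of-the-envelope check (illustrative): at 2.5Gb/s a 32-bit octet
 * counter advances by 2.5e9 / 8 = 312.5MB/s and wraps after
 * 2^32 / 312.5e6 ~= 13.7s, consistent with the figure above and well
 * covered by the 8s polling period.
 */
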
static const struct rte_eth_desc_lim rx_desc_lim = {
        .nb_max = IGC_MAX_RXD,
        .nb_min = IGC_MIN_RXD,
        .nb_align = IGC_RXD_ALIGN,
};

static const struct rte_eth_desc_lim tx_desc_lim = {
        .nb_max = IGC_MAX_TXD,
        .nb_min = IGC_MIN_TXD,
        .nb_align = IGC_TXD_ALIGN,
        .nb_seg_max = IGC_TX_MAX_SEG,
        .nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG,
};

static const struct rte_pci_id pci_id_igc_map[] = {
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) },
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V)  },
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_I)  },
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_K)  },
        { .vendor_id = 0, /* sentinel */ },
};

/* Statistics names and their offsets in the igc_hw_stats structure */
struct rte_igc_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned int offset;
};

static const struct rte_igc_xstats_name_off rte_igc_stats_strings[] = {
        {"rx_crc_errors", offsetof(struct igc_hw_stats, crcerrs)},
        {"rx_align_errors", offsetof(struct igc_hw_stats, algnerrc)},
        {"rx_errors", offsetof(struct igc_hw_stats, rxerrc)},
        {"rx_missed_packets", offsetof(struct igc_hw_stats, mpc)},
        {"tx_single_collision_packets", offsetof(struct igc_hw_stats, scc)},
        {"tx_multiple_collision_packets", offsetof(struct igc_hw_stats, mcc)},
        {"tx_excessive_collision_packets", offsetof(struct igc_hw_stats,
                ecol)},
        {"tx_late_collisions", offsetof(struct igc_hw_stats, latecol)},
        {"tx_total_collisions", offsetof(struct igc_hw_stats, colc)},
        {"tx_deferred_packets", offsetof(struct igc_hw_stats, dc)},
        {"tx_no_carrier_sense_packets", offsetof(struct igc_hw_stats, tncrs)},
        {"tx_discarded_packets", offsetof(struct igc_hw_stats, htdpmc)},
        {"rx_length_errors", offsetof(struct igc_hw_stats, rlec)},
        {"rx_xon_packets", offsetof(struct igc_hw_stats, xonrxc)},
        {"tx_xon_packets", offsetof(struct igc_hw_stats, xontxc)},
        {"rx_xoff_packets", offsetof(struct igc_hw_stats, xoffrxc)},
        {"tx_xoff_packets", offsetof(struct igc_hw_stats, xofftxc)},
        {"rx_flow_control_unsupported_packets", offsetof(struct igc_hw_stats,
                fcruc)},
        {"rx_size_64_packets", offsetof(struct igc_hw_stats, prc64)},
        {"rx_size_65_to_127_packets", offsetof(struct igc_hw_stats, prc127)},
        {"rx_size_128_to_255_packets", offsetof(struct igc_hw_stats, prc255)},
        {"rx_size_256_to_511_packets", offsetof(struct igc_hw_stats, prc511)},
        {"rx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
                prc1023)},
        {"rx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
                prc1522)},
        {"rx_broadcast_packets", offsetof(struct igc_hw_stats, bprc)},
        {"rx_multicast_packets", offsetof(struct igc_hw_stats, mprc)},
        {"rx_undersize_errors", offsetof(struct igc_hw_stats, ruc)},
        {"rx_fragment_errors", offsetof(struct igc_hw_stats, rfc)},
        {"rx_oversize_errors", offsetof(struct igc_hw_stats, roc)},
        {"rx_jabber_errors", offsetof(struct igc_hw_stats, rjc)},
        {"rx_no_buffers", offsetof(struct igc_hw_stats, rnbc)},
        {"rx_management_packets", offsetof(struct igc_hw_stats, mgprc)},
        {"rx_management_dropped", offsetof(struct igc_hw_stats, mgpdc)},
        {"tx_management_packets", offsetof(struct igc_hw_stats, mgptc)},
        {"rx_total_packets", offsetof(struct igc_hw_stats, tpr)},
        {"tx_total_packets", offsetof(struct igc_hw_stats, tpt)},
        {"rx_total_bytes", offsetof(struct igc_hw_stats, tor)},
        {"tx_total_bytes", offsetof(struct igc_hw_stats, tot)},
        {"tx_size_64_packets", offsetof(struct igc_hw_stats, ptc64)},
        {"tx_size_65_to_127_packets", offsetof(struct igc_hw_stats, ptc127)},
        {"tx_size_128_to_255_packets", offsetof(struct igc_hw_stats, ptc255)},
        {"tx_size_256_to_511_packets", offsetof(struct igc_hw_stats, ptc511)},
        {"tx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
                ptc1023)},
        {"tx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
                ptc1522)},
        {"tx_multicast_packets", offsetof(struct igc_hw_stats, mptc)},
        {"tx_broadcast_packets", offsetof(struct igc_hw_stats, bptc)},
        {"tx_tso_packets", offsetof(struct igc_hw_stats, tsctc)},
        {"rx_sent_to_host_packets", offsetof(struct igc_hw_stats, rpthc)},
        {"tx_sent_by_host_packets", offsetof(struct igc_hw_stats, hgptc)},
        {"interrupt_assert_count", offsetof(struct igc_hw_stats, iac)},
        {"rx_descriptor_lower_threshold",
                offsetof(struct igc_hw_stats, icrxdmtc)},
};

#define IGC_NB_XSTATS (sizeof(rte_igc_stats_strings) / \
                sizeof(rte_igc_stats_strings[0]))
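
/*
 * Illustrative sketch (not the driver's literal code): the name/offset
 * pairs above let the xstats callbacks fetch any counter generically,
 * where "stats" points to the device-private igc_hw_stats area:
 *
 *      uint64_t value = *(uint64_t *)((char *)stats +
 *                      rte_igc_stats_strings[i].offset);
 */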

static int eth_igc_configure(struct rte_eth_dev *dev);
static int eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static void eth_igc_stop(struct rte_eth_dev *dev);
static int eth_igc_start(struct rte_eth_dev *dev);
static int eth_igc_set_link_up(struct rte_eth_dev *dev);
static int eth_igc_set_link_down(struct rte_eth_dev *dev);
static void eth_igc_close(struct rte_eth_dev *dev);
static int eth_igc_reset(struct rte_eth_dev *dev);
static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev);
static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev);
static int eth_igc_fw_version_get(struct rte_eth_dev *dev,
                                char *fw_version, size_t fw_size);
static int eth_igc_infos_get(struct rte_eth_dev *dev,
                        struct rte_eth_dev_info *dev_info);
static int eth_igc_led_on(struct rte_eth_dev *dev);
static int eth_igc_led_off(struct rte_eth_dev *dev);
static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev);
static int eth_igc_rar_set(struct rte_eth_dev *dev,
                struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool);
static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index);
static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
                        struct rte_ether_addr *addr);
static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
                         struct rte_ether_addr *mc_addr_set,
                         uint32_t nb_mc_addr);
static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev);
static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev);
static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int eth_igc_stats_get(struct rte_eth_dev *dev,
                        struct rte_eth_stats *rte_stats);
static int eth_igc_xstats_get(struct rte_eth_dev *dev,
                        struct rte_eth_xstat *xstats, unsigned int n);
static int eth_igc_xstats_get_by_id(struct rte_eth_dev *dev,
                                const uint64_t *ids,
                                uint64_t *values, unsigned int n);
static int eth_igc_xstats_get_names(struct rte_eth_dev *dev,
                                struct rte_eth_xstat_name *xstats_names,
                                unsigned int size);
static int eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
                struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
                unsigned int limit);
static int eth_igc_xstats_reset(struct rte_eth_dev *dev);
static int
eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
        uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx);
static int
eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
static int
eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);

static const struct eth_dev_ops eth_igc_ops = {
        .dev_configure          = eth_igc_configure,
        .link_update            = eth_igc_link_update,
        .dev_stop               = eth_igc_stop,
        .dev_start              = eth_igc_start,
        .dev_close              = eth_igc_close,
        .dev_reset              = eth_igc_reset,
        .dev_set_link_up        = eth_igc_set_link_up,
        .dev_set_link_down      = eth_igc_set_link_down,
        .promiscuous_enable     = eth_igc_promiscuous_enable,
        .promiscuous_disable    = eth_igc_promiscuous_disable,
        .allmulticast_enable    = eth_igc_allmulticast_enable,
        .allmulticast_disable   = eth_igc_allmulticast_disable,
        .fw_version_get         = eth_igc_fw_version_get,
        .dev_infos_get          = eth_igc_infos_get,
        .dev_led_on             = eth_igc_led_on,
        .dev_led_off            = eth_igc_led_off,
        .dev_supported_ptypes_get = eth_igc_supported_ptypes_get,
        .mtu_set                = eth_igc_mtu_set,
        .mac_addr_add           = eth_igc_rar_set,
        .mac_addr_remove        = eth_igc_rar_clear,
        .mac_addr_set           = eth_igc_default_mac_addr_set,
        .set_mc_addr_list       = eth_igc_set_mc_addr_list,

        .rx_queue_setup         = eth_igc_rx_queue_setup,
        .rx_queue_release       = eth_igc_rx_queue_release,
        .rx_queue_count         = eth_igc_rx_queue_count,
        .rx_descriptor_done     = eth_igc_rx_descriptor_done,
        .rx_descriptor_status   = eth_igc_rx_descriptor_status,
        .tx_descriptor_status   = eth_igc_tx_descriptor_status,
        .tx_queue_setup         = eth_igc_tx_queue_setup,
        .tx_queue_release       = eth_igc_tx_queue_release,
        .tx_done_cleanup        = eth_igc_tx_done_cleanup,
        .rxq_info_get           = eth_igc_rxq_info_get,
        .txq_info_get           = eth_igc_txq_info_get,
        .stats_get              = eth_igc_stats_get,
        .xstats_get             = eth_igc_xstats_get,
        .xstats_get_by_id       = eth_igc_xstats_get_by_id,
        .xstats_get_names_by_id = eth_igc_xstats_get_names_by_id,
        .xstats_get_names       = eth_igc_xstats_get_names,
        .stats_reset            = eth_igc_xstats_reset,
        .xstats_reset           = eth_igc_xstats_reset,
        .queue_stats_mapping_set = eth_igc_queue_stats_mapping_set,
        .rx_queue_intr_enable   = eth_igc_rx_queue_intr_enable,
        .rx_queue_intr_disable  = eth_igc_rx_queue_intr_disable,
};

/*
 * multiple queue mode checking
 */
static int
igc_check_mq_mode(struct rte_eth_dev *dev)
{
        enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode;
        enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode;

        if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
                PMD_INIT_LOG(ERR, "SRIOV is not supported.");
                return -EINVAL;
        }

        if (rx_mq_mode != ETH_MQ_RX_NONE &&
                rx_mq_mode != ETH_MQ_RX_RSS) {
                /* RSS together with VMDq not supported */
                PMD_INIT_LOG(ERR, "RX mode %d is not supported.",
                                rx_mq_mode);
                return -EINVAL;
        }

        /* To avoid breaking software that sets an invalid mode, only
         * display a warning if an invalid mode is used.
         */
        if (tx_mq_mode != ETH_MQ_TX_NONE)
                PMD_INIT_LOG(WARNING,
                        "TX mode %d is not supported; it is meaningless for this driver, ignoring",
                        tx_mq_mode);

        return 0;
}
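
/*
 * Configuration sketch this check accepts (illustrative only, not part
 * of the driver): spread packets over the Rx queues with RSS and leave
 * Tx multi-queue mode off.
 *
 *      struct rte_eth_conf conf = {
 *              .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *              .txmode = { .mq_mode = ETH_MQ_TX_NONE },
 *      };
 *      rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */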

static int
eth_igc_configure(struct rte_eth_dev *dev)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
        int ret;

        PMD_INIT_FUNC_TRACE();

        ret = igc_check_mq_mode(dev);
        if (ret != 0)
                return ret;

        intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
        return 0;
}

static int
eth_igc_set_link_up(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

        if (hw->phy.media_type == igc_media_type_copper)
                igc_power_up_phy(hw);
        else
                igc_power_up_fiber_serdes_link(hw);
        return 0;
}

static int
eth_igc_set_link_down(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

        if (hw->phy.media_type == igc_media_type_copper)
                igc_power_down_phy(hw);
        else
                igc_shutdown_fiber_serdes_link(hw);
        return 0;
}

/*
 * disable other interrupt
 */
static void
igc_intr_other_disable(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

        if (rte_intr_allow_others(intr_handle) &&
                dev->data->dev_conf.intr_conf.lsc) {
                IGC_WRITE_REG(hw, IGC_EIMC, 1u << IGC_MSIX_OTHER_INTR_VEC);
        }

        IGC_WRITE_REG(hw, IGC_IMC, ~0);
        IGC_WRITE_FLUSH(hw);
}

/*
 * enable other interrupt
 */
static inline void
igc_intr_other_enable(struct rte_eth_dev *dev)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

        if (rte_intr_allow_others(intr_handle) &&
                dev->data->dev_conf.intr_conf.lsc) {
                IGC_WRITE_REG(hw, IGC_EIMS, 1u << IGC_MSIX_OTHER_INTR_VEC);
        }

        IGC_WRITE_REG(hw, IGC_IMS, intr->mask);
        IGC_WRITE_FLUSH(hw);
}

/*
 * Read the ICR register to get the interrupt causes; check them and set
 * a bit flag if the link status needs to be updated.
 */
static void
eth_igc_interrupt_get_status(struct rte_eth_dev *dev)
{
        uint32_t icr;
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);

        /* read-on-clear nic registers here */
        icr = IGC_READ_REG(hw, IGC_ICR);

        intr->flags = 0;
        if (icr & IGC_ICR_LSC)
                intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
}

/* return 0 means link status changed, -1 means not changed */
static int
eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_eth_link link;
        int link_check, count;

        link_check = 0;
        hw->mac.get_link_status = 1;

        /* possible wait-to-complete in up to 9 seconds */
        for (count = 0; count < IGC_LINK_UPDATE_CHECK_TIMEOUT; count++) {
                /* Read the real link status */
                switch (hw->phy.media_type) {
                case igc_media_type_copper:
                        /* Do the work to read phy */
                        igc_check_for_link(hw);
                        link_check = !hw->mac.get_link_status;
                        break;

                case igc_media_type_fiber:
                        igc_check_for_link(hw);
                        link_check = (IGC_READ_REG(hw, IGC_STATUS) &
                                      IGC_STATUS_LU);
                        break;

                case igc_media_type_internal_serdes:
                        igc_check_for_link(hw);
                        link_check = hw->mac.serdes_has_link;
                        break;

                default:
                        break;
                }
                if (link_check || wait_to_complete == 0)
                        break;
                rte_delay_ms(IGC_LINK_UPDATE_CHECK_INTERVAL);
        }
        memset(&link, 0, sizeof(link));

        /* Now we check if a transition has happened */
        if (link_check) {
                uint16_t duplex, speed;
                hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
                link.link_duplex = (duplex == FULL_DUPLEX) ?
                                ETH_LINK_FULL_DUPLEX :
                                ETH_LINK_HALF_DUPLEX;
                link.link_speed = speed;
                link.link_status = ETH_LINK_UP;
                link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                                ETH_LINK_SPEED_FIXED);

                if (speed == SPEED_2500) {
                        uint32_t tipg = IGC_READ_REG(hw, IGC_TIPG);
                        if ((tipg & IGC_TIPG_IPGT_MASK) != 0x0b) {
                                tipg &= ~IGC_TIPG_IPGT_MASK;
                                tipg |= 0x0b;
                                IGC_WRITE_REG(hw, IGC_TIPG, tipg);
                        }
                }
        } else {
                link.link_speed = 0;
                link.link_duplex = ETH_LINK_HALF_DUPLEX;
                link.link_status = ETH_LINK_DOWN;
                link.link_autoneg = ETH_LINK_FIXED;
        }

        return rte_eth_linkstatus_set(dev, &link);
}

/*
 * Execute link_update once it is known that an interrupt has arrived.
 */
static void
eth_igc_interrupt_action(struct rte_eth_dev *dev)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_eth_link link;
        int ret;

        if (intr->flags & IGC_FLAG_NEED_LINK_UPDATE) {
                intr->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;

                /* set get_link_status to check register later */
                ret = eth_igc_link_update(dev, 0);

                /* check if link has changed */
                if (ret < 0)
                        return;

                rte_eth_linkstatus_get(dev, &link);
                if (link.link_status)
                        PMD_DRV_LOG(INFO,
                                " Port %d: Link Up - speed %u Mbps - %s",
                                dev->data->port_id,
                                (unsigned int)link.link_speed,
                                link.link_duplex == ETH_LINK_FULL_DUPLEX ?
                                "full-duplex" : "half-duplex");
                else
                        PMD_DRV_LOG(INFO, " Port %d: Link Down",
                                dev->data->port_id);

                PMD_DRV_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
                                pci_dev->addr.domain,
                                pci_dev->addr.bus,
                                pci_dev->addr.devid,
                                pci_dev->addr.function);
                _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
                                NULL);
        }
}

/*
 * Interrupt handler; it must be registered before interrupts are enabled.
 *
 * @handle
 *  Pointer to interrupt handle.
 * @param
 *  The address of the parameter (struct rte_eth_dev *) registered before.
 */
static void
eth_igc_interrupt_handler(void *param)
{
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;

        eth_igc_interrupt_get_status(dev);
        eth_igc_interrupt_action(dev);
}

static void igc_read_queue_stats_register(struct rte_eth_dev *dev);

/*
 * Read out the per-queue stats registers every IGC_ALARM_INTERVAL
 * microseconds, before any of them can wrap around.
 * @param
 *  The address of the parameter (struct rte_eth_dev *) registered before.
 */
static void
igc_update_queue_stats_handler(void *param)
{
        struct rte_eth_dev *dev = param;
        igc_read_queue_stats_register(dev);
        rte_eal_alarm_set(IGC_ALARM_INTERVAL,
                        igc_update_queue_stats_handler, dev);
}

/*
 * rx,tx enable/disable
 */
static void
eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        uint32_t tctl, rctl;

        tctl = IGC_READ_REG(hw, IGC_TCTL);
        rctl = IGC_READ_REG(hw, IGC_RCTL);

        if (enable) {
                /* enable Tx/Rx */
                tctl |= IGC_TCTL_EN;
                rctl |= IGC_RCTL_EN;
        } else {
                /* disable Tx/Rx */
                tctl &= ~IGC_TCTL_EN;
                rctl &= ~IGC_RCTL_EN;
        }
        IGC_WRITE_REG(hw, IGC_TCTL, tctl);
        IGC_WRITE_REG(hw, IGC_RCTL, rctl);
        IGC_WRITE_FLUSH(hw);
}

/*
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC.
 */
static void
eth_igc_stop(struct rte_eth_dev *dev)
{
        struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct rte_eth_link link;

        adapter->stopped = 1;

        /* disable receive and transmit */
        eth_igc_rxtx_control(dev, false);

        /* disable all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
        IGC_WRITE_FLUSH(hw);

        /* clear all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EICR, 0x1f);

        igc_intr_other_disable(dev);

        rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);

        /* disable intr eventfd mapping */
        rte_intr_disable(intr_handle);

        igc_reset_hw(hw);

        /* disable all wake up */
        IGC_WRITE_REG(hw, IGC_WUC, 0);

        /* Set bit for Go Link disconnect */
        igc_read_reg_check_set_bits(hw, IGC_82580_PHY_POWER_MGMT,
                        IGC_82580_PM_GO_LINKD);

        /* Power down the phy. Needed to make the link go Down */
        eth_igc_set_link_down(dev);

        igc_dev_clear_queues(dev);

        /* clear the recorded link status */
        memset(&link, 0, sizeof(link));
        rte_eth_linkstatus_set(dev, &link);

        if (!rte_intr_allow_others(intr_handle))
                /* resume to the default handler */
                rte_intr_callback_register(intr_handle,
                                           eth_igc_interrupt_handler,
                                           (void *)dev);

        /* Clean datapath event and queue/vec mapping */
        rte_intr_efd_disable(intr_handle);
        if (intr_handle->intr_vec != NULL) {
                rte_free(intr_handle->intr_vec);
                intr_handle->intr_vec = NULL;
        }
}

/*
 * write interrupt vector allocation register
 * @hw
 *  board private structure
 * @queue_index
 *  queue index, valid 0,1,2,3
 * @tx
 *  tx:1, rx:0
 * @msix_vector
 *  msix-vector, valid 0,1,2,3,4
 */
static void
igc_write_ivar(struct igc_hw *hw, uint8_t queue_index,
                bool tx, uint8_t msix_vector)
{
        uint8_t offset = 0;
        uint8_t reg_index = queue_index >> 1;
        uint32_t val;

        /*
         * IVAR(0)
         * bit31...24   bit23...16      bit15...8       bit7...0
         * TX1          RX1             TX0             RX0
         *
         * IVAR(1)
         * bit31...24   bit23...16      bit15...8       bit7...0
         * TX3          RX3             TX2             RX2
         */

        if (tx)
                offset = 8;

        if (queue_index & 1)
                offset += 16;

        val = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, reg_index);

        /* clear bits */
        val &= ~((uint32_t)0xFF << offset);

        /* write vector and valid bit */
        val |= (uint32_t)(msix_vector | IGC_IVAR_VALID) << offset;

        IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, reg_index, val);
}
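
/*
 * Worked example: igc_write_ivar(hw, 2, false, 3) targets RX2. Queue 2
 * selects IVAR(1) (reg_index = 2 >> 1 = 1) and offset 0 (rx, even queue),
 * so (3 | IGC_IVAR_VALID) is written into bits 7...0 of IVAR(1).
 */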

/* Sets up the hardware to generate MSI-X interrupts properly
 * @hw
 *  board private structure
 */
static void
igc_configure_msix_intr(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

        uint32_t intr_mask;
        uint32_t vec = IGC_MISC_VEC_ID;
        uint32_t base = IGC_MISC_VEC_ID;
        uint32_t misc_shift = 0;
        int i;

        /* won't configure msix register if no mapping is done
         * between intr vector and event fd
         */
        if (!rte_intr_dp_is_en(intr_handle))
                return;

        if (rte_intr_allow_others(intr_handle)) {
                base = IGC_RX_VEC_START;
                vec = base;
                misc_shift = 1;
        }

        /* turn on MSI-X capability first */
        IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE |
                                IGC_GPIE_PBA | IGC_GPIE_EIAME |
                                IGC_GPIE_NSICR);
        intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
                misc_shift;

        if (dev->data->dev_conf.intr_conf.lsc)
                intr_mask |= (1u << IGC_MSIX_OTHER_INTR_VEC);

        /* enable msix auto-clear */
        igc_read_reg_check_set_bits(hw, IGC_EIAC, intr_mask);

        /* set other cause interrupt vector */
        igc_read_reg_check_set_bits(hw, IGC_IVAR_MISC,
                (uint32_t)(IGC_MSIX_OTHER_INTR_VEC | IGC_IVAR_VALID) << 8);

        /* enable auto-mask */
        igc_read_reg_check_set_bits(hw, IGC_EIAM, intr_mask);

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                igc_write_ivar(hw, i, 0, vec);
                intr_handle->intr_vec[i] = vec;
                if (vec < base + intr_handle->nb_efd - 1)
                        vec++;
        }

        IGC_WRITE_FLUSH(hw);
}
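
/*
 * Worked example for the mask above: with 4 Rx queues and a separate
 * misc vector (misc_shift = 1), intr_mask = RTE_LEN2MASK(4, uint32_t)
 * << 1 = 0x1e; the LSC case then adds bit 0, giving 0x1f, the same
 * value eth_igc_stop() uses to mask all five vectors via EIMC.
 */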

/**
 * Set or clear the LSC bit in the software interrupt mask; the mask is
 * written to the hardware by igc_intr_other_enable().
 *
 * @dev
 *  Pointer to struct rte_eth_dev.
 * @on
 *  Enable or Disable
 */
static void
igc_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);

        if (on)
                intr->mask |= IGC_ICR_LSC;
        else
                intr->mask &= ~IGC_ICR_LSC;
}

/*
 * Enable the Rx queue interrupts in the EIMS register.
 * Called only once, during NIC initialization.
 */
static void
igc_rxq_interrupt_setup(struct rte_eth_dev *dev)
{
        uint32_t mask;
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0;

        /* won't configure msix register if no mapping is done
         * between intr vector and event fd
         */
        if (!rte_intr_dp_is_en(intr_handle))
                return;

        mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift;
        IGC_WRITE_REG(hw, IGC_EIMS, mask);
}
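
/*
 * Application-side sketch (illustrative, not part of the driver): with
 * intr_conf.rxq set, an application can sleep until a queue receives
 * traffic instead of busy polling, along these lines:
 *
 *      struct rte_epoll_event event;
 *
 *      rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *                      RTE_INTR_EVENT_ADD, NULL);
 *      rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *      rte_epoll_wait(RTE_EPOLL_PER_THREAD, &event, 1, timeout_ms);
 *      rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *      ... then poll the queue with rte_eth_rx_burst() ...
 */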

/*
 *  Get hardware rx-buffer size.
 */
static inline int
igc_get_rx_buffer_size(struct igc_hw *hw)
{
        return (IGC_READ_REG(hw, IGC_RXPBS) & 0x3f) << 10;
}

/*
 * igc_hw_control_acquire sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded.
 */
static void
igc_hw_control_acquire(struct igc_hw *hw)
{
        uint32_t ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
        IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
}

/*
 * igc_hw_control_release resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void
igc_hw_control_release(struct igc_hw *hw)
{
        uint32_t ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
        IGC_WRITE_REG(hw, IGC_CTRL_EXT,
                        ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
}

static int
igc_hardware_init(struct igc_hw *hw)
{
        uint32_t rx_buf_size;
        int diag;

        /* Let the firmware know the OS is in control */
        igc_hw_control_acquire(hw);

        /* Issue a global reset */
        igc_reset_hw(hw);

        /* disable all wake up */
        IGC_WRITE_REG(hw, IGC_WUC, 0);

        /*
         * Hardware flow control
         * - High water mark should allow for at least two standard size (1518)
         *   frames to be received after sending an XOFF.
         * - Low water mark works best when it is very near the high water mark.
         *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitrary value of 1500 which will
         *   restart after one full frame is pulled from the buffer. There
         *   could be several smaller frames in the buffer and if so they will
         *   not trigger the XON until their total number reduces the buffer
         *   by 1500.
         */
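        /*
         * Numeric sketch (illustrative): igc_get_rx_buffer_size() converts
         * the RXPBS size field from KB to bytes, so with a 32KB Rx packet
         * buffer high_water = 32768 - 2 * 1518 = 29732 bytes and
         * low_water = 29732 - 1500 = 28232 bytes.
         */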
        rx_buf_size = igc_get_rx_buffer_size(hw);
        hw->fc.high_water = rx_buf_size - (RTE_ETHER_MAX_LEN * 2);
        hw->fc.low_water = hw->fc.high_water - 1500;
        hw->fc.pause_time = IGC_FC_PAUSE_TIME;
        hw->fc.send_xon = 1;
        hw->fc.requested_mode = igc_fc_full;

        diag = igc_init_hw(hw);
        if (diag < 0)
                return diag;

        igc_get_phy_info(hw);
        igc_check_for_link(hw);

        return 0;
}

static int
eth_igc_start(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t *speeds;
        int ret;

        PMD_INIT_FUNC_TRACE();

        /* disable all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
        IGC_WRITE_FLUSH(hw);

        /* clear all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EICR, 0x1f);

        /* disable uio/vfio intr/eventfd mapping */
        if (!adapter->stopped)
                rte_intr_disable(intr_handle);

        /* Power up the phy. Needed to make the link go Up */
        eth_igc_set_link_up(dev);

        /* Put the address into the Receive Address Array */
        igc_rar_set(hw, hw->mac.addr, 0);

        /* Initialize the hardware */
        if (igc_hardware_init(hw)) {
                PMD_DRV_LOG(ERR, "Unable to initialize the hardware");
                return -EIO;
        }
        adapter->stopped = 0;

        /* check and configure queue intr-vector mapping */
        if (rte_intr_cap_multiple(intr_handle) &&
                dev->data->dev_conf.intr_conf.rxq) {
                uint32_t intr_vector = dev->data->nb_rx_queues;
                if (rte_intr_efd_enable(intr_handle, intr_vector))
                        return -1;
        }

        if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
                intr_handle->intr_vec = rte_zmalloc("intr_vec",
                        dev->data->nb_rx_queues * sizeof(int), 0);
                if (intr_handle->intr_vec == NULL) {
                        PMD_DRV_LOG(ERR,
                                "Failed to allocate %d rx_queues intr_vec",
                                dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }

        /* configure msix for rx interrupt */
        igc_configure_msix_intr(dev);

        igc_tx_init(dev);

        /* This can fail when allocating mbufs for descriptor rings */
        ret = igc_rx_init(dev);
        if (ret) {
                PMD_DRV_LOG(ERR, "Unable to initialize RX hardware");
                igc_dev_clear_queues(dev);
                return ret;
        }

        igc_clear_hw_cntrs_base_generic(hw);

        /* Setup link speed and duplex */
        speeds = &dev->data->dev_conf.link_speeds;
        if (*speeds == ETH_LINK_SPEED_AUTONEG) {
                hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
                hw->mac.autoneg = 1;
        } else {
                int num_speeds = 0;
                bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;

                /* Reset */
                hw->phy.autoneg_advertised = 0;

                if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
                                ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
                                ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
                                ETH_LINK_SPEED_FIXED)) {
                        num_speeds = -1;
                        goto error_invalid_config;
                }
                if (*speeds & ETH_LINK_SPEED_10M_HD) {
                        hw->phy.autoneg_advertised |= ADVERTISE_10_HALF;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_10M) {
                        hw->phy.autoneg_advertised |= ADVERTISE_10_FULL;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_100M_HD) {
                        hw->phy.autoneg_advertised |= ADVERTISE_100_HALF;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_100M) {
                        hw->phy.autoneg_advertised |= ADVERTISE_100_FULL;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_1G) {
                        hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_2_5G) {
                        hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL;
                        num_speeds++;
                }
                if (num_speeds == 0 || (!autoneg && num_speeds > 1))
                        goto error_invalid_config;

                /* Set/reset the mac.autoneg based on the link speed,
                 * fixed or not
                 */
                if (!autoneg) {
                        hw->mac.autoneg = 0;
                        hw->mac.forced_speed_duplex =
                                        hw->phy.autoneg_advertised;
                } else {
                        hw->mac.autoneg = 1;
                }
        }

        igc_setup_link(hw);

        if (rte_intr_allow_others(intr_handle)) {
                /* check if lsc interrupt is enabled */
                if (dev->data->dev_conf.intr_conf.lsc)
                        igc_lsc_interrupt_setup(dev, 1);
                else
                        igc_lsc_interrupt_setup(dev, 0);
        } else {
                rte_intr_callback_unregister(intr_handle,
                                             eth_igc_interrupt_handler,
                                             (void *)dev);
                if (dev->data->dev_conf.intr_conf.lsc)
                        PMD_DRV_LOG(INFO,
                                "LSC interrupt cannot be enabled: no interrupt multiplexing");
        }

        /* enable uio/vfio intr/eventfd mapping */
        rte_intr_enable(intr_handle);

        rte_eal_alarm_set(IGC_ALARM_INTERVAL,
                        igc_update_queue_stats_handler, dev);

        /* check if rxq interrupt is enabled */
        if (dev->data->dev_conf.intr_conf.rxq &&
                        rte_intr_dp_is_en(intr_handle))
                igc_rxq_interrupt_setup(dev);

        /* re-enable the interrupts that the hw reset disabled */
        igc_intr_other_enable(dev);

        eth_igc_rxtx_control(dev, true);
        eth_igc_link_update(dev, 0);

        return 0;

error_invalid_config:
        PMD_DRV_LOG(ERR, "Invalid advertised speeds (%u) for port %u",
                     dev->data->dev_conf.link_speeds, dev->data->port_id);
        igc_dev_clear_queues(dev);
        return -EINVAL;
}

static int
igc_reset_swfw_lock(struct igc_hw *hw)
{
        int ret_val;

        /*
         * Do mac ops initialization manually here, since we will need
         * some function pointers set by this call.
         */
        ret_val = igc_init_mac_params(hw);
        if (ret_val)
                return ret_val;

        /*
         * SMBI lock should not fail in this early stage. If this is the case,
         * it is due to an improper exit of the application.
         * So force the release of the faulty lock.
         */
        if (igc_get_hw_semaphore_generic(hw) < 0)
                PMD_DRV_LOG(DEBUG, "SMBI lock released");

        igc_put_hw_semaphore_generic(hw);

        if (hw->mac.ops.acquire_swfw_sync != NULL) {
                uint16_t mask;

                /*
                 * Phy lock should not fail in this early stage.
                 * If this is the case, it is due to an improper exit of the
                 * application. So force the release of the faulty lock.
                 */
                mask = IGC_SWFW_PHY0_SM;
                if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) {
                        PMD_DRV_LOG(DEBUG, "SWFW phy%d lock released",
                                    hw->bus.func);
                }
                hw->mac.ops.release_swfw_sync(hw, mask);

                /*
                 * This one is more tricky since it is common to all ports; but
                 * swfw_sync retries last long enough (1s) to be almost sure
                 * that if the lock cannot be taken it is due to an improper
                 * lock of the semaphore.
                 */
                mask = IGC_SWFW_EEP_SM;
                if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0)
                        PMD_DRV_LOG(DEBUG, "SWFW common locks released");

                hw->mac.ops.release_swfw_sync(hw, mask);
        }

        return IGC_SUCCESS;
}

/*
 * free all rx/tx queues.
 */
static void
igc_dev_free_queues(struct rte_eth_dev *dev)
{
        uint16_t i;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                eth_igc_rx_queue_release(dev->data->rx_queues[i]);
                dev->data->rx_queues[i] = NULL;
        }
        dev->data->nb_rx_queues = 0;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                eth_igc_tx_queue_release(dev->data->tx_queues[i]);
                dev->data->tx_queues[i] = NULL;
        }
        dev->data->nb_tx_queues = 0;
}

static void
eth_igc_close(struct rte_eth_dev *dev)
{
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
        int retry = 0;

        PMD_INIT_FUNC_TRACE();

        if (!adapter->stopped)
                eth_igc_stop(dev);

        igc_intr_other_disable(dev);
        do {
                int ret = rte_intr_callback_unregister(intr_handle,
                                eth_igc_interrupt_handler, dev);
                if (ret >= 0 || ret == -ENOENT || ret == -EINVAL)
                        break;

                PMD_DRV_LOG(ERR, "intr callback unregister failed: %d", ret);
                DELAY(200 * 1000); /* delay 200ms */
        } while (retry++ < 5);

        igc_phy_hw_reset(hw);
        igc_hw_control_release(hw);
        igc_dev_free_queues(dev);

        /* Reset any pending lock */
        igc_reset_swfw_lock(hw);
}

static void
igc_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

        hw->vendor_id = pci_dev->id.vendor_id;
        hw->device_id = pci_dev->id.device_id;
        hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
        hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
}

static int
eth_igc_dev_init(struct rte_eth_dev *dev)
{
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        int i, error = 0;

        PMD_INIT_FUNC_TRACE();
        dev->dev_ops = &eth_igc_ops;

        /*
         * For secondary processes, we don't initialize any further as primary
         * has already done this work. Only check we don't need a different
         * RX function.
         */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        rte_eth_copy_pci_info(dev, pci_dev);

        hw->back = pci_dev;
        hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;

        igc_identify_hardware(dev, pci_dev);
        if (igc_setup_init_funcs(hw, false) != IGC_SUCCESS) {
                error = -EIO;
                goto err_late;
        }

        igc_get_bus_info(hw);

        /* Reset any pending lock */
        if (igc_reset_swfw_lock(hw) != IGC_SUCCESS) {
                error = -EIO;
                goto err_late;
        }

        /* Finish initialization */
        if (igc_setup_init_funcs(hw, true) != IGC_SUCCESS) {
                error = -EIO;
                goto err_late;
        }

        hw->mac.autoneg = 1;
        hw->phy.autoneg_wait_to_complete = 0;
        hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;

        /* Copper options */
        if (hw->phy.media_type == igc_media_type_copper) {
                hw->phy.mdix = 0; /* AUTO_ALL_MODES */
                hw->phy.disable_polarity_correction = 0;
                hw->phy.ms_type = igc_ms_hw_default;
        }

        /*
         * Start from a known state; this is important for reading the
         * NVM and MAC address from it.
         */
        igc_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (igc_validate_nvm_checksum(hw) < 0) {
                /*
                 * Some PCI-E parts fail the first check due to
                 * the link being in a sleep state; call it again.
                 * If it fails a second time, it's a real issue.
                 */
                if (igc_validate_nvm_checksum(hw) < 0) {
                        PMD_INIT_LOG(ERR, "EEPROM checksum invalid");
                        error = -EIO;
                        goto err_late;
                }
        }

        /* Read the permanent MAC address out of the EEPROM */
        if (igc_read_mac_addr(hw) != 0) {
                PMD_INIT_LOG(ERR, "EEPROM error while reading MAC address");
                error = -EIO;
                goto err_late;
        }

        /* Allocate memory for storing MAC addresses */
        dev->data->mac_addrs = rte_zmalloc("igc",
                RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0);
        if (dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Failed to allocate %d bytes for storing MAC",
                                RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count);
                error = -ENOMEM;
                goto err_late;
        }

        /* Copy the permanent MAC address */
        rte_ether_addr_copy((struct rte_ether_addr *)hw->mac.addr,
                        &dev->data->mac_addrs[0]);

        /* Now initialize the hardware */
        if (igc_hardware_init(hw) != 0) {
                PMD_INIT_LOG(ERR, "Hardware initialization failed");
                rte_free(dev->data->mac_addrs);
                dev->data->mac_addrs = NULL;
                error = -ENODEV;
                goto err_late;
        }

        /* Pass the information to the rte_eth_dev_close() that it should also
         * release the private port resources.
         */
        dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;

        hw->mac.get_link_status = 1;
        igc->stopped = 0;

        /* Indicate SOL/IDER usage */
        if (igc_check_reset_block(hw) < 0)
                PMD_INIT_LOG(ERR,
                        "PHY reset is blocked due to SOL/IDER session.");

        PMD_INIT_LOG(DEBUG, "port_id %d vendorID=0x%x deviceID=0x%x",
                        dev->data->port_id, pci_dev->id.vendor_id,
                        pci_dev->id.device_id);

        rte_intr_callback_register(&pci_dev->intr_handle,
                        eth_igc_interrupt_handler, (void *)dev);

        /* enable uio/vfio intr/eventfd mapping */
        rte_intr_enable(&pci_dev->intr_handle);

        /* enable support intr */
        igc_intr_other_enable(dev);

        /* initialize the per-queue stats mapping */
        for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
                igc->txq_stats_map[i] = -1;
                igc->rxq_stats_map[i] = -1;
        }

        return 0;

err_late:
        igc_hw_control_release(hw);
        return error;
}
1296
1297 static int
1298 eth_igc_dev_uninit(__rte_unused struct rte_eth_dev *eth_dev)
1299 {
1300         PMD_INIT_FUNC_TRACE();
1301
1302         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1303                 return 0;
1304
1305         eth_igc_close(eth_dev);
1306         return 0;
1307 }
1308
1309 static int
1310 eth_igc_reset(struct rte_eth_dev *dev)
1311 {
1312         int ret;
1313
1314         PMD_INIT_FUNC_TRACE();
1315
1316         ret = eth_igc_dev_uninit(dev);
1317         if (ret)
1318                 return ret;
1319
1320         return eth_igc_dev_init(dev);
1321 }
1322
1323 static int
1324 eth_igc_promiscuous_enable(struct rte_eth_dev *dev)
1325 {
1326         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1327         uint32_t rctl;
1328
1329         rctl = IGC_READ_REG(hw, IGC_RCTL);
1330         rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE);
1331         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1332         return 0;
1333 }
1334
1335 static int
1336 eth_igc_promiscuous_disable(struct rte_eth_dev *dev)
1337 {
1338         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1339         uint32_t rctl;
1340
1341         rctl = IGC_READ_REG(hw, IGC_RCTL);
1342         rctl &= (~IGC_RCTL_UPE);
1343         if (dev->data->all_multicast == 1)
1344                 rctl |= IGC_RCTL_MPE;
1345         else
1346                 rctl &= (~IGC_RCTL_MPE);
1347         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1348         return 0;
1349 }
1350
1351 static int
1352 eth_igc_allmulticast_enable(struct rte_eth_dev *dev)
1353 {
1354         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1355         uint32_t rctl;
1356
1357         rctl = IGC_READ_REG(hw, IGC_RCTL);
1358         rctl |= IGC_RCTL_MPE;
1359         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1360         return 0;
1361 }
1362
1363 static int
1364 eth_igc_allmulticast_disable(struct rte_eth_dev *dev)
1365 {
1366         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1367         uint32_t rctl;
1368
1369         if (dev->data->promiscuous == 1)
1370                 return 0;       /* must remain in all_multicast mode */
1371
1372         rctl = IGC_READ_REG(hw, IGC_RCTL);
1373         rctl &= (~IGC_RCTL_MPE);
1374         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1375         return 0;
1376 }
1377
1378 static int
1379 eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
1380                        size_t fw_size)
1381 {
1382         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1383         struct igc_fw_version fw;
1384         int ret;
1385
1386         igc_get_fw_version(hw, &fw);
1387
1388         /* if option rom is valid, display its version too */
1389         if (fw.or_valid) {
1390                 ret = snprintf(fw_version, fw_size,
1391                          "%d.%d, 0x%08x, %d.%d.%d",
1392                          fw.eep_major, fw.eep_minor, fw.etrack_id,
1393                          fw.or_major, fw.or_build, fw.or_patch);
1394         /* no option rom */
1395         } else {
1396                 if (fw.etrack_id != 0X0000) {
1397                         ret = snprintf(fw_version, fw_size,
1398                                  "%d.%d, 0x%08x",
1399                                  fw.eep_major, fw.eep_minor,
1400                                  fw.etrack_id);
1401                 } else {
1402                         ret = snprintf(fw_version, fw_size,
1403                                  "%d.%d.%d",
1404                                  fw.eep_major, fw.eep_minor,
1405                                  fw.eep_build);
1406                 }
1407         }
1408
1409         ret += 1; /* add the size of '\0' */
1410         if (fw_size < (u32)ret)
1411                 return ret;
1412         else
1413                 return 0;
1414 }
1415
1416 static int
1417 eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1418 {
1419         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1420
1421         dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
1422         dev_info->max_rx_pktlen = MAX_RX_JUMBO_FRAME_SIZE;
1423         dev_info->max_mac_addrs = hw->mac.rar_entry_count;
1424         dev_info->rx_offload_capa = IGC_RX_OFFLOAD_ALL;
1425         dev_info->tx_offload_capa = IGC_TX_OFFLOAD_ALL;
1426
1427         dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM;
1428         dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM;
1429         dev_info->max_vmdq_pools = 0;
1430
1431         dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t);
1432         dev_info->reta_size = ETH_RSS_RETA_SIZE_128;
1433         dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL;
1434
1435         dev_info->default_rxconf = (struct rte_eth_rxconf) {
1436                 .rx_thresh = {
1437                         .pthresh = IGC_DEFAULT_RX_PTHRESH,
1438                         .hthresh = IGC_DEFAULT_RX_HTHRESH,
1439                         .wthresh = IGC_DEFAULT_RX_WTHRESH,
1440                 },
1441                 .rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH,
1442                 .rx_drop_en = 0,
1443                 .offloads = 0,
1444         };
1445
1446         dev_info->default_txconf = (struct rte_eth_txconf) {
1447                 .tx_thresh = {
1448                         .pthresh = IGC_DEFAULT_TX_PTHRESH,
1449                         .hthresh = IGC_DEFAULT_TX_HTHRESH,
1450                         .wthresh = IGC_DEFAULT_TX_WTHRESH,
1451                 },
1452                 .offloads = 0,
1453         };
1454
1455         dev_info->rx_desc_lim = rx_desc_lim;
1456         dev_info->tx_desc_lim = tx_desc_lim;
1457
1458         dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
1459                         ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
1460                         ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G;
1461
1462         dev_info->max_mtu = dev_info->max_rx_pktlen - IGC_ETH_OVERHEAD;
1463         dev_info->min_mtu = RTE_ETHER_MIN_MTU;
1464         return 0;
1465 }
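
/*
 * Usage sketch (illustrative): an application would typically query these
 * limits via rte_eth_dev_info_get() before configuring the port; port_id
 * and mtu are assumed application-side variables:
 *
 *	struct rte_eth_dev_info info;
 *	if (rte_eth_dev_info_get(port_id, &info) == 0 &&
 *	    mtu >= info.min_mtu && mtu <= info.max_mtu)
 *		rte_eth_dev_set_mtu(port_id, mtu);
 */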
1466
1467 static int
1468 eth_igc_led_on(struct rte_eth_dev *dev)
1469 {
1470         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1471
1472         return igc_led_on(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1473 }
1474
1475 static int
1476 eth_igc_led_off(struct rte_eth_dev *dev)
1477 {
1478         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1479
1480         return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1481 }
1482
1483 static const uint32_t *
1484 eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev)
1485 {
1486         static const uint32_t ptypes[] = {
1487                 /* refers to rx_desc_pkt_info_to_pkt_type() */
1488                 RTE_PTYPE_L2_ETHER,
1489                 RTE_PTYPE_L3_IPV4,
1490                 RTE_PTYPE_L3_IPV4_EXT,
1491                 RTE_PTYPE_L3_IPV6,
1492                 RTE_PTYPE_L3_IPV6_EXT,
1493                 RTE_PTYPE_L4_TCP,
1494                 RTE_PTYPE_L4_UDP,
1495                 RTE_PTYPE_L4_SCTP,
1496                 RTE_PTYPE_TUNNEL_IP,
1497                 RTE_PTYPE_INNER_L3_IPV6,
1498                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1499                 RTE_PTYPE_INNER_L4_TCP,
1500                 RTE_PTYPE_INNER_L4_UDP,
1501                 RTE_PTYPE_UNKNOWN
1502         };
1503
1504         return ptypes;
1505 }
1506
1507 static int
1508 eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1509 {
1510         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1511         uint32_t frame_size = mtu + IGC_ETH_OVERHEAD;
1512         uint32_t rctl;
1513
1514         /* if extended VLAN has been enabled */
1515         if (IGC_READ_REG(hw, IGC_CTRL_EXT) & IGC_CTRL_EXT_EXT_VLAN)
1516                 frame_size += VLAN_TAG_SIZE;
1517
1518         /* check that mtu is within the allowed range */
1519         if (mtu < RTE_ETHER_MIN_MTU ||
1520                 frame_size > MAX_RX_JUMBO_FRAME_SIZE)
1521                 return -EINVAL;
1522
1523         /*
1524          * Refuse an MTU that would require scattered-packet support when
1525          * that feature has not already been enabled.
1526          */
1527         if (!dev->data->scattered_rx &&
1528             frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM)
1529                 return -EINVAL;
1530
1531         rctl = IGC_READ_REG(hw, IGC_RCTL);
1532
1533         /* switch to jumbo mode if needed */
1534         if (mtu > RTE_ETHER_MTU) {
1535                 dev->data->dev_conf.rxmode.offloads |=
1536                         DEV_RX_OFFLOAD_JUMBO_FRAME;
1537                 rctl |= IGC_RCTL_LPE;
1538         } else {
1539                 dev->data->dev_conf.rxmode.offloads &=
1540                         ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1541                 rctl &= ~IGC_RCTL_LPE;
1542         }
1543         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1544
1545         /* update max frame size */
1546         dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
1547
1548         IGC_WRITE_REG(hw, IGC_RLPML,
1549                         dev->data->dev_conf.rxmode.max_rx_pkt_len);
1550
1551         return 0;
1552 }
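
/*
 * Worked example (illustrative): IGC_ETH_OVERHEAD is 22 bytes (14-byte
 * Ethernet header + 4-byte CRC + one 4-byte VLAN tag), so a standard
 * 1500-byte MTU yields a 1522-byte frame, or 1526 bytes when extended
 * VLAN adds a second tag.  From the application side:
 *
 *	if (rte_eth_dev_set_mtu(port_id, 1500) != 0)
 *		printf("failed to set MTU\n");
 */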
1553
1554 static int
1555 eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1556                 uint32_t index, uint32_t pool)
1557 {
1558         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1559
1560         igc_rar_set(hw, mac_addr->addr_bytes, index);
1561         RTE_SET_USED(pool);
1562         return 0;
1563 }
1564
1565 static void
1566 eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index)
1567 {
1568         uint8_t addr[RTE_ETHER_ADDR_LEN];
1569         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1570
1571         memset(addr, 0, sizeof(addr));
1572         igc_rar_set(hw, addr, index);
1573 }
1574
1575 static int
1576 eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
1577                         struct rte_ether_addr *addr)
1578 {
1579         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1580         igc_rar_set(hw, addr->addr_bytes, 0);
1581         return 0;
1582 }
1583
1584 static int
1585 eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
1586                          struct rte_ether_addr *mc_addr_set,
1587                          uint32_t nb_mc_addr)
1588 {
1589         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1590         igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr);
1591         return 0;
1592 }
1593
1594 /*
1595  * Read the hardware statistics registers
1596  */
1597 static void
1598 igc_read_stats_registers(struct igc_hw *hw, struct igc_hw_stats *stats)
1599 {
1600         int pause_frames;
1601
1602         uint64_t old_gprc  = stats->gprc;
1603         uint64_t old_gptc  = stats->gptc;
1604         uint64_t old_tpr   = stats->tpr;
1605         uint64_t old_tpt   = stats->tpt;
1606         uint64_t old_rpthc = stats->rpthc;
1607         uint64_t old_hgptc = stats->hgptc;
1608
1609         stats->crcerrs += IGC_READ_REG(hw, IGC_CRCERRS);
1610         stats->algnerrc += IGC_READ_REG(hw, IGC_ALGNERRC);
1611         stats->rxerrc += IGC_READ_REG(hw, IGC_RXERRC);
1612         stats->mpc += IGC_READ_REG(hw, IGC_MPC);
1613         stats->scc += IGC_READ_REG(hw, IGC_SCC);
1614         stats->ecol += IGC_READ_REG(hw, IGC_ECOL);
1615
1616         stats->mcc += IGC_READ_REG(hw, IGC_MCC);
1617         stats->latecol += IGC_READ_REG(hw, IGC_LATECOL);
1618         stats->colc += IGC_READ_REG(hw, IGC_COLC);
1619
1620         stats->dc += IGC_READ_REG(hw, IGC_DC);
1621         stats->tncrs += IGC_READ_REG(hw, IGC_TNCRS);
1622         stats->htdpmc += IGC_READ_REG(hw, IGC_HTDPMC);
1623         stats->rlec += IGC_READ_REG(hw, IGC_RLEC);
1624         stats->xonrxc += IGC_READ_REG(hw, IGC_XONRXC);
1625         stats->xontxc += IGC_READ_REG(hw, IGC_XONTXC);
1626
1627         /*
1628          * For watchdog management we need to know if we have been
1629          * paused during the last interval, so capture that here.
1630          */
1631         pause_frames = IGC_READ_REG(hw, IGC_XOFFRXC);
1632         stats->xoffrxc += pause_frames;
1633         stats->xofftxc += IGC_READ_REG(hw, IGC_XOFFTXC);
1634         stats->fcruc += IGC_READ_REG(hw, IGC_FCRUC);
1635         stats->prc64 += IGC_READ_REG(hw, IGC_PRC64);
1636         stats->prc127 += IGC_READ_REG(hw, IGC_PRC127);
1637         stats->prc255 += IGC_READ_REG(hw, IGC_PRC255);
1638         stats->prc511 += IGC_READ_REG(hw, IGC_PRC511);
1639         stats->prc1023 += IGC_READ_REG(hw, IGC_PRC1023);
1640         stats->prc1522 += IGC_READ_REG(hw, IGC_PRC1522);
1641         stats->gprc += IGC_READ_REG(hw, IGC_GPRC);
1642         stats->bprc += IGC_READ_REG(hw, IGC_BPRC);
1643         stats->mprc += IGC_READ_REG(hw, IGC_MPRC);
1644         stats->gptc += IGC_READ_REG(hw, IGC_GPTC);
1645
1646         /* For the 64-bit byte counters the low dword must be read first. */
1647         /* Both registers clear on the read of the high dword */
1648
1649         /* Workaround: CRC bytes are counted in size, subtract 4 bytes/packet */
1650         stats->gorc += IGC_READ_REG(hw, IGC_GORCL);
1651         stats->gorc += ((uint64_t)IGC_READ_REG(hw, IGC_GORCH) << 32);
1652         stats->gorc -= (stats->gprc - old_gprc) * RTE_ETHER_CRC_LEN;
1653         stats->gotc += IGC_READ_REG(hw, IGC_GOTCL);
1654         stats->gotc += ((uint64_t)IGC_READ_REG(hw, IGC_GOTCH) << 32);
1655         stats->gotc -= (stats->gptc - old_gptc) * RTE_ETHER_CRC_LEN;
1656
1657         stats->rnbc += IGC_READ_REG(hw, IGC_RNBC);
1658         stats->ruc += IGC_READ_REG(hw, IGC_RUC);
1659         stats->rfc += IGC_READ_REG(hw, IGC_RFC);
1660         stats->roc += IGC_READ_REG(hw, IGC_ROC);
1661         stats->rjc += IGC_READ_REG(hw, IGC_RJC);
1662
1663         stats->mgprc += IGC_READ_REG(hw, IGC_MGTPRC);
1664         stats->mgpdc += IGC_READ_REG(hw, IGC_MGTPDC);
1665         stats->mgptc += IGC_READ_REG(hw, IGC_MGTPTC);
1666         stats->b2ospc += IGC_READ_REG(hw, IGC_B2OSPC);
1667         stats->b2ogprc += IGC_READ_REG(hw, IGC_B2OGPRC);
1668         stats->o2bgptc += IGC_READ_REG(hw, IGC_O2BGPTC);
1669         stats->o2bspc += IGC_READ_REG(hw, IGC_O2BSPC);
1670
1671         stats->tpr += IGC_READ_REG(hw, IGC_TPR);
1672         stats->tpt += IGC_READ_REG(hw, IGC_TPT);
1673
1674         stats->tor += IGC_READ_REG(hw, IGC_TORL);
1675         stats->tor += ((uint64_t)IGC_READ_REG(hw, IGC_TORH) << 32);
1676         stats->tor -= (stats->tpr - old_tpr) * RTE_ETHER_CRC_LEN;
1677         stats->tot += IGC_READ_REG(hw, IGC_TOTL);
1678         stats->tot += ((uint64_t)IGC_READ_REG(hw, IGC_TOTH) << 32);
1679         stats->tot -= (stats->tpt - old_tpt) * RTE_ETHER_CRC_LEN;
1680
1681         stats->ptc64 += IGC_READ_REG(hw, IGC_PTC64);
1682         stats->ptc127 += IGC_READ_REG(hw, IGC_PTC127);
1683         stats->ptc255 += IGC_READ_REG(hw, IGC_PTC255);
1684         stats->ptc511 += IGC_READ_REG(hw, IGC_PTC511);
1685         stats->ptc1023 += IGC_READ_REG(hw, IGC_PTC1023);
1686         stats->ptc1522 += IGC_READ_REG(hw, IGC_PTC1522);
1687         stats->mptc += IGC_READ_REG(hw, IGC_MPTC);
1688         stats->bptc += IGC_READ_REG(hw, IGC_BPTC);
1689         stats->tsctc += IGC_READ_REG(hw, IGC_TSCTC);
1690
1691         stats->iac += IGC_READ_REG(hw, IGC_IAC);
1692         stats->rpthc += IGC_READ_REG(hw, IGC_RPTHC);
1693         stats->hgptc += IGC_READ_REG(hw, IGC_HGPTC);
1694         stats->icrxdmtc += IGC_READ_REG(hw, IGC_ICRXDMTC);
1695
1696         /* Host to Card Statistics */
1697         stats->hgorc += IGC_READ_REG(hw, IGC_HGORCL);
1698         stats->hgorc += ((uint64_t)IGC_READ_REG(hw, IGC_HGORCH) << 32);
1699         stats->hgorc -= (stats->rpthc - old_rpthc) * RTE_ETHER_CRC_LEN;
1700         stats->hgotc += IGC_READ_REG(hw, IGC_HGOTCL);
1701         stats->hgotc += ((uint64_t)IGC_READ_REG(hw, IGC_HGOTCH) << 32);
1702         stats->hgotc -= (stats->hgptc - old_hgptc) * RTE_ETHER_CRC_LEN;
1703         stats->lenerrs += IGC_READ_REG(hw, IGC_LENERRS);
1704 }
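
/*
 * Illustrative refactoring sketch (not part of the upstream driver): the
 * low-dword-first read order used for the 64-bit byte counters above could
 * be captured in a single helper; reading the high dword clears the pair.
 */
static inline uint64_t
igc_read_64bit_counter(struct igc_hw *hw, uint32_t lo_reg, uint32_t hi_reg)
{
	uint64_t val;

	/* the low dword must be read first */
	val = IGC_READ_REG(hw, lo_reg);
	val |= (uint64_t)IGC_READ_REG(hw, hi_reg) << 32;
	return val;
}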
1705
1706 /*
1707  * Write 0 to all queue status registers
1708  */
1709 static void
1710 igc_reset_queue_stats_register(struct igc_hw *hw)
1711 {
1712         int i;
1713
1714         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1715                 IGC_WRITE_REG(hw, IGC_PQGPRC(i), 0);
1716                 IGC_WRITE_REG(hw, IGC_PQGPTC(i), 0);
1717                 IGC_WRITE_REG(hw, IGC_PQGORC(i), 0);
1718                 IGC_WRITE_REG(hw, IGC_PQGOTC(i), 0);
1719                 IGC_WRITE_REG(hw, IGC_PQMPRC(i), 0);
1720                 IGC_WRITE_REG(hw, IGC_RQDPC(i), 0);
1721                 IGC_WRITE_REG(hw, IGC_TQDPC(i), 0);
1722         }
1723 }
1724
1725 /*
1726  * Read all hardware queue status registers
1727  */
1728 static void
1729 igc_read_queue_stats_register(struct rte_eth_dev *dev)
1730 {
1731         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1732         struct igc_hw_queue_stats *queue_stats =
1733                                 IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1734         int i;
1735
1736         /*
1737          * These registers are not cleared on read. Each wraps back to
1738          * 0x00000000 on the increment after reaching a value of
1739          * 0xFFFFFFFF and then continues counting normally.
1740          */
1741         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1742                 union {
1743                         u64 ddword;
1744                         u32 dword[2];
1745                 } value;
1746                 u32 tmp;
1747
1748                 /*
1749                  * Read the register first; if the value is smaller than the
1750                  * previous reading, the register has wrapped around, so
1751                  * increment the high 4 bytes by one and replace the low 4
1752                  * bytes with the new value.
1753                  */
1754                 tmp = IGC_READ_REG(hw, IGC_PQGPRC(i));
1755                 value.ddword = queue_stats->pqgprc[i];
1756                 if (value.dword[U32_0_IN_U64] > tmp)
1757                         value.dword[U32_1_IN_U64]++;
1758                 value.dword[U32_0_IN_U64] = tmp;
1759                 queue_stats->pqgprc[i] = value.ddword;
1760
1761                 tmp = IGC_READ_REG(hw, IGC_PQGPTC(i));
1762                 value.ddword = queue_stats->pqgptc[i];
1763                 if (value.dword[U32_0_IN_U64] > tmp)
1764                         value.dword[U32_1_IN_U64]++;
1765                 value.dword[U32_0_IN_U64] = tmp;
1766                 queue_stats->pqgptc[i] = value.ddword;
1767
1768                 tmp = IGC_READ_REG(hw, IGC_PQGORC(i));
1769                 value.ddword = queue_stats->pqgorc[i];
1770                 if (value.dword[U32_0_IN_U64] > tmp)
1771                         value.dword[U32_1_IN_U64]++;
1772                 value.dword[U32_0_IN_U64] = tmp;
1773                 queue_stats->pqgorc[i] = value.ddword;
1774
1775                 tmp = IGC_READ_REG(hw, IGC_PQGOTC(i));
1776                 value.ddword = queue_stats->pqgotc[i];
1777                 if (value.dword[U32_0_IN_U64] > tmp)
1778                         value.dword[U32_1_IN_U64]++;
1779                 value.dword[U32_0_IN_U64] = tmp;
1780                 queue_stats->pqgotc[i] = value.ddword;
1781
1782                 tmp = IGC_READ_REG(hw, IGC_PQMPRC(i));
1783                 value.ddword = queue_stats->pqmprc[i];
1784                 if (value.dword[U32_0_IN_U64] > tmp)
1785                         value.dword[U32_1_IN_U64]++;
1786                 value.dword[U32_0_IN_U64] = tmp;
1787                 queue_stats->pqmprc[i] = value.ddword;
1788
1789                 tmp = IGC_READ_REG(hw, IGC_RQDPC(i));
1790                 value.ddword = queue_stats->rqdpc[i];
1791                 if (value.dword[U32_0_IN_U64] > tmp)
1792                         value.dword[U32_1_IN_U64]++;
1793                 value.dword[U32_0_IN_U64] = tmp;
1794                 queue_stats->rqdpc[i] = value.ddword;
1795
1796                 tmp = IGC_READ_REG(hw, IGC_TQDPC(i));
1797                 value.ddword = queue_stats->tqdpc[i];
1798                 if (value.dword[U32_0_IN_U64] > tmp)
1799                         value.dword[U32_1_IN_U64]++;
1800                 value.dword[U32_0_IN_U64] = tmp;
1801                 queue_stats->tqdpc[i] = value.ddword;
1802         }
1803 }
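
/*
 * Illustrative refactoring sketch (not part of the upstream driver): the
 * wrap-around handling repeated above for each per-queue counter could be
 * factored into a helper that folds a new 32-bit reading into the stored
 * 64-bit software total, e.g.
 * queue_stats->pqgprc[i] =
 *	igc_extend_u32_counter(queue_stats->pqgprc[i],
 *			IGC_READ_REG(hw, IGC_PQGPRC(i)));
 */
static inline u64
igc_extend_u32_counter(u64 prev, u32 cur)
{
	union {
		u64 ddword;
		u32 dword[2];
	} value;

	value.ddword = prev;
	/* a reading smaller than the last one means the counter wrapped */
	if (value.dword[U32_0_IN_U64] > cur)
		value.dword[U32_1_IN_U64]++;
	value.dword[U32_0_IN_U64] = cur;
	return value.ddword;
}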
1804
1805 static int
1806 eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
1807 {
1808         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1809         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1810         struct igc_hw_stats *stats = IGC_DEV_PRIVATE_STATS(dev);
1811         struct igc_hw_queue_stats *queue_stats =
1812                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1813         int i;
1814
1815         /*
1816          * Cancel status handler since it will read the queue status registers
1817          */
1818         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1819
1820         /* Read the status registers */
1821         igc_read_queue_stats_register(dev);
1822         igc_read_stats_registers(hw, stats);
1823
1824         if (rte_stats == NULL) {
1825                 /* Restart queue status handler */
1826                 rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1827                                 igc_update_queue_stats_handler, dev);
1828                 return -EINVAL;
1829         }
1830
1831         /* Rx Errors */
1832         rte_stats->imissed = stats->mpc;
1833         rte_stats->ierrors = stats->crcerrs +
1834                         stats->rlec + stats->ruc + stats->roc +
1835                         stats->rxerrc + stats->algnerrc;
1836
1837         /* Tx Errors */
1838         rte_stats->oerrors = stats->ecol + stats->latecol;
1839
1840         rte_stats->ipackets = stats->gprc;
1841         rte_stats->opackets = stats->gptc;
1842         rte_stats->ibytes   = stats->gorc;
1843         rte_stats->obytes   = stats->gotc;
1844
1845         /* Get per-queue statistics */
1846         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1847                 /* Get TX queue statistics */
1848                 int map_id = igc->txq_stats_map[i];
1849                 if (map_id >= 0) {
1850                         rte_stats->q_opackets[map_id] += queue_stats->pqgptc[i];
1851                         rte_stats->q_obytes[map_id] += queue_stats->pqgotc[i];
1852                 }
1853                 /* Get RX queue statistics */
1854                 map_id = igc->rxq_stats_map[i];
1855                 if (map_id >= 0) {
1856                         rte_stats->q_ipackets[map_id] += queue_stats->pqgprc[i];
1857                         rte_stats->q_ibytes[map_id] += queue_stats->pqgorc[i];
1858                         rte_stats->q_errors[map_id] += queue_stats->rqdpc[i];
1859                 }
1860         }
1861
1862         /* Restart queue status handler */
1863         rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1864                         igc_update_queue_stats_handler, dev);
1865         return 0;
1866 }
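
/*
 * Usage sketch (illustrative): the basic statistics filled in above are
 * read through the generic ethdev API; per-queue entries are only
 * populated for queues that were mapped to a stat index:
 *
 *	struct rte_eth_stats stats;
 *	if (rte_eth_stats_get(port_id, &stats) == 0)
 *		printf("rx %" PRIu64 " pkts, %" PRIu64 " missed\n",
 *			stats.ipackets, stats.imissed);
 */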
1867
1868 static int
1869 eth_igc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1870                    unsigned int n)
1871 {
1872         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1873         struct igc_hw_stats *hw_stats =
1874                         IGC_DEV_PRIVATE_STATS(dev);
1875         unsigned int i;
1876
1877         igc_read_stats_registers(hw, hw_stats);
1878
1879         if (n < IGC_NB_XSTATS)
1880                 return IGC_NB_XSTATS;
1881
1882         /* If this is a reset, xstats is NULL and we have already
1883          * cleared the registers by reading them.
1884          */
1885         if (!xstats)
1886                 return 0;
1887
1888         /* Extended stats */
1889         for (i = 0; i < IGC_NB_XSTATS; i++) {
1890                 xstats[i].id = i;
1891                 xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
1892                         rte_igc_stats_strings[i].offset);
1893         }
1894
1895         return IGC_NB_XSTATS;
1896 }
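
/*
 * Usage sketch (illustrative): extended statistics follow the usual
 * two-call pattern: query the required count with a NULL buffer, then
 * fetch names and values with buffers of at least that size:
 *
 *	int i, nb = rte_eth_xstats_get(port_id, NULL, 0);
 *	struct rte_eth_xstat *xs = malloc(nb * sizeof(*xs));
 *	struct rte_eth_xstat_name *names = malloc(nb * sizeof(*names));
 *	if (xs && names &&
 *	    rte_eth_xstats_get_names(port_id, names, nb) == nb &&
 *	    rte_eth_xstats_get(port_id, xs, nb) == nb)
 *		for (i = 0; i < nb; i++)
 *			printf("%s: %" PRIu64 "\n",
 *				names[xs[i].id].name, xs[i].value);
 */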
1897
1898 static int
1899 eth_igc_xstats_reset(struct rte_eth_dev *dev)
1900 {
1901         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1902         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1903         struct igc_hw_queue_stats *queue_stats =
1904                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1905
1906         /* Cancel the queue status handler to avoid conflicts */
1907         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1908
1909         /* HW registers are cleared on read */
1910         igc_reset_queue_stats_register(hw);
1911         igc_read_stats_registers(hw, hw_stats);
1912
1913         /* Reset software totals */
1914         memset(hw_stats, 0, sizeof(*hw_stats));
1915         memset(queue_stats, 0, sizeof(*queue_stats));
1916
1917         /* Restart the queue status handler */
1918         rte_eal_alarm_set(IGC_ALARM_INTERVAL, igc_update_queue_stats_handler,
1919                         dev);
1920
1921         return 0;
1922 }
1923
1924 static int
1925 eth_igc_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
1926         struct rte_eth_xstat_name *xstats_names, unsigned int size)
1927 {
1928         unsigned int i;
1929
1930         if (xstats_names == NULL)
1931                 return IGC_NB_XSTATS;
1932
1933         if (size < IGC_NB_XSTATS) {
1934                 PMD_DRV_LOG(ERR, "not enough buffers!");
1935                 return IGC_NB_XSTATS;
1936         }
1937
1938         for (i = 0; i < IGC_NB_XSTATS; i++)
1939                 strlcpy(xstats_names[i].name, rte_igc_stats_strings[i].name,
1940                         sizeof(xstats_names[i].name));
1941
1942         return IGC_NB_XSTATS;
1943 }
1944
1945 static int
1946 eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
1947                 struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
1948                 unsigned int limit)
1949 {
1950         unsigned int i;
1951
1952         if (!ids)
1953                 return eth_igc_xstats_get_names(dev, xstats_names, limit);
1954
1955         for (i = 0; i < limit; i++) {
1956                 if (ids[i] >= IGC_NB_XSTATS) {
1957                         PMD_DRV_LOG(ERR, "id value isn't valid");
1958                         return -EINVAL;
1959                 }
1960                 strlcpy(xstats_names[i].name,
1961                         rte_igc_stats_strings[ids[i]].name,
1962                         sizeof(xstats_names[i].name));
1963         }
1964         return limit;
1965 }
1966
1967 static int
1968 eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
1969                 uint64_t *values, unsigned int n)
1970 {
1971         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1972         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1973         unsigned int i;
1974
1975         igc_read_stats_registers(hw, hw_stats);
1976
1977         if (!ids) {
1978                 if (n < IGC_NB_XSTATS)
1979                         return IGC_NB_XSTATS;
1980
1981                 /* If this is a reset, values is NULL and we have already
1982                  * cleared the registers by reading them.
1983                  */
1984                 if (!values)
1985                         return 0;
1986
1987                 /* Extended stats */
1988                 for (i = 0; i < IGC_NB_XSTATS; i++)
1989                         values[i] = *(uint64_t *)(((char *)hw_stats) +
1990                                         rte_igc_stats_strings[i].offset);
1991
1992                 return IGC_NB_XSTATS;
1993
1994         } else {
1995                 for (i = 0; i < n; i++) {
1996                         if (ids[i] >= IGC_NB_XSTATS) {
1997                                 PMD_DRV_LOG(ERR, "id value isn't valid");
1998                                 return -EINVAL;
1999                         }
2000                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2001                                         rte_igc_stats_strings[ids[i]].offset);
2002                 }
2003                 return n;
2004         }
2005 }
2006
2007 static int
2008 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
2009                 uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx)
2010 {
2011         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
2012
2013         /* check that the queue id is valid */
2014         if (queue_id >= IGC_QUEUE_PAIRS_NUM) {
2015                 PMD_DRV_LOG(ERR, "queue id(%u) error, max is %u",
2016                         queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2017                 return -EINVAL;
2018         }
2019
2020         /* store the stat index for this queue */
2021         if (is_rx)
2022                 igc->rxq_stats_map[queue_id] = stat_idx;
2023         else
2024                 igc->txq_stats_map[queue_id] = stat_idx;
2025
2026         return 0;
2027 }
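
/*
 * Usage sketch (illustrative): an application binds a queue to one of the
 * RTE_ETHDEV_QUEUE_STAT_CNTRS per-queue counters before reading stats,
 * here mapping TX queue 0 and RX queue 0 to stat index 0:
 *
 *	rte_eth_dev_set_tx_queue_stats_mapping(port_id, 0, 0);
 *	rte_eth_dev_set_rx_queue_stats_mapping(port_id, 0, 0);
 */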
2028
2029 static int
2030 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
2031 {
2032         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2033         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2034         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2035         uint32_t vec = IGC_MISC_VEC_ID;
2036
2037         if (rte_intr_allow_others(intr_handle))
2038                 vec = IGC_RX_VEC_START;
2039
2040         uint32_t mask = 1u << (queue_id + vec);
2041
2042         IGC_WRITE_REG(hw, IGC_EIMC, mask);
2043         IGC_WRITE_FLUSH(hw);
2044
2045         return 0;
2046 }
2047
2048 static int
2049 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
2050 {
2051         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2052         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2053         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2054         uint32_t vec = IGC_MISC_VEC_ID;
2055
2056         if (rte_intr_allow_others(intr_handle))
2057                 vec = IGC_RX_VEC_START;
2058
2059         uint32_t mask = 1u << (queue_id + vec);
2060
2061         IGC_WRITE_REG(hw, IGC_EIMS, mask);
2062         IGC_WRITE_FLUSH(hw);
2063
2064         rte_intr_enable(intr_handle);
2065
2066         return 0;
2067 }
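
/*
 * Usage sketch (illustrative): with the two callbacks above wired up, an
 * application can sleep on an Rx queue instead of busy polling. It
 * registers the queue interrupt with a per-thread epoll fd, re-arms the
 * interrupt, waits, then disables it and drains the queue:
 *
 *	struct rte_epoll_event ev;
 *	rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
 *			RTE_INTR_EVENT_ADD, NULL);
 *	rte_eth_dev_rx_intr_enable(port_id, queue_id);
 *	if (rte_epoll_wait(RTE_EPOLL_PER_THREAD, &ev, 1, -1) > 0) {
 *		rte_eth_dev_rx_intr_disable(port_id, queue_id);
 *		... burst-receive until the queue is drained ...
 *	}
 */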
2068
2069 static int
2070 eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2071         struct rte_pci_device *pci_dev)
2072 {
2073         PMD_INIT_FUNC_TRACE();
2074         return rte_eth_dev_pci_generic_probe(pci_dev,
2075                 sizeof(struct igc_adapter), eth_igc_dev_init);
2076 }
2077
2078 static int
2079 eth_igc_pci_remove(struct rte_pci_device *pci_dev)
2080 {
2081         PMD_INIT_FUNC_TRACE();
2082         return rte_eth_dev_pci_generic_remove(pci_dev, eth_igc_dev_uninit);
2083 }
2084
2085 static struct rte_pci_driver rte_igc_pmd = {
2086         .id_table = pci_id_igc_map,
2087         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2088         .probe = eth_igc_pci_probe,
2089         .remove = eth_igc_pci_remove,
2090 };
2091
2092 RTE_PMD_REGISTER_PCI(net_igc, rte_igc_pmd);
2093 RTE_PMD_REGISTER_PCI_TABLE(net_igc, pci_id_igc_map);
2094 RTE_PMD_REGISTER_KMOD_DEP(net_igc, "* igb_uio | uio_pci_generic | vfio-pci");