net/igc: fix Rx error counter for bad length
drivers/net/igc/igc_ethdev.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019-2020 Intel Corporation
 */

#include <stdint.h>
#include <string.h>

#include <rte_string_fns.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <ethdev_driver.h>
#include <ethdev_pci.h>
#include <rte_malloc.h>
#include <rte_alarm.h>

#include "igc_logs.h"
#include "igc_txrx.h"
#include "igc_filter.h"
#include "igc_flow.h"

#define IGC_INTEL_VENDOR_ID             0x8086

/*
 * The overhead from MTU to max frame size.
 * The VLAN tag is considered, so it needs to be counted as well.
 */
#define IGC_ETH_OVERHEAD                (RTE_ETHER_HDR_LEN + \
                                        RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE)
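/*
 * With the usual header sizes (RTE_ETHER_HDR_LEN = 14, RTE_ETHER_CRC_LEN = 4,
 * VLAN_TAG_SIZE = 4), the overhead works out to 14 + 4 + 4 = 22 bytes,
 * i.e. max frame size = MTU + 22.
 */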

#define IGC_FC_PAUSE_TIME               0x0680
#define IGC_LINK_UPDATE_CHECK_TIMEOUT   90  /* 9s */
#define IGC_LINK_UPDATE_CHECK_INTERVAL  100 /* ms */

#define IGC_MISC_VEC_ID                 RTE_INTR_VEC_ZERO_OFFSET
#define IGC_RX_VEC_START                RTE_INTR_VEC_RXTX_OFFSET
#define IGC_MSIX_OTHER_INTR_VEC         0   /* MSI-X other interrupt vector */
#define IGC_FLAG_NEED_LINK_UPDATE       (1u << 0)       /* need update link */

#define IGC_DEFAULT_RX_FREE_THRESH      32

#define IGC_DEFAULT_RX_PTHRESH          8
#define IGC_DEFAULT_RX_HTHRESH          8
#define IGC_DEFAULT_RX_WTHRESH          4

#define IGC_DEFAULT_TX_PTHRESH          8
#define IGC_DEFAULT_TX_HTHRESH          1
#define IGC_DEFAULT_TX_WTHRESH          16

/* External VLAN Enable bit mask */
#define IGC_CTRL_EXT_EXT_VLAN           (1u << 26)

/* Speed select */
#define IGC_CTRL_SPEED_MASK             (7u << 8)
#define IGC_CTRL_SPEED_2500             (6u << 8)

/* External VLAN Ether Type bit mask and shift */
#define IGC_VET_EXT                     0xFFFF0000
#define IGC_VET_EXT_SHIFT               16

/* Force EEE Auto-negotiation */
#define IGC_EEER_EEE_FRC_AN             (1u << 28)

/* Per Queue Good Packets Received Count */
#define IGC_PQGPRC(idx)         (0x10010 + 0x100 * (idx))
/* Per Queue Good Octets Received Count */
#define IGC_PQGORC(idx)         (0x10018 + 0x100 * (idx))
/* Per Queue Good Octets Transmitted Count */
#define IGC_PQGOTC(idx)         (0x10034 + 0x100 * (idx))
/* Per Queue Multicast Packets Received Count */
#define IGC_PQMPRC(idx)         (0x10038 + 0x100 * (idx))
/* Transmit Queue Drop Packet Count */
#define IGC_TQDPC(idx)          (0xe030 + 0x40 * (idx))

#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
#define U32_0_IN_U64            0       /* lower bytes of u64 */
#define U32_1_IN_U64            1       /* higher bytes of u64 */
#else
#define U32_0_IN_U64            1
#define U32_1_IN_U64            0
#endif
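
/*
 * Illustrative use of these indices (a sketch, not taken from this file):
 * a 64-bit software counter can be assembled from two 32-bit register
 * reads without any byte swapping:
 *
 *     uint64_t cnt;
 *     uint32_t *half = (uint32_t *)&cnt;
 *     half[U32_0_IN_U64] = low32;     // lower 32 bits
 *     half[U32_1_IN_U64] = high32;    // upper 32 bits
 */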

#define IGC_ALARM_INTERVAL      8000000u
/* us; about every 13.6 s some per-queue registers wrap around back to 0. */
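/*
 * Sanity check of the interval (illustrative arithmetic): a 32-bit byte
 * counter at 2.5 Gbps wraps after about 2^32 / (2.5e9 / 8) ~= 13.7 s,
 * so an 8 s polling alarm reads each register at least once per wrap.
 */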

static const struct rte_eth_desc_lim rx_desc_lim = {
        .nb_max = IGC_MAX_RXD,
        .nb_min = IGC_MIN_RXD,
        .nb_align = IGC_RXD_ALIGN,
};

static const struct rte_eth_desc_lim tx_desc_lim = {
        .nb_max = IGC_MAX_TXD,
        .nb_min = IGC_MIN_TXD,
        .nb_align = IGC_TXD_ALIGN,
        .nb_seg_max = IGC_TX_MAX_SEG,
        .nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG,
};

static const struct rte_pci_id pci_id_igc_map[] = {
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) },
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V)  },
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_I)  },
        { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_K)  },
        { .vendor_id = 0, /* sentinel */ },
};

/* store statistics names and their offsets in the stats structure */
struct rte_igc_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
        unsigned int offset;
};

static const struct rte_igc_xstats_name_off rte_igc_stats_strings[] = {
        {"rx_crc_errors", offsetof(struct igc_hw_stats, crcerrs)},
        {"rx_align_errors", offsetof(struct igc_hw_stats, algnerrc)},
        {"rx_errors", offsetof(struct igc_hw_stats, rxerrc)},
        {"rx_missed_packets", offsetof(struct igc_hw_stats, mpc)},
        {"tx_single_collision_packets", offsetof(struct igc_hw_stats, scc)},
        {"tx_multiple_collision_packets", offsetof(struct igc_hw_stats, mcc)},
        {"tx_excessive_collision_packets", offsetof(struct igc_hw_stats,
                ecol)},
        {"tx_late_collisions", offsetof(struct igc_hw_stats, latecol)},
        {"tx_total_collisions", offsetof(struct igc_hw_stats, colc)},
        {"tx_deferred_packets", offsetof(struct igc_hw_stats, dc)},
        {"tx_no_carrier_sense_packets", offsetof(struct igc_hw_stats, tncrs)},
        {"tx_discarded_packets", offsetof(struct igc_hw_stats, htdpmc)},
        {"rx_length_errors", offsetof(struct igc_hw_stats, rlec)},
        {"rx_xon_packets", offsetof(struct igc_hw_stats, xonrxc)},
        {"tx_xon_packets", offsetof(struct igc_hw_stats, xontxc)},
        {"rx_xoff_packets", offsetof(struct igc_hw_stats, xoffrxc)},
        {"tx_xoff_packets", offsetof(struct igc_hw_stats, xofftxc)},
        {"rx_flow_control_unsupported_packets", offsetof(struct igc_hw_stats,
                fcruc)},
        {"rx_size_64_packets", offsetof(struct igc_hw_stats, prc64)},
        {"rx_size_65_to_127_packets", offsetof(struct igc_hw_stats, prc127)},
        {"rx_size_128_to_255_packets", offsetof(struct igc_hw_stats, prc255)},
        {"rx_size_256_to_511_packets", offsetof(struct igc_hw_stats, prc511)},
        {"rx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
                prc1023)},
        {"rx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
                prc1522)},
        {"rx_broadcast_packets", offsetof(struct igc_hw_stats, bprc)},
        {"rx_multicast_packets", offsetof(struct igc_hw_stats, mprc)},
        {"rx_undersize_errors", offsetof(struct igc_hw_stats, ruc)},
        {"rx_fragment_errors", offsetof(struct igc_hw_stats, rfc)},
        {"rx_oversize_errors", offsetof(struct igc_hw_stats, roc)},
        {"rx_jabber_errors", offsetof(struct igc_hw_stats, rjc)},
        {"rx_no_buffers", offsetof(struct igc_hw_stats, rnbc)},
        {"rx_management_packets", offsetof(struct igc_hw_stats, mgprc)},
        {"rx_management_dropped", offsetof(struct igc_hw_stats, mgpdc)},
        {"tx_management_packets", offsetof(struct igc_hw_stats, mgptc)},
        {"rx_total_packets", offsetof(struct igc_hw_stats, tpr)},
        {"tx_total_packets", offsetof(struct igc_hw_stats, tpt)},
        {"rx_total_bytes", offsetof(struct igc_hw_stats, tor)},
        {"tx_total_bytes", offsetof(struct igc_hw_stats, tot)},
        {"tx_size_64_packets", offsetof(struct igc_hw_stats, ptc64)},
        {"tx_size_65_to_127_packets", offsetof(struct igc_hw_stats, ptc127)},
        {"tx_size_128_to_255_packets", offsetof(struct igc_hw_stats, ptc255)},
        {"tx_size_256_to_511_packets", offsetof(struct igc_hw_stats, ptc511)},
        {"tx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
                ptc1023)},
        {"tx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
                ptc1522)},
        {"tx_multicast_packets", offsetof(struct igc_hw_stats, mptc)},
        {"tx_broadcast_packets", offsetof(struct igc_hw_stats, bptc)},
        {"tx_tso_packets", offsetof(struct igc_hw_stats, tsctc)},
        {"rx_sent_to_host_packets", offsetof(struct igc_hw_stats, rpthc)},
        {"tx_sent_by_host_packets", offsetof(struct igc_hw_stats, hgptc)},
        {"interrupt_assert_count", offsetof(struct igc_hw_stats, iac)},
        {"rx_descriptor_lower_threshold",
                offsetof(struct igc_hw_stats, icrxdmtc)},
};
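
/*
 * Illustrative access pattern (a sketch of how the getters declared below
 * consume this table): each xstat value lives in struct igc_hw_stats at
 * the recorded byte offset.
 *
 *     value = *(uint64_t *)(((char *)hw_stats) +
 *                     rte_igc_stats_strings[i].offset);
 */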

#define IGC_NB_XSTATS (sizeof(rte_igc_stats_strings) / \
                sizeof(rte_igc_stats_strings[0]))

static int eth_igc_configure(struct rte_eth_dev *dev);
static int eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete);
static int eth_igc_stop(struct rte_eth_dev *dev);
static int eth_igc_start(struct rte_eth_dev *dev);
static int eth_igc_set_link_up(struct rte_eth_dev *dev);
static int eth_igc_set_link_down(struct rte_eth_dev *dev);
static int eth_igc_close(struct rte_eth_dev *dev);
static int eth_igc_reset(struct rte_eth_dev *dev);
static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev);
static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev);
static int eth_igc_fw_version_get(struct rte_eth_dev *dev,
                                char *fw_version, size_t fw_size);
static int eth_igc_infos_get(struct rte_eth_dev *dev,
                        struct rte_eth_dev_info *dev_info);
static int eth_igc_led_on(struct rte_eth_dev *dev);
static int eth_igc_led_off(struct rte_eth_dev *dev);
static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev);
static int eth_igc_rar_set(struct rte_eth_dev *dev,
                struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool);
static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index);
static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
                        struct rte_ether_addr *addr);
static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
                         struct rte_ether_addr *mc_addr_set,
                         uint32_t nb_mc_addr);
static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev);
static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev);
static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
static int eth_igc_stats_get(struct rte_eth_dev *dev,
                        struct rte_eth_stats *rte_stats);
static int eth_igc_xstats_get(struct rte_eth_dev *dev,
                        struct rte_eth_xstat *xstats, unsigned int n);
static int eth_igc_xstats_get_by_id(struct rte_eth_dev *dev,
                                const uint64_t *ids,
                                uint64_t *values, unsigned int n);
static int eth_igc_xstats_get_names(struct rte_eth_dev *dev,
                                struct rte_eth_xstat_name *xstats_names,
                                unsigned int size);
static int eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
                struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
                unsigned int limit);
static int eth_igc_xstats_reset(struct rte_eth_dev *dev);
static int
eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
        uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx);
static int
eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
static int
eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
static int
eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
static int
eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
static int eth_igc_rss_reta_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size);
static int eth_igc_rss_reta_query(struct rte_eth_dev *dev,
                       struct rte_eth_rss_reta_entry64 *reta_conf,
                       uint16_t reta_size);
static int eth_igc_rss_hash_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_conf *rss_conf);
static int eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
                        struct rte_eth_rss_conf *rss_conf);
static int
eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
static int eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask);
static int eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
                      enum rte_vlan_type vlan_type, uint16_t tpid);

static const struct eth_dev_ops eth_igc_ops = {
        .dev_configure          = eth_igc_configure,
        .link_update            = eth_igc_link_update,
        .dev_stop               = eth_igc_stop,
        .dev_start              = eth_igc_start,
        .dev_close              = eth_igc_close,
        .dev_reset              = eth_igc_reset,
        .dev_set_link_up        = eth_igc_set_link_up,
        .dev_set_link_down      = eth_igc_set_link_down,
        .promiscuous_enable     = eth_igc_promiscuous_enable,
        .promiscuous_disable    = eth_igc_promiscuous_disable,
        .allmulticast_enable    = eth_igc_allmulticast_enable,
        .allmulticast_disable   = eth_igc_allmulticast_disable,
        .fw_version_get         = eth_igc_fw_version_get,
        .dev_infos_get          = eth_igc_infos_get,
        .dev_led_on             = eth_igc_led_on,
        .dev_led_off            = eth_igc_led_off,
        .dev_supported_ptypes_get = eth_igc_supported_ptypes_get,
        .mtu_set                = eth_igc_mtu_set,
        .mac_addr_add           = eth_igc_rar_set,
        .mac_addr_remove        = eth_igc_rar_clear,
        .mac_addr_set           = eth_igc_default_mac_addr_set,
        .set_mc_addr_list       = eth_igc_set_mc_addr_list,

        .rx_queue_setup         = eth_igc_rx_queue_setup,
        .rx_queue_release       = eth_igc_rx_queue_release,
        .tx_queue_setup         = eth_igc_tx_queue_setup,
        .tx_queue_release       = eth_igc_tx_queue_release,
        .tx_done_cleanup        = eth_igc_tx_done_cleanup,
        .rxq_info_get           = eth_igc_rxq_info_get,
        .txq_info_get           = eth_igc_txq_info_get,
        .stats_get              = eth_igc_stats_get,
        .xstats_get             = eth_igc_xstats_get,
        .xstats_get_by_id       = eth_igc_xstats_get_by_id,
        .xstats_get_names_by_id = eth_igc_xstats_get_names_by_id,
        .xstats_get_names       = eth_igc_xstats_get_names,
        .stats_reset            = eth_igc_xstats_reset,
        .xstats_reset           = eth_igc_xstats_reset,
        .queue_stats_mapping_set = eth_igc_queue_stats_mapping_set,
        .rx_queue_intr_enable   = eth_igc_rx_queue_intr_enable,
        .rx_queue_intr_disable  = eth_igc_rx_queue_intr_disable,
        .flow_ctrl_get          = eth_igc_flow_ctrl_get,
        .flow_ctrl_set          = eth_igc_flow_ctrl_set,
        .reta_update            = eth_igc_rss_reta_update,
        .reta_query             = eth_igc_rss_reta_query,
        .rss_hash_update        = eth_igc_rss_hash_update,
        .rss_hash_conf_get      = eth_igc_rss_hash_conf_get,
        .vlan_filter_set        = eth_igc_vlan_filter_set,
        .vlan_offload_set       = eth_igc_vlan_offload_set,
        .vlan_tpid_set          = eth_igc_vlan_tpid_set,
        .vlan_strip_queue_set   = eth_igc_vlan_strip_queue_set,
        .flow_ops_get           = eth_igc_flow_ops_get,
};

/*
 * multiple queue mode checking
 */
static int
igc_check_mq_mode(struct rte_eth_dev *dev)
{
        enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode;
        enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode;

        if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
                PMD_INIT_LOG(ERR, "SRIOV is not supported.");
                return -EINVAL;
        }

        if (rx_mq_mode != ETH_MQ_RX_NONE &&
                rx_mq_mode != ETH_MQ_RX_RSS) {
                /* RSS together with VMDq not supported */
                PMD_INIT_LOG(ERR, "RX mode %d is not supported.",
                                rx_mq_mode);
                return -EINVAL;
        }

        /* To not break software that sets an invalid mode, only display
         * a warning if an invalid mode is used.
         */
        if (tx_mq_mode != ETH_MQ_TX_NONE)
                PMD_INIT_LOG(WARNING,
                        "TX mode %d is not supported; it has no effect in this driver and is ignored",
                        tx_mq_mode);

        return 0;
}

static int
eth_igc_configure(struct rte_eth_dev *dev)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
        int ret;

        PMD_INIT_FUNC_TRACE();

        if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG)
                dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;

        ret = igc_check_mq_mode(dev);
        if (ret != 0)
                return ret;

        intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
        return 0;
}

static int
eth_igc_set_link_up(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

        if (hw->phy.media_type == igc_media_type_copper)
                igc_power_up_phy(hw);
        else
                igc_power_up_fiber_serdes_link(hw);
        return 0;
}

static int
eth_igc_set_link_down(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

        if (hw->phy.media_type == igc_media_type_copper)
                igc_power_down_phy(hw);
        else
                igc_shutdown_fiber_serdes_link(hw);
        return 0;
}

/*
 * disable other interrupt
 */
static void
igc_intr_other_disable(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

        if (rte_intr_allow_others(intr_handle) &&
                dev->data->dev_conf.intr_conf.lsc) {
                IGC_WRITE_REG(hw, IGC_EIMC, 1u << IGC_MSIX_OTHER_INTR_VEC);
        }

        IGC_WRITE_REG(hw, IGC_IMC, ~0);
        IGC_WRITE_FLUSH(hw);
}

/*
 * enable other interrupt
 */
static inline void
igc_intr_other_enable(struct rte_eth_dev *dev)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

        if (rte_intr_allow_others(intr_handle) &&
                dev->data->dev_conf.intr_conf.lsc) {
                IGC_WRITE_REG(hw, IGC_EIMS, 1u << IGC_MSIX_OTHER_INTR_VEC);
        }

        IGC_WRITE_REG(hw, IGC_IMS, intr->mask);
        IGC_WRITE_FLUSH(hw);
}

/*
 * Read the ICR register, check the interrupt causes, and set a bit flag
 * to trigger a link status update.
 */
static void
eth_igc_interrupt_get_status(struct rte_eth_dev *dev)
{
        uint32_t icr;
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);

        /* read-on-clear nic registers here */
        icr = IGC_READ_REG(hw, IGC_ICR);

        intr->flags = 0;
        if (icr & IGC_ICR_LSC)
                intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
}

/* return 0 means link status changed, -1 means not changed */
static int
eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_eth_link link;
        int link_check, count;

        link_check = 0;
        hw->mac.get_link_status = 1;

        /* possible wait-to-complete in up to 9 seconds */
        for (count = 0; count < IGC_LINK_UPDATE_CHECK_TIMEOUT; count++) {
                /* Read the real link status */
                switch (hw->phy.media_type) {
                case igc_media_type_copper:
                        /* Do the work to read phy */
                        igc_check_for_link(hw);
                        link_check = !hw->mac.get_link_status;
                        break;

                case igc_media_type_fiber:
                        igc_check_for_link(hw);
                        link_check = (IGC_READ_REG(hw, IGC_STATUS) &
                                      IGC_STATUS_LU);
                        break;

                case igc_media_type_internal_serdes:
                        igc_check_for_link(hw);
                        link_check = hw->mac.serdes_has_link;
                        break;

                default:
                        break;
                }
                if (link_check || wait_to_complete == 0)
                        break;
                rte_delay_ms(IGC_LINK_UPDATE_CHECK_INTERVAL);
        }
        memset(&link, 0, sizeof(link));

        /* Now we check if a transition has happened */
        if (link_check) {
                uint16_t duplex, speed;
                hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
                link.link_duplex = (duplex == FULL_DUPLEX) ?
                                ETH_LINK_FULL_DUPLEX :
                                ETH_LINK_HALF_DUPLEX;
                link.link_speed = speed;
                link.link_status = ETH_LINK_UP;
                link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                                ETH_LINK_SPEED_FIXED);

                if (speed == SPEED_2500) {
                        uint32_t tipg = IGC_READ_REG(hw, IGC_TIPG);
                        if ((tipg & IGC_TIPG_IPGT_MASK) != 0x0b) {
                                tipg &= ~IGC_TIPG_IPGT_MASK;
                                tipg |= 0x0b;
                                IGC_WRITE_REG(hw, IGC_TIPG, tipg);
                        }
                }
        } else {
                link.link_speed = 0;
                link.link_duplex = ETH_LINK_HALF_DUPLEX;
                link.link_status = ETH_LINK_DOWN;
                link.link_autoneg = ETH_LINK_FIXED;
        }

        return rte_eth_linkstatus_set(dev, &link);
}

/*
 * It executes link_update after knowing an interrupt is present.
 */
static void
eth_igc_interrupt_action(struct rte_eth_dev *dev)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_eth_link link;
        int ret;

        if (intr->flags & IGC_FLAG_NEED_LINK_UPDATE) {
                intr->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;

                /* set get_link_status to check register later */
                ret = eth_igc_link_update(dev, 0);

                /* check if link has changed */
                if (ret < 0)
                        return;

                rte_eth_linkstatus_get(dev, &link);
                if (link.link_status)
                        PMD_DRV_LOG(INFO,
                                " Port %d: Link Up - speed %u Mbps - %s",
                                dev->data->port_id,
                                (unsigned int)link.link_speed,
                                link.link_duplex == ETH_LINK_FULL_DUPLEX ?
                                "full-duplex" : "half-duplex");
                else
                        PMD_DRV_LOG(INFO, " Port %d: Link Down",
                                dev->data->port_id);

                PMD_DRV_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
                                pci_dev->addr.domain,
                                pci_dev->addr.bus,
                                pci_dev->addr.devid,
                                pci_dev->addr.function);
                rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
        }
}

/*
 * Interrupt handler, which shall be registered first.
 *
 * @param
 *  The address of the parameter (struct rte_eth_dev *) registered before.
 */
static void
eth_igc_interrupt_handler(void *param)
{
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;

        eth_igc_interrupt_get_status(dev);
        eth_igc_interrupt_action(dev);
}

static void igc_read_queue_stats_register(struct rte_eth_dev *dev);

/*
 * Update the queue stats every IGC_ALARM_INTERVAL microseconds.
 * @param
 *  The address of the parameter (struct rte_eth_dev *) registered before.
 */
static void
igc_update_queue_stats_handler(void *param)
{
        struct rte_eth_dev *dev = param;
        igc_read_queue_stats_register(dev);
        rte_eal_alarm_set(IGC_ALARM_INTERVAL,
                        igc_update_queue_stats_handler, dev);
}

/*
 * rx,tx enable/disable
 */
static void
eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        uint32_t tctl, rctl;

        tctl = IGC_READ_REG(hw, IGC_TCTL);
        rctl = IGC_READ_REG(hw, IGC_RCTL);

        if (enable) {
                /* enable Tx/Rx */
                tctl |= IGC_TCTL_EN;
                rctl |= IGC_RCTL_EN;
        } else {
                /* disable Tx/Rx */
                tctl &= ~IGC_TCTL_EN;
                rctl &= ~IGC_RCTL_EN;
        }
        IGC_WRITE_REG(hw, IGC_TCTL, tctl);
        IGC_WRITE_REG(hw, IGC_RCTL, rctl);
        IGC_WRITE_FLUSH(hw);
}

/*
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC.
 */
static int
eth_igc_stop(struct rte_eth_dev *dev)
{
        struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct rte_eth_link link;

        dev->data->dev_started = 0;
        adapter->stopped = 1;

        /* disable receive and transmit */
        eth_igc_rxtx_control(dev, false);

        /* disable all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
        IGC_WRITE_FLUSH(hw);

        /* clear all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EICR, 0x1f);

        igc_intr_other_disable(dev);

        rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);

        /* disable intr eventfd mapping */
        rte_intr_disable(intr_handle);

        igc_reset_hw(hw);

        /* disable all wake up */
        IGC_WRITE_REG(hw, IGC_WUC, 0);

        /* disable checking EEE operation in MAC loopback mode */
        igc_read_reg_check_clear_bits(hw, IGC_EEER, IGC_EEER_EEE_FRC_AN);

        /* Set bit for Go Link disconnect */
        igc_read_reg_check_set_bits(hw, IGC_82580_PHY_POWER_MGMT,
                        IGC_82580_PM_GO_LINKD);

        /* Power down the phy. Needed to make the link go Down */
        eth_igc_set_link_down(dev);

        igc_dev_clear_queues(dev);

        /* clear the recorded link status */
        memset(&link, 0, sizeof(link));
        rte_eth_linkstatus_set(dev, &link);

        if (!rte_intr_allow_others(intr_handle))
                /* resume to the default handler */
                rte_intr_callback_register(intr_handle,
                                           eth_igc_interrupt_handler,
                                           (void *)dev);

        /* Clean datapath event and queue/vec mapping */
        rte_intr_efd_disable(intr_handle);
        if (intr_handle->intr_vec != NULL) {
                rte_free(intr_handle->intr_vec);
                intr_handle->intr_vec = NULL;
        }

        return 0;
}

/*
 * write interrupt vector allocation register
 * @hw
 *  board private structure
 * @queue_index
 *  queue index, valid 0,1,2,3
 * @tx
 *  tx:1, rx:0
 * @msix_vector
 *  msix-vector, valid 0,1,2,3,4
 */
static void
igc_write_ivar(struct igc_hw *hw, uint8_t queue_index,
                bool tx, uint8_t msix_vector)
{
        uint8_t offset = 0;
        uint8_t reg_index = queue_index >> 1;
        uint32_t val;

        /*
         * IVAR(0)
         * bit31...24   bit23...16      bit15...8       bit7...0
         * TX1          RX1             TX0             RX0
         *
         * IVAR(1)
         * bit31...24   bit23...16      bit15...8       bit7...0
         * TX3          RX3             TX2             RX2
         */
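        /*
         * Example (illustrative): queue_index = 2, tx = false,
         * msix_vector = 3 gives reg_index = 1 and offset = 0, so bits
         * 7..0 of IVAR(1) become (3 | IGC_IVAR_VALID), i.e. RX2 is
         * routed to MSI-X vector 3.
         */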

        if (tx)
                offset = 8;

        if (queue_index & 1)
                offset += 16;

        val = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, reg_index);

        /* clear bits */
        val &= ~((uint32_t)0xFF << offset);

        /* write vector and valid bit */
        val |= (uint32_t)(msix_vector | IGC_IVAR_VALID) << offset;

        IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, reg_index, val);
}

/* Sets up the hardware to generate MSI-X interrupts properly
 * @hw
 *  board private structure
 */
static void
igc_configure_msix_intr(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;

        uint32_t intr_mask;
        uint32_t vec = IGC_MISC_VEC_ID;
        uint32_t base = IGC_MISC_VEC_ID;
        uint32_t misc_shift = 0;
        int i;

        /* won't configure msix register if no mapping is done
         * between intr vector and event fd
         */
        if (!rte_intr_dp_is_en(intr_handle))
                return;

        if (rte_intr_allow_others(intr_handle)) {
                base = IGC_RX_VEC_START;
                vec = base;
                misc_shift = 1;
        }

        /* turn on MSI-X capability first */
        IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE |
                                IGC_GPIE_PBA | IGC_GPIE_EIAME |
                                IGC_GPIE_NSICR);
        intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
                misc_shift;

        if (dev->data->dev_conf.intr_conf.lsc)
                intr_mask |= (1u << IGC_MSIX_OTHER_INTR_VEC);

        /* enable msix auto-clear */
        igc_read_reg_check_set_bits(hw, IGC_EIAC, intr_mask);

        /* set other cause interrupt vector */
        igc_read_reg_check_set_bits(hw, IGC_IVAR_MISC,
                (uint32_t)(IGC_MSIX_OTHER_INTR_VEC | IGC_IVAR_VALID) << 8);

        /* enable auto-mask */
        igc_read_reg_check_set_bits(hw, IGC_EIAM, intr_mask);

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                igc_write_ivar(hw, i, 0, vec);
                intr_handle->intr_vec[i] = vec;
                if (vec < base + intr_handle->nb_efd - 1)
                        vec++;
        }

        IGC_WRITE_FLUSH(hw);
}

/**
 * Set or clear the LSC bit in the interrupt mask, which is applied when
 * the interrupts are (re-)enabled.
 *
 * @dev
 *  Pointer to struct rte_eth_dev.
 * @on
 *  Enable or disable.
 */
static void
igc_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on)
{
        struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);

        if (on)
                intr->mask |= IGC_ICR_LSC;
        else
                intr->mask &= ~IGC_ICR_LSC;
}

/*
 * It enables the rx queue interrupts.
 * It will be called only once during NIC initialization.
 */
static void
igc_rxq_interrupt_setup(struct rte_eth_dev *dev)
{
        uint32_t mask;
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0;

        /* won't configure msix register if no mapping is done
         * between intr vector and event fd
         */
        if (!rte_intr_dp_is_en(intr_handle))
                return;

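        /*
         * Illustrative mask value: with 4 Rx event fds and the misc
         * vector in use (misc_shift = 1), RTE_LEN2MASK(4, uint32_t) << 1
         * = 0x1e, i.e. EIMS bits 1..4 enable the four Rx queue vectors.
         */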
        mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift;
        IGC_WRITE_REG(hw, IGC_EIMS, mask);
}

/*
 *  Get hardware rx-buffer size.
 */
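/*
 * Per the conversion below, RXPBS bits 5:0 express the Rx packet buffer
 * size in KB; << 10 converts KB to bytes (e.g. a field value of 0x20
 * yields 32 KB, i.e. 32768 bytes).
 */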
static inline int
igc_get_rx_buffer_size(struct igc_hw *hw)
{
        return (IGC_READ_REG(hw, IGC_RXPBS) & 0x3f) << 10;
}

/*
 * igc_hw_control_acquire sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means
 * that the driver is loaded.
 */
static void
igc_hw_control_acquire(struct igc_hw *hw)
{
        uint32_t ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
        IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
}

/*
 * igc_hw_control_release resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void
igc_hw_control_release(struct igc_hw *hw)
{
        uint32_t ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
        IGC_WRITE_REG(hw, IGC_CTRL_EXT,
                        ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
}

static int
igc_hardware_init(struct igc_hw *hw)
{
        uint32_t rx_buf_size;
        int diag;

        /* Let the firmware know the OS is in control */
        igc_hw_control_acquire(hw);

        /* Issue a global reset */
        igc_reset_hw(hw);

        /* disable all wake up */
        IGC_WRITE_REG(hw, IGC_WUC, 0);

        /*
         * Hardware flow control
         * - High water mark should allow for at least two standard size (1518)
         *   frames to be received after sending an XOFF.
         * - Low water mark works best when it is very near the high water mark.
         *   This allows the receiver to restart by sending XON when it has
         *   drained a bit. Here we use an arbitrary value of 1500 which will
         *   restart after one full frame is pulled from the buffer. There
         *   could be several smaller frames in the buffer and if so they will
         *   not trigger the XON until their total number reduces the buffer
         *   by 1500.
         */
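        /*
         * Worked example (illustrative): with a 32 KB Rx buffer,
         * high_water = 32768 - 2 * 1518 = 29732 bytes and
         * low_water = 29732 - 1500 = 28232 bytes.
         */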
        rx_buf_size = igc_get_rx_buffer_size(hw);
        hw->fc.high_water = rx_buf_size - (RTE_ETHER_MAX_LEN * 2);
        hw->fc.low_water = hw->fc.high_water - 1500;
        hw->fc.pause_time = IGC_FC_PAUSE_TIME;
        hw->fc.send_xon = 1;
        hw->fc.requested_mode = igc_fc_full;

        diag = igc_init_hw(hw);
        if (diag < 0)
                return diag;

        igc_get_phy_info(hw);
        igc_check_for_link(hw);

        return 0;
}

static int
eth_igc_start(struct rte_eth_dev *dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        uint32_t *speeds;
        int ret;

        PMD_INIT_FUNC_TRACE();

        /* disable all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
        IGC_WRITE_FLUSH(hw);

        /* clear all MSI-X interrupts */
        IGC_WRITE_REG(hw, IGC_EICR, 0x1f);

        /* disable uio/vfio intr/eventfd mapping */
        if (!adapter->stopped)
                rte_intr_disable(intr_handle);

        /* Power up the phy. Needed to make the link go Up */
        eth_igc_set_link_up(dev);

        /* Put the address into the Receive Address Array */
        igc_rar_set(hw, hw->mac.addr, 0);

        /* Initialize the hardware */
        if (igc_hardware_init(hw)) {
                PMD_DRV_LOG(ERR, "Unable to initialize the hardware");
                return -EIO;
        }
        adapter->stopped = 0;

        /* check and configure queue intr-vector mapping */
        if (rte_intr_cap_multiple(intr_handle) &&
                dev->data->dev_conf.intr_conf.rxq) {
                uint32_t intr_vector = dev->data->nb_rx_queues;
                if (rte_intr_efd_enable(intr_handle, intr_vector))
                        return -1;
        }

        if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
                intr_handle->intr_vec = rte_zmalloc("intr_vec",
                        dev->data->nb_rx_queues * sizeof(int), 0);
                if (intr_handle->intr_vec == NULL) {
                        PMD_DRV_LOG(ERR,
                                "Failed to allocate %d rx_queues intr_vec",
                                dev->data->nb_rx_queues);
                        return -ENOMEM;
                }
        }

        /* configure msix for rx interrupt */
        igc_configure_msix_intr(dev);

        igc_tx_init(dev);

        /* This can fail when allocating mbufs for descriptor rings */
        ret = igc_rx_init(dev);
        if (ret) {
                PMD_DRV_LOG(ERR, "Unable to initialize RX hardware");
                igc_dev_clear_queues(dev);
                return ret;
        }

        igc_clear_hw_cntrs_base_generic(hw);

        /* VLAN Offload Settings */
        eth_igc_vlan_offload_set(dev,
                ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
                ETH_VLAN_EXTEND_MASK);

        /* Setup link speed and duplex */
        speeds = &dev->data->dev_conf.link_speeds;
        if (*speeds == ETH_LINK_SPEED_AUTONEG) {
                hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
                hw->mac.autoneg = 1;
        } else {
                int num_speeds = 0;
                bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;

                /* Reset */
                hw->phy.autoneg_advertised = 0;

                if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
                                ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
                                ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
                                ETH_LINK_SPEED_FIXED)) {
                        num_speeds = -1;
                        goto error_invalid_config;
                }
                if (*speeds & ETH_LINK_SPEED_10M_HD) {
                        hw->phy.autoneg_advertised |= ADVERTISE_10_HALF;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_10M) {
                        hw->phy.autoneg_advertised |= ADVERTISE_10_FULL;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_100M_HD) {
                        hw->phy.autoneg_advertised |= ADVERTISE_100_HALF;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_100M) {
                        hw->phy.autoneg_advertised |= ADVERTISE_100_FULL;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_1G) {
                        hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL;
                        num_speeds++;
                }
                if (*speeds & ETH_LINK_SPEED_2_5G) {
                        hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL;
                        num_speeds++;
                }
                if (num_speeds == 0 || (!autoneg && num_speeds > 1))
                        goto error_invalid_config;

                /* Set/reset the mac.autoneg based on the link speed,
                 * fixed or not
                 */
                if (!autoneg) {
                        hw->mac.autoneg = 0;
                        hw->mac.forced_speed_duplex =
                                        hw->phy.autoneg_advertised;
                } else {
                        hw->mac.autoneg = 1;
                }
        }

        igc_setup_link(hw);

        if (rte_intr_allow_others(intr_handle)) {
                /* check if lsc interrupt is enabled */
                if (dev->data->dev_conf.intr_conf.lsc)
                        igc_lsc_interrupt_setup(dev, 1);
                else
                        igc_lsc_interrupt_setup(dev, 0);
        } else {
                rte_intr_callback_unregister(intr_handle,
                                             eth_igc_interrupt_handler,
                                             (void *)dev);
                if (dev->data->dev_conf.intr_conf.lsc)
                        PMD_DRV_LOG(INFO,
                                "LSC interrupt cannot be enabled: no intr multiplex");
        }

        /* enable uio/vfio intr/eventfd mapping */
        rte_intr_enable(intr_handle);

        rte_eal_alarm_set(IGC_ALARM_INTERVAL,
                        igc_update_queue_stats_handler, dev);

        /* check if rxq interrupt is enabled */
        if (dev->data->dev_conf.intr_conf.rxq &&
                        rte_intr_dp_is_en(intr_handle))
                igc_rxq_interrupt_setup(dev);

        /* resume enabled intr since hw reset */
        igc_intr_other_enable(dev);

        eth_igc_rxtx_control(dev, true);
        eth_igc_link_update(dev, 0);

        /* configure MAC-loopback mode */
        if (dev->data->dev_conf.lpbk_mode == 1) {
                uint32_t reg_val;

                reg_val = IGC_READ_REG(hw, IGC_CTRL);
                reg_val &= ~IGC_CTRL_SPEED_MASK;
                reg_val |= IGC_CTRL_SLU | IGC_CTRL_FRCSPD |
                        IGC_CTRL_FRCDPX | IGC_CTRL_FD | IGC_CTRL_SPEED_2500;
                IGC_WRITE_REG(hw, IGC_CTRL, reg_val);

                igc_read_reg_check_set_bits(hw, IGC_EEER, IGC_EEER_EEE_FRC_AN);
        }

        return 0;

error_invalid_config:
        PMD_DRV_LOG(ERR, "Invalid advertised speeds (%u) for port %u",
                     dev->data->dev_conf.link_speeds, dev->data->port_id);
        igc_dev_clear_queues(dev);
        return -EINVAL;
}

static int
igc_reset_swfw_lock(struct igc_hw *hw)
{
        int ret_val;

        /*
         * Do mac ops initialization manually here, since we will need
         * some function pointers set by this call.
         */
        ret_val = igc_init_mac_params(hw);
        if (ret_val)
                return ret_val;

        /*
         * SMBI lock should not fail in this early stage. If this is the case,
         * it is due to an improper exit of the application.
         * So force the release of the faulty lock.
         */
        if (igc_get_hw_semaphore_generic(hw) < 0)
                PMD_DRV_LOG(DEBUG, "SMBI lock released");

        igc_put_hw_semaphore_generic(hw);

        if (hw->mac.ops.acquire_swfw_sync != NULL) {
                uint16_t mask;

                /*
                 * Phy lock should not fail in this early stage.
                 * If this is the case, it is due to an improper exit of the
                 * application. So force the release of the faulty lock.
                 */
                mask = IGC_SWFW_PHY0_SM;
                if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) {
                        PMD_DRV_LOG(DEBUG, "SWFW phy%d lock released",
                                    hw->bus.func);
                }
                hw->mac.ops.release_swfw_sync(hw, mask);

                /*
                 * This one is more tricky since it is common to all ports; but
                 * swfw_sync retries last long enough (1s) to be almost sure
                 * that if lock can not be taken it is due to an improper lock
                 * of the semaphore.
                 */
                mask = IGC_SWFW_EEP_SM;
                if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0)
                        PMD_DRV_LOG(DEBUG, "SWFW common locks released");

                hw->mac.ops.release_swfw_sync(hw, mask);
        }

        return IGC_SUCCESS;
}

/*
 * free all rx/tx queues.
 */
static void
igc_dev_free_queues(struct rte_eth_dev *dev)
{
        uint16_t i;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                eth_igc_rx_queue_release(dev->data->rx_queues[i]);
                dev->data->rx_queues[i] = NULL;
        }
        dev->data->nb_rx_queues = 0;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                eth_igc_tx_queue_release(dev->data->tx_queues[i]);
                dev->data->tx_queues[i] = NULL;
        }
        dev->data->nb_tx_queues = 0;
}

static int
eth_igc_close(struct rte_eth_dev *dev)
{
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
        int retry = 0;
        int ret = 0;

        PMD_INIT_FUNC_TRACE();
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        if (!adapter->stopped)
                ret = eth_igc_stop(dev);

        igc_flow_flush(dev, NULL);
        igc_clear_all_filter(dev);

        igc_intr_other_disable(dev);
        do {
                int ret = rte_intr_callback_unregister(intr_handle,
                                eth_igc_interrupt_handler, dev);
                if (ret >= 0 || ret == -ENOENT || ret == -EINVAL)
                        break;

                PMD_DRV_LOG(ERR, "intr callback unregister failed: %d", ret);
                DELAY(200 * 1000); /* delay 200ms */
        } while (retry++ < 5);

        igc_phy_hw_reset(hw);
        igc_hw_control_release(hw);
        igc_dev_free_queues(dev);

        /* Reset any pending lock */
        igc_reset_swfw_lock(hw);

        return ret;
}

static void
igc_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev)
{
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);

        hw->vendor_id = pci_dev->id.vendor_id;
        hw->device_id = pci_dev->id.device_id;
        hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
        hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
}

static int
eth_igc_dev_init(struct rte_eth_dev *dev)
{
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
        struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
        int i, error = 0;

        PMD_INIT_FUNC_TRACE();
        dev->dev_ops = &eth_igc_ops;
        dev->rx_descriptor_done = eth_igc_rx_descriptor_done;
        dev->rx_queue_count = eth_igc_rx_queue_count;
        dev->rx_descriptor_status = eth_igc_rx_descriptor_status;
        dev->tx_descriptor_status = eth_igc_tx_descriptor_status;

        /*
         * for secondary processes, we don't initialize any further as primary
         * has already done this work. Only check we don't need a different
         * RX function.
         */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        rte_eth_copy_pci_info(dev, pci_dev);
        dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

        hw->back = pci_dev;
        hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;

        igc_identify_hardware(dev, pci_dev);
        if (igc_setup_init_funcs(hw, false) != IGC_SUCCESS) {
                error = -EIO;
                goto err_late;
        }

        igc_get_bus_info(hw);

        /* Reset any pending lock */
        if (igc_reset_swfw_lock(hw) != IGC_SUCCESS) {
                error = -EIO;
                goto err_late;
        }

        /* Finish initialization */
        if (igc_setup_init_funcs(hw, true) != IGC_SUCCESS) {
                error = -EIO;
                goto err_late;
        }

        hw->mac.autoneg = 1;
        hw->phy.autoneg_wait_to_complete = 0;
        hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;

        /* Copper options */
        if (hw->phy.media_type == igc_media_type_copper) {
                hw->phy.mdix = 0; /* AUTO_ALL_MODES */
                hw->phy.disable_polarity_correction = 0;
                hw->phy.ms_type = igc_ms_hw_default;
        }

        /*
         * Start from a known state; this is important when reading the
         * NVM and MAC address from it.
         */
        igc_reset_hw(hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (igc_validate_nvm_checksum(hw) < 0) {
                /*
                 * Some PCI-E parts fail the first check due to
                 * the link being in sleep state, call it again,
                 * if it fails a second time it's a real issue.
                 */
                if (igc_validate_nvm_checksum(hw) < 0) {
                        PMD_INIT_LOG(ERR, "EEPROM checksum invalid");
                        error = -EIO;
                        goto err_late;
                }
        }

        /* Read the permanent MAC address out of the EEPROM */
        if (igc_read_mac_addr(hw) != 0) {
                PMD_INIT_LOG(ERR, "EEPROM error while reading MAC address");
                error = -EIO;
                goto err_late;
        }

        /* Allocate memory for storing MAC addresses */
        dev->data->mac_addrs = rte_zmalloc("igc",
                RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0);
        if (dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Failed to allocate %d bytes for storing MAC",
                                RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count);
                error = -ENOMEM;
                goto err_late;
        }

        /* Copy the permanent MAC address */
        rte_ether_addr_copy((struct rte_ether_addr *)hw->mac.addr,
                        &dev->data->mac_addrs[0]);

        /* Now initialize the hardware */
        if (igc_hardware_init(hw) != 0) {
                PMD_INIT_LOG(ERR, "Hardware initialization failed");
                rte_free(dev->data->mac_addrs);
                dev->data->mac_addrs = NULL;
                error = -ENODEV;
                goto err_late;
        }

        hw->mac.get_link_status = 1;
        igc->stopped = 0;

        /* Indicate SOL/IDER usage */
        if (igc_check_reset_block(hw) < 0)
                PMD_INIT_LOG(ERR,
                        "PHY reset is blocked due to SOL/IDER session.");

        PMD_INIT_LOG(DEBUG, "port_id %d vendorID=0x%x deviceID=0x%x",
                        dev->data->port_id, pci_dev->id.vendor_id,
                        pci_dev->id.device_id);

        rte_intr_callback_register(&pci_dev->intr_handle,
                        eth_igc_interrupt_handler, (void *)dev);

        /* enable uio/vfio intr/eventfd mapping */
        rte_intr_enable(&pci_dev->intr_handle);

        /* enable support intr */
        igc_intr_other_enable(dev);

        /* initialize the per-queue stats mapping */
1358         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1359                 igc->txq_stats_map[i] = -1;
1360                 igc->rxq_stats_map[i] = -1;
1361         }
1362
1363         igc_flow_init(dev);
1364         igc_clear_all_filter(dev);
1365         return 0;
1366
1367 err_late:
1368         igc_hw_control_release(hw);
1369         return error;
1370 }
1371
1372 static int
1373 eth_igc_dev_uninit(__rte_unused struct rte_eth_dev *eth_dev)
1374 {
1375         PMD_INIT_FUNC_TRACE();
1376         eth_igc_close(eth_dev);
1377         return 0;
1378 }
1379
1380 static int
1381 eth_igc_reset(struct rte_eth_dev *dev)
1382 {
1383         int ret;
1384
1385         PMD_INIT_FUNC_TRACE();
1386
1387         ret = eth_igc_dev_uninit(dev);
1388         if (ret)
1389                 return ret;
1390
1391         return eth_igc_dev_init(dev);
1392 }
1393
1394 static int
1395 eth_igc_promiscuous_enable(struct rte_eth_dev *dev)
1396 {
1397         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1398         uint32_t rctl;
1399
1400         rctl = IGC_READ_REG(hw, IGC_RCTL);
1401         rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE);
1402         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1403         return 0;
1404 }
1405
1406 static int
1407 eth_igc_promiscuous_disable(struct rte_eth_dev *dev)
1408 {
1409         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1410         uint32_t rctl;
1411
1412         rctl = IGC_READ_REG(hw, IGC_RCTL);
1413         rctl &= (~IGC_RCTL_UPE);
1414         if (dev->data->all_multicast == 1)
1415                 rctl |= IGC_RCTL_MPE;
1416         else
1417                 rctl &= (~IGC_RCTL_MPE);
1418         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1419         return 0;
1420 }
1421
1422 static int
1423 eth_igc_allmulticast_enable(struct rte_eth_dev *dev)
1424 {
1425         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1426         uint32_t rctl;
1427
1428         rctl = IGC_READ_REG(hw, IGC_RCTL);
1429         rctl |= IGC_RCTL_MPE;
1430         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1431         return 0;
1432 }
1433
1434 static int
1435 eth_igc_allmulticast_disable(struct rte_eth_dev *dev)
1436 {
1437         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1438         uint32_t rctl;
1439
1440         if (dev->data->promiscuous == 1)
1441                 return 0;       /* must remain in all_multicast mode */
1442
1443         rctl = IGC_READ_REG(hw, IGC_RCTL);
1444         rctl &= (~IGC_RCTL_MPE);
1445         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1446         return 0;
1447 }
1448
1449 static int
1450 eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
1451                        size_t fw_size)
1452 {
1453         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1454         struct igc_fw_version fw;
1455         int ret;
1456
1457         igc_get_fw_version(hw, &fw);
1458
1459         /* if the option ROM is valid, display its version too */
1460         if (fw.or_valid) {
1461                 ret = snprintf(fw_version, fw_size,
1462                          "%d.%d, 0x%08x, %d.%d.%d",
1463                          fw.eep_major, fw.eep_minor, fw.etrack_id,
1464                          fw.or_major, fw.or_build, fw.or_patch);
1465         /* no option ROM */
1466         } else {
1467                 if (fw.etrack_id != 0x0000) {
1468                         ret = snprintf(fw_version, fw_size,
1469                                  "%d.%d, 0x%08x",
1470                                  fw.eep_major, fw.eep_minor,
1471                                  fw.etrack_id);
1472                 } else {
1473                         ret = snprintf(fw_version, fw_size,
1474                                  "%d.%d.%d",
1475                                  fw.eep_major, fw.eep_minor,
1476                                  fw.eep_build);
1477                 }
1478         }
1479
1480         ret += 1; /* add the size of '\0' */
1481         if (fw_size < (u32)ret)
1482                 return ret;
1483         else
1484                 return 0;
1485 }
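
/*
 * Illustrative outputs (hypothetical values): with a valid option ROM the
 * string looks like "1.52, 0x80000d23, 2.1.4"; with only a valid eTrack ID,
 * "1.52, 0x80000d23"; otherwise the plain EEPROM version, e.g. "1.52.0".
 */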
1486
1487 static int
1488 eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1489 {
1490         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1491
1492         dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
1493         dev_info->max_rx_pktlen = MAX_RX_JUMBO_FRAME_SIZE;
1494         dev_info->max_mac_addrs = hw->mac.rar_entry_count;
1495         dev_info->rx_offload_capa = IGC_RX_OFFLOAD_ALL;
1496         dev_info->tx_offload_capa = IGC_TX_OFFLOAD_ALL;
1497         dev_info->rx_queue_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
1498
1499         dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM;
1500         dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM;
1501         dev_info->max_vmdq_pools = 0;
1502
1503         dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t);
1504         dev_info->reta_size = ETH_RSS_RETA_SIZE_128;
1505         dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL;
1506
1507         dev_info->default_rxconf = (struct rte_eth_rxconf) {
1508                 .rx_thresh = {
1509                         .pthresh = IGC_DEFAULT_RX_PTHRESH,
1510                         .hthresh = IGC_DEFAULT_RX_HTHRESH,
1511                         .wthresh = IGC_DEFAULT_RX_WTHRESH,
1512                 },
1513                 .rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH,
1514                 .rx_drop_en = 0,
1515                 .offloads = 0,
1516         };
1517
1518         dev_info->default_txconf = (struct rte_eth_txconf) {
1519                 .tx_thresh = {
1520                         .pthresh = IGC_DEFAULT_TX_PTHRESH,
1521                         .hthresh = IGC_DEFAULT_TX_HTHRESH,
1522                         .wthresh = IGC_DEFAULT_TX_WTHRESH,
1523                 },
1524                 .offloads = 0,
1525         };
1526
1527         dev_info->rx_desc_lim = rx_desc_lim;
1528         dev_info->tx_desc_lim = tx_desc_lim;
1529
1530         dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
1531                         ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
1532                         ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G;
1533
1534         dev_info->max_mtu = dev_info->max_rx_pktlen - IGC_ETH_OVERHEAD;
1535         dev_info->min_mtu = RTE_ETHER_MIN_MTU;
1536         return 0;
1537 }
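
/*
 * Example of the derived limits above, assuming MAX_RX_JUMBO_FRAME_SIZE is
 * 0x2600 (9728 bytes): max_mtu = 9728 - IGC_ETH_OVERHEAD (22) = 9706, while
 * min_mtu stays at RTE_ETHER_MIN_MTU (68).
 */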
1538
1539 static int
1540 eth_igc_led_on(struct rte_eth_dev *dev)
1541 {
1542         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1543
1544         return igc_led_on(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1545 }
1546
1547 static int
1548 eth_igc_led_off(struct rte_eth_dev *dev)
1549 {
1550         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1551
1552         return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1553 }
1554
1555 static const uint32_t *
1556 eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev)
1557 {
1558         static const uint32_t ptypes[] = {
1559                 /* refers to rx_desc_pkt_info_to_pkt_type() */
1560                 RTE_PTYPE_L2_ETHER,
1561                 RTE_PTYPE_L3_IPV4,
1562                 RTE_PTYPE_L3_IPV4_EXT,
1563                 RTE_PTYPE_L3_IPV6,
1564                 RTE_PTYPE_L3_IPV6_EXT,
1565                 RTE_PTYPE_L4_TCP,
1566                 RTE_PTYPE_L4_UDP,
1567                 RTE_PTYPE_L4_SCTP,
1568                 RTE_PTYPE_TUNNEL_IP,
1569                 RTE_PTYPE_INNER_L3_IPV6,
1570                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1571                 RTE_PTYPE_INNER_L4_TCP,
1572                 RTE_PTYPE_INNER_L4_UDP,
1573                 RTE_PTYPE_UNKNOWN
1574         };
1575
1576         return ptypes;
1577 }
1578
1579 static int
1580 eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1581 {
1582         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1583         uint32_t frame_size = mtu + IGC_ETH_OVERHEAD;
1584         uint32_t rctl;
1585
1586         /* if extended VLAN has been enabled */
1587         if (IGC_READ_REG(hw, IGC_CTRL_EXT) & IGC_CTRL_EXT_EXT_VLAN)
1588                 frame_size += VLAN_TAG_SIZE;
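
        /*
         * Worked example: mtu = 1500 gives frame_size = 1500 +
         * IGC_ETH_OVERHEAD (14-byte header + 4-byte CRC + 4-byte VLAN tag)
         * = 1522, plus 4 more bytes (1526) when extended VLAN is enabled.
         */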
1589
1590         /* check that mtu is within the allowed range */
1591         if (mtu < RTE_ETHER_MIN_MTU ||
1592                 frame_size > MAX_RX_JUMBO_FRAME_SIZE)
1593                 return -EINVAL;
1594
1595         /*
1596          * If the device is started, refuse an MTU that requires scattered-
1597          * packet support when that feature has not already been enabled.
1598          */
1599         if (dev->data->dev_started && !dev->data->scattered_rx &&
1600             frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM) {
1601                 PMD_INIT_LOG(ERR, "Stop port first.");
1602                 return -EINVAL;
1603         }
1604
1605         rctl = IGC_READ_REG(hw, IGC_RCTL);
1606
1607         /* switch to jumbo mode if needed */
1608         if (mtu > RTE_ETHER_MTU) {
1609                 dev->data->dev_conf.rxmode.offloads |=
1610                         DEV_RX_OFFLOAD_JUMBO_FRAME;
1611                 rctl |= IGC_RCTL_LPE;
1612         } else {
1613                 dev->data->dev_conf.rxmode.offloads &=
1614                         ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1615                 rctl &= ~IGC_RCTL_LPE;
1616         }
1617         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1618
1619         /* update max frame size */
1620         dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
1621
1622         IGC_WRITE_REG(hw, IGC_RLPML,
1623                         dev->data->dev_conf.rxmode.max_rx_pkt_len);
1624
1625         return 0;
1626 }
1627
1628 static int
1629 eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1630                 uint32_t index, uint32_t pool)
1631 {
1632         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1633
1634         igc_rar_set(hw, mac_addr->addr_bytes, index);
1635         RTE_SET_USED(pool);
1636         return 0;
1637 }
1638
1639 static void
1640 eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index)
1641 {
1642         uint8_t addr[RTE_ETHER_ADDR_LEN];
1643         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1644
1645         memset(addr, 0, sizeof(addr));
1646         igc_rar_set(hw, addr, index);
1647 }
1648
1649 static int
1650 eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
1651                         struct rte_ether_addr *addr)
1652 {
1653         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1654         igc_rar_set(hw, addr->addr_bytes, 0);
1655         return 0;
1656 }
1657
1658 static int
1659 eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
1660                          struct rte_ether_addr *mc_addr_set,
1661                          uint32_t nb_mc_addr)
1662 {
1663         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1664         igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr);
1665         return 0;
1666 }
1667
1668 /*
1669  * Read the hardware statistics registers
1670  */
1671 static void
1672 igc_read_stats_registers(struct igc_hw *hw, struct igc_hw_stats *stats)
1673 {
1674         int pause_frames;
1675
1676         uint64_t old_gprc  = stats->gprc;
1677         uint64_t old_gptc  = stats->gptc;
1678         uint64_t old_tpr   = stats->tpr;
1679         uint64_t old_tpt   = stats->tpt;
1680         uint64_t old_rpthc = stats->rpthc;
1681         uint64_t old_hgptc = stats->hgptc;
1682
1683         stats->crcerrs += IGC_READ_REG(hw, IGC_CRCERRS);
1684         stats->algnerrc += IGC_READ_REG(hw, IGC_ALGNERRC);
1685         stats->rxerrc += IGC_READ_REG(hw, IGC_RXERRC);
1686         stats->mpc += IGC_READ_REG(hw, IGC_MPC);
1687         stats->scc += IGC_READ_REG(hw, IGC_SCC);
1688         stats->ecol += IGC_READ_REG(hw, IGC_ECOL);
1689
1690         stats->mcc += IGC_READ_REG(hw, IGC_MCC);
1691         stats->latecol += IGC_READ_REG(hw, IGC_LATECOL);
1692         stats->colc += IGC_READ_REG(hw, IGC_COLC);
1693
1694         stats->dc += IGC_READ_REG(hw, IGC_DC);
1695         stats->tncrs += IGC_READ_REG(hw, IGC_TNCRS);
1696         stats->htdpmc += IGC_READ_REG(hw, IGC_HTDPMC);
1697         stats->rlec += IGC_READ_REG(hw, IGC_RLEC);
1698         stats->xonrxc += IGC_READ_REG(hw, IGC_XONRXC);
1699         stats->xontxc += IGC_READ_REG(hw, IGC_XONTXC);
1700
1701         /*
1702          * For watchdog management we need to know if we have been
1703          * paused during the last interval, so capture that here.
1704          */
1705         pause_frames = IGC_READ_REG(hw, IGC_XOFFRXC);
1706         stats->xoffrxc += pause_frames;
1707         stats->xofftxc += IGC_READ_REG(hw, IGC_XOFFTXC);
1708         stats->fcruc += IGC_READ_REG(hw, IGC_FCRUC);
1709         stats->prc64 += IGC_READ_REG(hw, IGC_PRC64);
1710         stats->prc127 += IGC_READ_REG(hw, IGC_PRC127);
1711         stats->prc255 += IGC_READ_REG(hw, IGC_PRC255);
1712         stats->prc511 += IGC_READ_REG(hw, IGC_PRC511);
1713         stats->prc1023 += IGC_READ_REG(hw, IGC_PRC1023);
1714         stats->prc1522 += IGC_READ_REG(hw, IGC_PRC1522);
1715         stats->gprc += IGC_READ_REG(hw, IGC_GPRC);
1716         stats->bprc += IGC_READ_REG(hw, IGC_BPRC);
1717         stats->mprc += IGC_READ_REG(hw, IGC_MPRC);
1718         stats->gptc += IGC_READ_REG(hw, IGC_GPTC);
1719
1720         /* For the 64-bit byte counters, the low dword must be read first; */
1721         /* both halves clear on the read of the high dword. */
1722
1723         /* Workaround: CRC bytes are counted in the size, take away 4 bytes/packet */
1724         stats->gorc += IGC_READ_REG(hw, IGC_GORCL);
1725         stats->gorc += ((uint64_t)IGC_READ_REG(hw, IGC_GORCH) << 32);
1726         stats->gorc -= (stats->gprc - old_gprc) * RTE_ETHER_CRC_LEN;
1727         stats->gotc += IGC_READ_REG(hw, IGC_GOTCL);
1728         stats->gotc += ((uint64_t)IGC_READ_REG(hw, IGC_GOTCH) << 32);
1729         stats->gotc -= (stats->gptc - old_gptc) * RTE_ETHER_CRC_LEN;
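
        /*
         * Worked example: if 100 good packets were received since the last
         * read, (gprc - old_gprc) = 100, so 100 * RTE_ETHER_CRC_LEN = 400
         * bytes of hardware-counted CRC are subtracted from gorc.
         */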
1730
1731         stats->rnbc += IGC_READ_REG(hw, IGC_RNBC);
1732         stats->ruc += IGC_READ_REG(hw, IGC_RUC);
1733         stats->rfc += IGC_READ_REG(hw, IGC_RFC);
1734         stats->roc += IGC_READ_REG(hw, IGC_ROC);
1735         stats->rjc += IGC_READ_REG(hw, IGC_RJC);
1736
1737         stats->mgprc += IGC_READ_REG(hw, IGC_MGTPRC);
1738         stats->mgpdc += IGC_READ_REG(hw, IGC_MGTPDC);
1739         stats->mgptc += IGC_READ_REG(hw, IGC_MGTPTC);
1740         stats->b2ospc += IGC_READ_REG(hw, IGC_B2OSPC);
1741         stats->b2ogprc += IGC_READ_REG(hw, IGC_B2OGPRC);
1742         stats->o2bgptc += IGC_READ_REG(hw, IGC_O2BGPTC);
1743         stats->o2bspc += IGC_READ_REG(hw, IGC_O2BSPC);
1744
1745         stats->tpr += IGC_READ_REG(hw, IGC_TPR);
1746         stats->tpt += IGC_READ_REG(hw, IGC_TPT);
1747
1748         stats->tor += IGC_READ_REG(hw, IGC_TORL);
1749         stats->tor += ((uint64_t)IGC_READ_REG(hw, IGC_TORH) << 32);
1750         stats->tor -= (stats->tpr - old_tpr) * RTE_ETHER_CRC_LEN;
1751         stats->tot += IGC_READ_REG(hw, IGC_TOTL);
1752         stats->tot += ((uint64_t)IGC_READ_REG(hw, IGC_TOTH) << 32);
1753         stats->tot -= (stats->tpt - old_tpt) * RTE_ETHER_CRC_LEN;
1754
1755         stats->ptc64 += IGC_READ_REG(hw, IGC_PTC64);
1756         stats->ptc127 += IGC_READ_REG(hw, IGC_PTC127);
1757         stats->ptc255 += IGC_READ_REG(hw, IGC_PTC255);
1758         stats->ptc511 += IGC_READ_REG(hw, IGC_PTC511);
1759         stats->ptc1023 += IGC_READ_REG(hw, IGC_PTC1023);
1760         stats->ptc1522 += IGC_READ_REG(hw, IGC_PTC1522);
1761         stats->mptc += IGC_READ_REG(hw, IGC_MPTC);
1762         stats->bptc += IGC_READ_REG(hw, IGC_BPTC);
1763         stats->tsctc += IGC_READ_REG(hw, IGC_TSCTC);
1764
1765         stats->iac += IGC_READ_REG(hw, IGC_IAC);
1766         stats->rpthc += IGC_READ_REG(hw, IGC_RPTHC);
1767         stats->hgptc += IGC_READ_REG(hw, IGC_HGPTC);
1768         stats->icrxdmtc += IGC_READ_REG(hw, IGC_ICRXDMTC);
1769
1770         /* Host to Card Statistics */
1771         stats->hgorc += IGC_READ_REG(hw, IGC_HGORCL);
1772         stats->hgorc += ((uint64_t)IGC_READ_REG(hw, IGC_HGORCH) << 32);
1773         stats->hgorc -= (stats->rpthc - old_rpthc) * RTE_ETHER_CRC_LEN;
1774         stats->hgotc += IGC_READ_REG(hw, IGC_HGOTCL);
1775         stats->hgotc += ((uint64_t)IGC_READ_REG(hw, IGC_HGOTCH) << 32);
1776         stats->hgotc -= (stats->hgptc - old_hgptc) * RTE_ETHER_CRC_LEN;
1777         stats->lenerrs += IGC_READ_REG(hw, IGC_LENERRS);
1778 }
1779
1780 /*
1781  * Write 0 to all queue stats registers
1782  */
1783 static void
1784 igc_reset_queue_stats_register(struct igc_hw *hw)
1785 {
1786         int i;
1787
1788         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1789                 IGC_WRITE_REG(hw, IGC_PQGPRC(i), 0);
1790                 IGC_WRITE_REG(hw, IGC_PQGPTC(i), 0);
1791                 IGC_WRITE_REG(hw, IGC_PQGORC(i), 0);
1792                 IGC_WRITE_REG(hw, IGC_PQGOTC(i), 0);
1793                 IGC_WRITE_REG(hw, IGC_PQMPRC(i), 0);
1794                 IGC_WRITE_REG(hw, IGC_RQDPC(i), 0);
1795                 IGC_WRITE_REG(hw, IGC_TQDPC(i), 0);
1796         }
1797 }
1798
1799 /*
1800  * Read all hardware queue stats registers
1801  */
1802 static void
1803 igc_read_queue_stats_register(struct rte_eth_dev *dev)
1804 {
1805         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1806         struct igc_hw_queue_stats *queue_stats =
1807                                 IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1808         int i;
1809
1810         /*
1811          * These registers are not cleared on read. Furthermore, each
1812          * register wraps around back to 0x00000000 on the increment after
1813          * reaching 0xFFFFFFFF, then continues counting normally.
1814          */
1815         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1816                 union {
1817                         u64 ddword;
1818                         u32 dword[2];
1819                 } value;
1820                 u32 tmp;
1821
1822                 /*
1823                  * Read the register first; if the value is smaller than the
1824                  * previously read value, the register has wrapped around, so
1825                  * increment the high 4 bytes by 1 and replace the low 4
1826                  * bytes with the new value.
1827                  */
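                /*
                 * Hypothetical example: if the stored 64-bit total is
                 * 0x00000001fffffff0 and the register now reads 0x00000010,
                 * the old low dword (0xfffffff0) is greater than 0x10, so
                 * the high dword becomes 2, giving 0x0000000200000010.
                 */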
1828                 tmp = IGC_READ_REG(hw, IGC_PQGPRC(i));
1829                 value.ddword = queue_stats->pqgprc[i];
1830                 if (value.dword[U32_0_IN_U64] > tmp)
1831                         value.dword[U32_1_IN_U64]++;
1832                 value.dword[U32_0_IN_U64] = tmp;
1833                 queue_stats->pqgprc[i] = value.ddword;
1834
1835                 tmp = IGC_READ_REG(hw, IGC_PQGPTC(i));
1836                 value.ddword = queue_stats->pqgptc[i];
1837                 if (value.dword[U32_0_IN_U64] > tmp)
1838                         value.dword[U32_1_IN_U64]++;
1839                 value.dword[U32_0_IN_U64] = tmp;
1840                 queue_stats->pqgptc[i] = value.ddword;
1841
1842                 tmp = IGC_READ_REG(hw, IGC_PQGORC(i));
1843                 value.ddword = queue_stats->pqgorc[i];
1844                 if (value.dword[U32_0_IN_U64] > tmp)
1845                         value.dword[U32_1_IN_U64]++;
1846                 value.dword[U32_0_IN_U64] = tmp;
1847                 queue_stats->pqgorc[i] = value.ddword;
1848
1849                 tmp = IGC_READ_REG(hw, IGC_PQGOTC(i));
1850                 value.ddword = queue_stats->pqgotc[i];
1851                 if (value.dword[U32_0_IN_U64] > tmp)
1852                         value.dword[U32_1_IN_U64]++;
1853                 value.dword[U32_0_IN_U64] = tmp;
1854                 queue_stats->pqgotc[i] = value.ddword;
1855
1856                 tmp = IGC_READ_REG(hw, IGC_PQMPRC(i));
1857                 value.ddword = queue_stats->pqmprc[i];
1858                 if (value.dword[U32_0_IN_U64] > tmp)
1859                         value.dword[U32_1_IN_U64]++;
1860                 value.dword[U32_0_IN_U64] = tmp;
1861                 queue_stats->pqmprc[i] = value.ddword;
1862
1863                 tmp = IGC_READ_REG(hw, IGC_RQDPC(i));
1864                 value.ddword = queue_stats->rqdpc[i];
1865                 if (value.dword[U32_0_IN_U64] > tmp)
1866                         value.dword[U32_1_IN_U64]++;
1867                 value.dword[U32_0_IN_U64] = tmp;
1868                 queue_stats->rqdpc[i] = value.ddword;
1869
1870                 tmp = IGC_READ_REG(hw, IGC_TQDPC(i));
1871                 value.ddword = queue_stats->tqdpc[i];
1872                 if (value.dword[U32_0_IN_U64] > tmp)
1873                         value.dword[U32_1_IN_U64]++;
1874                 value.dword[U32_0_IN_U64] = tmp;
1875                 queue_stats->tqdpc[i] = value.ddword;
1876         }
1877 }
1878
1879 static int
1880 eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
1881 {
1882         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1883         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1884         struct igc_hw_stats *stats = IGC_DEV_PRIVATE_STATS(dev);
1885         struct igc_hw_queue_stats *queue_stats =
1886                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1887         int i;
1888
1889         /*
1890          * Cancel the stats handler, since it also reads the queue stats registers
1891          */
1892         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1893
1894         /* Read the stats registers */
1895         igc_read_queue_stats_register(dev);
1896         igc_read_stats_registers(hw, stats);
1897
1898         if (rte_stats == NULL) {
1899                 /* Restart queue status handler */
1900                 rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1901                                 igc_update_queue_stats_handler, dev);
1902                 return -EINVAL;
1903         }
1904
1905         /* Rx Errors */
1906         rte_stats->imissed = stats->mpc;
1907         rte_stats->ierrors = stats->crcerrs + stats->rlec +
1908                         stats->rxerrc + stats->algnerrc;
1909
1910         /* Tx Errors */
1911         rte_stats->oerrors = stats->ecol + stats->latecol;
1912
1913         rte_stats->ipackets = stats->gprc;
1914         rte_stats->opackets = stats->gptc;
1915         rte_stats->ibytes   = stats->gorc;
1916         rte_stats->obytes   = stats->gotc;
1917
1918         /* Get per-queue statistics */
1919         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1920                 /* Get TX queue statistics */
1921                 int map_id = igc->txq_stats_map[i];
1922                 if (map_id >= 0) {
1923                         rte_stats->q_opackets[map_id] += queue_stats->pqgptc[i];
1924                         rte_stats->q_obytes[map_id] += queue_stats->pqgotc[i];
1925                 }
1926                 /* Get RX queue statistics */
1927                 map_id = igc->rxq_stats_map[i];
1928                 if (map_id >= 0) {
1929                         rte_stats->q_ipackets[map_id] += queue_stats->pqgprc[i];
1930                         rte_stats->q_ibytes[map_id] += queue_stats->pqgorc[i];
1931                         rte_stats->q_errors[map_id] += queue_stats->rqdpc[i];
1932                 }
1933         }
1934
1935         /* Restart queue status handler */
1936         rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1937                         igc_update_queue_stats_handler, dev);
1938         return 0;
1939 }
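
/*
 * A minimal usage sketch from the application side (assuming port_id refers
 * to an igc port, error handling omitted):
 *
 *	struct rte_eth_stats st;
 *	if (rte_eth_stats_get(port_id, &st) == 0)
 *		printf("rx=%" PRIu64 " rx_errors=%" PRIu64 "\n",
 *		       st.ipackets, st.ierrors);
 */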
1940
1941 static int
1942 eth_igc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1943                    unsigned int n)
1944 {
1945         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1946         struct igc_hw_stats *hw_stats =
1947                         IGC_DEV_PRIVATE_STATS(dev);
1948         unsigned int i;
1949
1950         igc_read_stats_registers(hw, hw_stats);
1951
1952         if (n < IGC_NB_XSTATS)
1953                 return IGC_NB_XSTATS;
1954
1955         /* If this is a reset, xstats is NULL and we have already cleared
1956          * the registers by reading them.
1957          */
1958         if (!xstats)
1959                 return 0;
1960
1961         /* Extended stats */
1962         for (i = 0; i < IGC_NB_XSTATS; i++) {
1963                 xstats[i].id = i;
1964                 xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
1965                         rte_igc_stats_strings[i].offset);
1966         }
1967
1968         return IGC_NB_XSTATS;
1969 }
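
/*
 * Callers usually probe the count first and then fetch, e.g. (a sketch,
 * error handling omitted):
 *
 *	int nb = rte_eth_xstats_get(port_id, NULL, 0);
 *	struct rte_eth_xstat *xs = calloc(nb, sizeof(*xs));
 *	rte_eth_xstats_get(port_id, xs, nb);
 */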
1970
1971 static int
1972 eth_igc_xstats_reset(struct rte_eth_dev *dev)
1973 {
1974         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1975         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1976         struct igc_hw_queue_stats *queue_stats =
1977                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1978
1979         /* Cancel the queue stats handler to avoid conflicts */
1980         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1981
1982         /* HW registers are cleared on read */
1983         igc_reset_queue_stats_register(hw);
1984         igc_read_stats_registers(hw, hw_stats);
1985
1986         /* Reset software totals */
1987         memset(hw_stats, 0, sizeof(*hw_stats));
1988         memset(queue_stats, 0, sizeof(*queue_stats));
1989
1990         /* Restart the queue status handler */
1991         rte_eal_alarm_set(IGC_ALARM_INTERVAL, igc_update_queue_stats_handler,
1992                         dev);
1993
1994         return 0;
1995 }
1996
1997 static int
1998 eth_igc_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
1999         struct rte_eth_xstat_name *xstats_names, unsigned int size)
2000 {
2001         unsigned int i;
2002
2003         if (xstats_names == NULL)
2004                 return IGC_NB_XSTATS;
2005
2006         if (size < IGC_NB_XSTATS) {
2007                 PMD_DRV_LOG(ERR, "xstats names buffer is too small!");
2008                 return IGC_NB_XSTATS;
2009         }
2010
2011         for (i = 0; i < IGC_NB_XSTATS; i++)
2012                 strlcpy(xstats_names[i].name, rte_igc_stats_strings[i].name,
2013                         sizeof(xstats_names[i].name));
2014
2015         return IGC_NB_XSTATS;
2016 }
2017
2018 static int
2019 eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
2020                 struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
2021                 unsigned int limit)
2022 {
2023         unsigned int i;
2024
2025         if (!ids)
2026                 return eth_igc_xstats_get_names(dev, xstats_names, limit);
2027
2028         for (i = 0; i < limit; i++) {
2029                 if (ids[i] >= IGC_NB_XSTATS) {
2030                         PMD_DRV_LOG(ERR, "id value isn't valid");
2031                         return -EINVAL;
2032                 }
2033                 strlcpy(xstats_names[i].name,
2034                         rte_igc_stats_strings[ids[i]].name,
2035                         sizeof(xstats_names[i].name));
2036         }
2037         return limit;
2038 }
2039
2040 static int
2041 eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
2042                 uint64_t *values, unsigned int n)
2043 {
2044         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2045         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
2046         unsigned int i;
2047
2048         igc_read_stats_registers(hw, hw_stats);
2049
2050         if (!ids) {
2051                 if (n < IGC_NB_XSTATS)
2052                         return IGC_NB_XSTATS;
2053
2054                 /* If this is a reset, values is NULL and we have already
2055                  * cleared the registers by reading them.
2056                  */
2057                 if (!values)
2058                         return 0;
2059
2060                 /* Extended stats */
2061                 for (i = 0; i < IGC_NB_XSTATS; i++)
2062                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2063                                         rte_igc_stats_strings[i].offset);
2064
2065                 return IGC_NB_XSTATS;
2066
2067         } else {
2068                 for (i = 0; i < n; i++) {
2069                         if (ids[i] >= IGC_NB_XSTATS) {
2070                                 PMD_DRV_LOG(ERR, "id value isn't valid");
2071                                 return -EINVAL;
2072                         }
2073                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2074                                         rte_igc_stats_strings[ids[i]].offset);
2075                 }
2076                 return n;
2077         }
2078 }
2079
2080 static int
2081 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
2082                 uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx)
2083 {
2084         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
2085
2086         /* check that the queue id is valid */
2087         if (queue_id >= IGC_QUEUE_PAIRS_NUM) {
2088                 PMD_DRV_LOG(ERR, "invalid queue id %u, max is %u",
2089                         queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2090                 return -EINVAL;
2091         }
2092
2093         /* store the stats mapping index */
2094         if (is_rx)
2095                 igc->rxq_stats_map[queue_id] = stat_idx;
2096         else
2097                 igc->txq_stats_map[queue_id] = stat_idx;
2098
2099         return 0;
2100 }
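
/*
 * Example: rte_eth_dev_set_rx_queue_stats_mapping(port_id, 2, 0) maps RX
 * queue 2 to stats index 0, so its counters appear in q_ipackets[0],
 * q_ibytes[0] and q_errors[0] on the next rte_eth_stats_get() call.
 */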
2101
2102 static int
2103 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
2104 {
2105         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2106         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2107         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2108         uint32_t vec = IGC_MISC_VEC_ID;
2109
2110         if (rte_intr_allow_others(intr_handle))
2111                 vec = IGC_RX_VEC_START;
2112
2113         uint32_t mask = 1u << (queue_id + vec);
2114
2115         IGC_WRITE_REG(hw, IGC_EIMC, mask);
2116         IGC_WRITE_FLUSH(hw);
2117
2118         return 0;
2119 }
2120
2121 static int
2122 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
2123 {
2124         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2125         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2126         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2127         uint32_t vec = IGC_MISC_VEC_ID;
2128
2129         if (rte_intr_allow_others(intr_handle))
2130                 vec = IGC_RX_VEC_START;
2131
2132         uint32_t mask = 1u << (queue_id + vec);
2133
2134         IGC_WRITE_REG(hw, IGC_EIMS, mask);
2135         IGC_WRITE_FLUSH(hw);
2136
2137         rte_intr_enable(intr_handle);
2138
2139         return 0;
2140 }
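
/*
 * Worked example for the mask above: when rte_intr_allow_others() is true,
 * RX vectors start at IGC_RX_VEC_START (RTE_INTR_VEC_RXTX_OFFSET, i.e. 1),
 * so queue 2 maps to EIMS/EIMC bit 1u << (2 + 1) = 0x8.
 */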
2141
2142 static int
2143 eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2144 {
2145         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2146         uint32_t ctrl;
2147         int tx_pause;
2148         int rx_pause;
2149
2150         fc_conf->pause_time = hw->fc.pause_time;
2151         fc_conf->high_water = hw->fc.high_water;
2152         fc_conf->low_water = hw->fc.low_water;
2153         fc_conf->send_xon = hw->fc.send_xon;
2154         fc_conf->autoneg = hw->mac.autoneg;
2155
2156         /*
2157          * Return the rx_pause and tx_pause status according to the actual
2158          * setting of the TFCE and RFCE bits in the CTRL register.
2159          */
2160         ctrl = IGC_READ_REG(hw, IGC_CTRL);
2161         if (ctrl & IGC_CTRL_TFCE)
2162                 tx_pause = 1;
2163         else
2164                 tx_pause = 0;
2165
2166         if (ctrl & IGC_CTRL_RFCE)
2167                 rx_pause = 1;
2168         else
2169                 rx_pause = 0;
2170
2171         if (rx_pause && tx_pause)
2172                 fc_conf->mode = RTE_FC_FULL;
2173         else if (rx_pause)
2174                 fc_conf->mode = RTE_FC_RX_PAUSE;
2175         else if (tx_pause)
2176                 fc_conf->mode = RTE_FC_TX_PAUSE;
2177         else
2178                 fc_conf->mode = RTE_FC_NONE;
2179
2180         return 0;
2181 }
2182
2183 static int
2184 eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2185 {
2186         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2187         uint32_t rx_buf_size;
2188         uint32_t max_high_water;
2189         uint32_t rctl;
2190         int err;
2191
2192         if (fc_conf->autoneg != hw->mac.autoneg)
2193                 return -ENOTSUP;
2194
2195         rx_buf_size = igc_get_rx_buffer_size(hw);
2196         PMD_DRV_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size);
2197
2198         /* Reserve at least one full Ethernet frame above the high watermark */
2199         max_high_water = rx_buf_size - RTE_ETHER_MAX_LEN;
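        /* e.g. a 64 KB Rx packet buffer gives 65536 - 1518 = 64018 bytes */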
2200         if (fc_conf->high_water > max_high_water ||
2201                 fc_conf->high_water < fc_conf->low_water) {
2202                 PMD_DRV_LOG(ERR,
2203                         "Incorrect high(%u)/low(%u) water value, max is %u",
2204                         fc_conf->high_water, fc_conf->low_water,
2205                         max_high_water);
2206                 return -EINVAL;
2207         }
2208
2209         switch (fc_conf->mode) {
2210         case RTE_FC_NONE:
2211                 hw->fc.requested_mode = igc_fc_none;
2212                 break;
2213         case RTE_FC_RX_PAUSE:
2214                 hw->fc.requested_mode = igc_fc_rx_pause;
2215                 break;
2216         case RTE_FC_TX_PAUSE:
2217                 hw->fc.requested_mode = igc_fc_tx_pause;
2218                 break;
2219         case RTE_FC_FULL:
2220                 hw->fc.requested_mode = igc_fc_full;
2221                 break;
2222         default:
2223                 PMD_DRV_LOG(ERR, "unsupported fc mode: %u", fc_conf->mode);
2224                 return -EINVAL;
2225         }
2226
2227         hw->fc.pause_time     = fc_conf->pause_time;
2228         hw->fc.high_water     = fc_conf->high_water;
2229         hw->fc.low_water      = fc_conf->low_water;
2230         hw->fc.send_xon       = fc_conf->send_xon;
2231
2232         err = igc_setup_link_generic(hw);
2233         if (err == IGC_SUCCESS) {
2234                 /*
2235                  * Check whether MAC control frames should be forwarded; the
2236                  * driver has no native capability for that, so write the
2237                  * register ourselves.
2238                  */
2239                 rctl = IGC_READ_REG(hw, IGC_RCTL);
2240
2241                 /* set or clear the RCTL.PMCF bit depending on configuration */
2242                 if (fc_conf->mac_ctrl_frame_fwd != 0)
2243                         rctl |= IGC_RCTL_PMCF;
2244                 else
2245                         rctl &= ~IGC_RCTL_PMCF;
2246
2247                 IGC_WRITE_REG(hw, IGC_RCTL, rctl);
2248                 IGC_WRITE_FLUSH(hw);
2249
2250                 return 0;
2251         }
2252
2253         PMD_DRV_LOG(ERR, "igc_setup_link_generic() failed: 0x%x", err);
2254         return -EIO;
2255 }
2256
2257 static int
2258 eth_igc_rss_reta_update(struct rte_eth_dev *dev,
2259                         struct rte_eth_rss_reta_entry64 *reta_conf,
2260                         uint16_t reta_size)
2261 {
2262         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2263         uint16_t i;
2264
2265         if (reta_size != ETH_RSS_RETA_SIZE_128) {
2266                 PMD_DRV_LOG(ERR,
2267                         "The size of the configured RSS redirection table (%d) doesn't match the number supported by hardware (%d)",
2268                         reta_size, ETH_RSS_RETA_SIZE_128);
2269                 return -EINVAL;
2270         }
2271
2272         RTE_BUILD_BUG_ON(ETH_RSS_RETA_SIZE_128 % IGC_RSS_RDT_REG_SIZE);
2273
2274         /* set redirection table */
2275         for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2276                 union igc_rss_reta_reg reta, reg;
2277                 uint16_t idx, shift;
2278                 uint8_t j, mask;
2279
2280                 idx = i / RTE_RETA_GROUP_SIZE;
2281                 shift = i % RTE_RETA_GROUP_SIZE;
2282                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2283                                 IGC_RSS_RDT_REG_SIZE_MASK);
2284
2285                 /* skip if this register does not need updating */
2286                 if (!mask ||
2287                     shift > (RTE_RETA_GROUP_SIZE - IGC_RSS_RDT_REG_SIZE))
2288                         continue;
2289
2290                 /* check the mask to see whether the old register value is needed */
2291                 if (mask == IGC_RSS_RDT_REG_SIZE_MASK)
2292                         reg.dword = 0;
2293                 else
2294                         reg.dword = IGC_READ_REG_LE_VALUE(hw,
2295                                         IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2296
2297                 /* update the register */
2298                 RTE_BUILD_BUG_ON(sizeof(reta.bytes) != IGC_RSS_RDT_REG_SIZE);
2299                 for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2300                         if (mask & (1u << j))
2301                                 reta.bytes[j] =
2302                                         (uint8_t)reta_conf[idx].reta[shift + j];
2303                         else
2304                                 reta.bytes[j] = reg.bytes[j];
2305                 }
2306                 IGC_WRITE_REG_LE_VALUE(hw,
2307                         IGC_RETA(i / IGC_RSS_RDT_REG_SIZE), reta.dword);
2308         }
2309
2310         return 0;
2311 }
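
/*
 * Worked example for the loop above, assuming IGC_RSS_RDT_REG_SIZE == 4
 * (four one-byte entries per RETA register): i = 68 gives idx = 68 / 64 = 1,
 * shift = 4, a mask over reta_conf[1].reta[4..7], and RETA register 68 / 4 = 17.
 */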
2312
2313 static int
2314 eth_igc_rss_reta_query(struct rte_eth_dev *dev,
2315                        struct rte_eth_rss_reta_entry64 *reta_conf,
2316                        uint16_t reta_size)
2317 {
2318         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2319         uint16_t i;
2320
2321         if (reta_size != ETH_RSS_RETA_SIZE_128) {
2322                 PMD_DRV_LOG(ERR,
2323                         "The size of the configured RSS redirection table (%d) doesn't match the number supported by hardware (%d)",
2324                         reta_size, ETH_RSS_RETA_SIZE_128);
2325                 return -EINVAL;
2326         }
2327
2328         RTE_BUILD_BUG_ON(ETH_RSS_RETA_SIZE_128 % IGC_RSS_RDT_REG_SIZE);
2329
2330         /* read redirection table */
2331         for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2332                 union igc_rss_reta_reg reta;
2333                 uint16_t idx, shift;
2334                 uint8_t j, mask;
2335
2336                 idx = i / RTE_RETA_GROUP_SIZE;
2337                 shift = i % RTE_RETA_GROUP_SIZE;
2338                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2339                                 IGC_RSS_RDT_REG_SIZE_MASK);
2340
2341                 /* skip if this register does not need reading */
2342                 if (!mask ||
2343                     shift > (RTE_RETA_GROUP_SIZE - IGC_RSS_RDT_REG_SIZE))
2344                         continue;
2345
2346                 /* read register and get the queue index */
2347                 RTE_BUILD_BUG_ON(sizeof(reta.bytes) != IGC_RSS_RDT_REG_SIZE);
2348                 reta.dword = IGC_READ_REG_LE_VALUE(hw,
2349                                 IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2350                 for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2351                         if (mask & (1u << j))
2352                                 reta_conf[idx].reta[shift + j] = reta.bytes[j];
2353                 }
2354         }
2355
2356         return 0;
2357 }
2358
2359 static int
2360 eth_igc_rss_hash_update(struct rte_eth_dev *dev,
2361                         struct rte_eth_rss_conf *rss_conf)
2362 {
2363         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2364         igc_hw_rss_hash_set(hw, rss_conf);
2365         return 0;
2366 }
2367
2368 static int
2369 eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
2370                         struct rte_eth_rss_conf *rss_conf)
2371 {
2372         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2373         uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
2374         uint32_t mrqc;
2375         uint64_t rss_hf;
2376
2377         if (hash_key != NULL) {
2378                 int i;
2379
2380                 /* if there is not enough space to store the hash key */
2381                 if (rss_conf->rss_key_len != IGC_HKEY_SIZE) {
2382                         PMD_DRV_LOG(ERR,
2383                                 "RSS hash key size %u in parameter doesn't match the hardware hash key size %u",
2384                                 rss_conf->rss_key_len, IGC_HKEY_SIZE);
2385                         return -EINVAL;
2386                 }
2387
2388                 /* read RSS key from register */
2389                 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
2390                         hash_key[i] = IGC_READ_REG_LE_VALUE(hw, IGC_RSSRK(i));
2391         }
2392
2393         /* get RSS functions configured in MRQC register */
2394         mrqc = IGC_READ_REG(hw, IGC_MRQC);
2395         if ((mrqc & IGC_MRQC_ENABLE_RSS_4Q) == 0)
2396                 return 0;
2397
2398         rss_hf = 0;
2399         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4)
2400                 rss_hf |= ETH_RSS_IPV4;
2401         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_TCP)
2402                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2403         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6)
2404                 rss_hf |= ETH_RSS_IPV6;
2405         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_EX)
2406                 rss_hf |= ETH_RSS_IPV6_EX;
2407         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP)
2408                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2409         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP_EX)
2410                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2411         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_UDP)
2412                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2413         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP)
2414                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2415         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP_EX)
2416                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2417
2418         rss_conf->rss_hf |= rss_hf;
2419         return 0;
2420 }
2421
2422 static int
2423 eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2424 {
2425         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2426         struct igc_vfta *shadow_vfta = IGC_DEV_PRIVATE_VFTA(dev);
2427         uint32_t vfta;
2428         uint32_t vid_idx;
2429         uint32_t vid_bit;
2430
2431         vid_idx = (vlan_id >> IGC_VFTA_ENTRY_SHIFT) & IGC_VFTA_ENTRY_MASK;
2432         vid_bit = 1u << (vlan_id & IGC_VFTA_ENTRY_BIT_SHIFT_MASK);
2433         vfta = shadow_vfta->vfta[vid_idx];
2434         if (on)
2435                 vfta |= vid_bit;
2436         else
2437                 vfta &= ~vid_bit;
2438         IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, vid_idx, vfta);
2439
2440         /* update local VFTA copy */
2441         shadow_vfta->vfta[vid_idx] = vfta;
2442
2443         return 0;
2444 }
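
/*
 * Worked example, assuming IGC_VFTA_ENTRY_SHIFT == 5 (32 VLAN IDs per
 * 32-bit VFTA entry): vlan_id = 100 yields vid_idx = 100 >> 5 = 3 and
 * vid_bit = 1u << (100 & 0x1f) = 1u << 4, i.e. bit 4 of VFTA[3].
 */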
2445
2446 static void
2447 igc_vlan_hw_filter_disable(struct rte_eth_dev *dev)
2448 {
2449         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2450         igc_read_reg_check_clear_bits(hw, IGC_RCTL,
2451                         IGC_RCTL_CFIEN | IGC_RCTL_VFE);
2452 }
2453
2454 static void
2455 igc_vlan_hw_filter_enable(struct rte_eth_dev *dev)
2456 {
2457         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2458         struct igc_vfta *shadow_vfta = IGC_DEV_PRIVATE_VFTA(dev);
2459         uint32_t reg_val;
2460         int i;
2461
2462         /* Filter Table Enable, CFI not used for packet acceptance */
2463         reg_val = IGC_READ_REG(hw, IGC_RCTL);
2464         reg_val &= ~IGC_RCTL_CFIEN;
2465         reg_val |= IGC_RCTL_VFE;
2466         IGC_WRITE_REG(hw, IGC_RCTL, reg_val);
2467
2468         /* restore VFTA table */
2469         for (i = 0; i < IGC_VFTA_SIZE; i++)
2470                 IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, i, shadow_vfta->vfta[i]);
2471 }
2472
2473 static void
2474 igc_vlan_hw_strip_disable(struct rte_eth_dev *dev)
2475 {
2476         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2477
2478         igc_read_reg_check_clear_bits(hw, IGC_CTRL, IGC_CTRL_VME);
2479 }
2480
2481 static void
2482 igc_vlan_hw_strip_enable(struct rte_eth_dev *dev)
2483 {
2484         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2485
2486         igc_read_reg_check_set_bits(hw, IGC_CTRL, IGC_CTRL_VME);
2487 }
2488
2489 static int
2490 igc_vlan_hw_extend_disable(struct rte_eth_dev *dev)
2491 {
2492         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2493         uint32_t ctrl_ext;
2494
2495         ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
2496
2497         /* if extended VLAN hasn't been enabled */
2498         if ((ctrl_ext & IGC_CTRL_EXT_EXT_VLAN) == 0)
2499                 return 0;
2500
2501         if ((dev->data->dev_conf.rxmode.offloads &
2502                         DEV_RX_OFFLOAD_JUMBO_FRAME) == 0)
2503                 goto write_ext_vlan;
2504
2505         /* Update maximum packet length */
2506         if (dev->data->dev_conf.rxmode.max_rx_pkt_len <
2507                 RTE_ETHER_MIN_MTU + VLAN_TAG_SIZE) {
2508                 PMD_DRV_LOG(ERR, "Maximum packet length %u error, min is %u",
2509                         dev->data->dev_conf.rxmode.max_rx_pkt_len,
2510                         VLAN_TAG_SIZE + RTE_ETHER_MIN_MTU);
2511                 return -EINVAL;
2512         }
2513         dev->data->dev_conf.rxmode.max_rx_pkt_len -= VLAN_TAG_SIZE;
2514         IGC_WRITE_REG(hw, IGC_RLPML,
2515                 dev->data->dev_conf.rxmode.max_rx_pkt_len);
2516
2517 write_ext_vlan:
2518         IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext & ~IGC_CTRL_EXT_EXT_VLAN);
2519         return 0;
2520 }
2521
2522 static int
2523 igc_vlan_hw_extend_enable(struct rte_eth_dev *dev)
2524 {
2525         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2526         uint32_t ctrl_ext;
2527
2528         ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
2529
2530         /* if extended VLAN has already been enabled */
2531         if (ctrl_ext & IGC_CTRL_EXT_EXT_VLAN)
2532                 return 0;
2533
2534         if ((dev->data->dev_conf.rxmode.offloads &
2535                         DEV_RX_OFFLOAD_JUMBO_FRAME) == 0)
2536                 goto write_ext_vlan;
2537
2538         /* Update maximum packet length */
2539         if (dev->data->dev_conf.rxmode.max_rx_pkt_len >
2540                 MAX_RX_JUMBO_FRAME_SIZE - VLAN_TAG_SIZE) {
2541                 PMD_DRV_LOG(ERR, "Maximum packet length %u error, max is %u",
2542                         dev->data->dev_conf.rxmode.max_rx_pkt_len +
2543                         VLAN_TAG_SIZE, MAX_RX_JUMBO_FRAME_SIZE);
2544                 return -EINVAL;
2545         }
2546         dev->data->dev_conf.rxmode.max_rx_pkt_len += VLAN_TAG_SIZE;
2547         IGC_WRITE_REG(hw, IGC_RLPML,
2548                 dev->data->dev_conf.rxmode.max_rx_pkt_len);
2549
2550 write_ext_vlan:
2551         IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_EXT_VLAN);
2552         return 0;
2553 }
2554
2555 static int
2556 eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2557 {
2558         struct rte_eth_rxmode *rxmode;
2559
2560         rxmode = &dev->data->dev_conf.rxmode;
2561         if (mask & ETH_VLAN_STRIP_MASK) {
2562                 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2563                         igc_vlan_hw_strip_enable(dev);
2564                 else
2565                         igc_vlan_hw_strip_disable(dev);
2566         }
2567
2568         if (mask & ETH_VLAN_FILTER_MASK) {
2569                 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2570                         igc_vlan_hw_filter_enable(dev);
2571                 else
2572                         igc_vlan_hw_filter_disable(dev);
2573         }
2574
2575         if (mask & ETH_VLAN_EXTEND_MASK) {
2576                 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2577                         return igc_vlan_hw_extend_enable(dev);
2578                 else
2579                         return igc_vlan_hw_extend_disable(dev);
2580         }
2581
2582         return 0;
2583 }
2584
2585 static int
2586 eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
2587                       enum rte_vlan_type vlan_type,
2588                       uint16_t tpid)
2589 {
2590         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2591         uint32_t reg_val;
2592
2593         /* only the outer TPID of a double VLAN (QinQ) can be configured */
2594         if (vlan_type == ETH_VLAN_TYPE_OUTER) {
2595                 reg_val = IGC_READ_REG(hw, IGC_VET);
2596                 reg_val = (reg_val & (~IGC_VET_EXT)) |
2597                         ((uint32_t)tpid << IGC_VET_EXT_SHIFT);
2598                 IGC_WRITE_REG(hw, IGC_VET, reg_val);
2599
2600                 return 0;
2601         }
2602
2603         /* all other TPID values are read-only */
2604         PMD_DRV_LOG(ERR, "Not supported");
2605         return -ENOTSUP;
2606 }
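
/*
 * Example: an application selects the 802.1ad S-TAG (0x88a8) as the outer
 * TPID with rte_eth_dev_set_vlan_ether_type(port_id, ETH_VLAN_TYPE_OUTER,
 * 0x88a8), which lands in the upper 16 bits of the VET register here.
 */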
2607
2608 static int
2609 eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2610         struct rte_pci_device *pci_dev)
2611 {
2612         PMD_INIT_FUNC_TRACE();
2613         return rte_eth_dev_pci_generic_probe(pci_dev,
2614                 sizeof(struct igc_adapter), eth_igc_dev_init);
2615 }
2616
2617 static int
2618 eth_igc_pci_remove(struct rte_pci_device *pci_dev)
2619 {
2620         PMD_INIT_FUNC_TRACE();
2621         return rte_eth_dev_pci_generic_remove(pci_dev, eth_igc_dev_uninit);
2622 }
2623
2624 static struct rte_pci_driver rte_igc_pmd = {
2625         .id_table = pci_id_igc_map,
2626         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2627         .probe = eth_igc_pci_probe,
2628         .remove = eth_igc_pci_remove,
2629 };
2630
2631 RTE_PMD_REGISTER_PCI(net_igc, rte_igc_pmd);
2632 RTE_PMD_REGISTER_PCI_TABLE(net_igc, pci_id_igc_map);
2633 RTE_PMD_REGISTER_KMOD_DEP(net_igc, "* igb_uio | uio_pci_generic | vfio-pci");