net/igc: fix boundary checks for RSS
[dpdk.git] / drivers / net / igc / igc_ethdev.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2019-2020 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <string.h>
7
8 #include <rte_string_fns.h>
9 #include <rte_pci.h>
10 #include <rte_bus_pci.h>
11 #include <rte_ethdev_driver.h>
12 #include <rte_ethdev_pci.h>
13 #include <rte_malloc.h>
14 #include <rte_alarm.h>
15
16 #include "igc_logs.h"
17 #include "igc_txrx.h"
18 #include "igc_filter.h"
19 #include "igc_flow.h"
20
21 #define IGC_INTEL_VENDOR_ID             0x8086
22
23 /*
24  * The overhead from MTU to max frame size.
25  * A VLAN tag may be present, so it needs to be counted as well.
26  */
27 #define IGC_ETH_OVERHEAD                (RTE_ETHER_HDR_LEN + \
28                                         RTE_ETHER_CRC_LEN + VLAN_TAG_SIZE)
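/*
 * Worked example (illustrative, not in the original file): with
 * RTE_ETHER_HDR_LEN = 14, RTE_ETHER_CRC_LEN = 4 and VLAN_TAG_SIZE = 4,
 * IGC_ETH_OVERHEAD is 22 bytes, so a standard 1500-byte MTU corresponds
 * to a max frame size of 1522 bytes:
 *
 *	uint32_t frame_size = mtu + IGC_ETH_OVERHEAD;	// 1500 -> 1522
 */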
29
30 #define IGC_FC_PAUSE_TIME               0x0680
31 #define IGC_LINK_UPDATE_CHECK_TIMEOUT   90  /* 9s */
32 #define IGC_LINK_UPDATE_CHECK_INTERVAL  100 /* ms */
33
34 #define IGC_MISC_VEC_ID                 RTE_INTR_VEC_ZERO_OFFSET
35 #define IGC_RX_VEC_START                RTE_INTR_VEC_RXTX_OFFSET
37 #define IGC_FLAG_NEED_LINK_UPDATE       (1u << 0)       /* need update link */
38
39 #define IGC_DEFAULT_RX_FREE_THRESH      32
40
41 #define IGC_DEFAULT_RX_PTHRESH          8
42 #define IGC_DEFAULT_RX_HTHRESH          8
43 #define IGC_DEFAULT_RX_WTHRESH          4
44
45 #define IGC_DEFAULT_TX_PTHRESH          8
46 #define IGC_DEFAULT_TX_HTHRESH          1
47 #define IGC_DEFAULT_TX_WTHRESH          16
48
49 /* MSI-X other interrupt vector */
50 #define IGC_MSIX_OTHER_INTR_VEC         0
51
52 /* External VLAN Enable bit mask */
53 #define IGC_CTRL_EXT_EXT_VLAN           (1u << 26)
54
55 /* Speed select */
56 #define IGC_CTRL_SPEED_MASK             (7u << 8)
57 #define IGC_CTRL_SPEED_2500             (6u << 8)
58
59 /* External VLAN Ether Type bit mask and shift */
60 #define IGC_VET_EXT                     0xFFFF0000
61 #define IGC_VET_EXT_SHIFT               16
62
63 /* Force EEE Auto-negotiation */
64 #define IGC_EEER_EEE_FRC_AN             (1u << 28)
65
66 /* Per Queue Good Packets Received Count */
67 #define IGC_PQGPRC(idx)         (0x10010 + 0x100 * (idx))
68 /* Per Queue Good Octets Received Count */
69 #define IGC_PQGORC(idx)         (0x10018 + 0x100 * (idx))
70 /* Per Queue Good Octets Transmitted Count */
71 #define IGC_PQGOTC(idx)         (0x10034 + 0x100 * (idx))
72 /* Per Queue Multicast Packets Received Count */
73 #define IGC_PQMPRC(idx)         (0x10038 + 0x100 * (idx))
74 /* Transmit Queue Drop Packet Count */
75 #define IGC_TQDPC(idx)          (0xe030 + 0x40 * (idx))
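/*
 * Usage sketch (illustrative, an assumption rather than code from this
 * file): the macros above compute per-queue register offsets indexed by
 * queue number, e.g.
 *
 *	for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++)
 *		rx_pkts[i] = IGC_READ_REG(hw, IGC_PQGPRC(i));
 */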
76
77 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
78 #define U32_0_IN_U64            0       /* lower bytes of u64 */
79 #define U32_1_IN_U64            1       /* higher bytes of u64 */
80 #else
81 #define U32_0_IN_U64            1
82 #define U32_1_IN_U64            0
83 #endif
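/*
 * Usage sketch (illustrative): these indices let a 64-bit counter be
 * addressed as two 32-bit words independently of byte order, e.g.
 *
 *	uint64_t v;
 *	uint32_t lo = ((uint32_t *)&v)[U32_0_IN_U64];	// lower 32 bits
 *	uint32_t hi = ((uint32_t *)&v)[U32_1_IN_U64];	// upper 32 bits
 */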
84
85 #define IGC_ALARM_INTERVAL      8000000u
86 /* us; after about 13.6s some per-queue registers wrap around back to 0 */
87
88 static const struct rte_eth_desc_lim rx_desc_lim = {
89         .nb_max = IGC_MAX_RXD,
90         .nb_min = IGC_MIN_RXD,
91         .nb_align = IGC_RXD_ALIGN,
92 };
93
94 static const struct rte_eth_desc_lim tx_desc_lim = {
95         .nb_max = IGC_MAX_TXD,
96         .nb_min = IGC_MIN_TXD,
97         .nb_align = IGC_TXD_ALIGN,
98         .nb_seg_max = IGC_TX_MAX_SEG,
99         .nb_mtu_seg_max = IGC_TX_MAX_MTU_SEG,
100 };
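/*
 * Illustrative note (assumption, not code from this file): a requested ring
 * size nb_desc is valid only if nb_min <= nb_desc <= nb_max and nb_desc is a
 * multiple of nb_align, e.g.
 *
 *	if (nb_desc > lim->nb_max || nb_desc < lim->nb_min ||
 *	    nb_desc % lim->nb_align != 0)
 *		return -EINVAL;
 */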
101
102 static const struct rte_pci_id pci_id_igc_map[] = {
103         { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_LM) },
104         { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_V)  },
105         { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_I)  },
106         { RTE_PCI_DEVICE(IGC_INTEL_VENDOR_ID, IGC_DEV_ID_I225_K)  },
107         { .vendor_id = 0, /* sentinel */ },
108 };
109
110 /* store statistics names and their offsets in the stats structure */
111 struct rte_igc_xstats_name_off {
112         char name[RTE_ETH_XSTATS_NAME_SIZE];
113         unsigned int offset;
114 };
115
116 static const struct rte_igc_xstats_name_off rte_igc_stats_strings[] = {
117         {"rx_crc_errors", offsetof(struct igc_hw_stats, crcerrs)},
118         {"rx_align_errors", offsetof(struct igc_hw_stats, algnerrc)},
119         {"rx_errors", offsetof(struct igc_hw_stats, rxerrc)},
120         {"rx_missed_packets", offsetof(struct igc_hw_stats, mpc)},
121         {"tx_single_collision_packets", offsetof(struct igc_hw_stats, scc)},
122         {"tx_multiple_collision_packets", offsetof(struct igc_hw_stats, mcc)},
123         {"tx_excessive_collision_packets", offsetof(struct igc_hw_stats,
124                 ecol)},
125         {"tx_late_collisions", offsetof(struct igc_hw_stats, latecol)},
126         {"tx_total_collisions", offsetof(struct igc_hw_stats, colc)},
127         {"tx_deferred_packets", offsetof(struct igc_hw_stats, dc)},
128         {"tx_no_carrier_sense_packets", offsetof(struct igc_hw_stats, tncrs)},
129         {"tx_discarded_packets", offsetof(struct igc_hw_stats, htdpmc)},
130         {"rx_length_errors", offsetof(struct igc_hw_stats, rlec)},
131         {"rx_xon_packets", offsetof(struct igc_hw_stats, xonrxc)},
132         {"tx_xon_packets", offsetof(struct igc_hw_stats, xontxc)},
133         {"rx_xoff_packets", offsetof(struct igc_hw_stats, xoffrxc)},
134         {"tx_xoff_packets", offsetof(struct igc_hw_stats, xofftxc)},
135         {"rx_flow_control_unsupported_packets", offsetof(struct igc_hw_stats,
136                 fcruc)},
137         {"rx_size_64_packets", offsetof(struct igc_hw_stats, prc64)},
138         {"rx_size_65_to_127_packets", offsetof(struct igc_hw_stats, prc127)},
139         {"rx_size_128_to_255_packets", offsetof(struct igc_hw_stats, prc255)},
140         {"rx_size_256_to_511_packets", offsetof(struct igc_hw_stats, prc511)},
141         {"rx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
142                 prc1023)},
143         {"rx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
144                 prc1522)},
145         {"rx_broadcast_packets", offsetof(struct igc_hw_stats, bprc)},
146         {"rx_multicast_packets", offsetof(struct igc_hw_stats, mprc)},
147         {"rx_undersize_errors", offsetof(struct igc_hw_stats, ruc)},
148         {"rx_fragment_errors", offsetof(struct igc_hw_stats, rfc)},
149         {"rx_oversize_errors", offsetof(struct igc_hw_stats, roc)},
150         {"rx_jabber_errors", offsetof(struct igc_hw_stats, rjc)},
151         {"rx_no_buffers", offsetof(struct igc_hw_stats, rnbc)},
152         {"rx_management_packets", offsetof(struct igc_hw_stats, mgprc)},
153         {"rx_management_dropped", offsetof(struct igc_hw_stats, mgpdc)},
154         {"tx_management_packets", offsetof(struct igc_hw_stats, mgptc)},
155         {"rx_total_packets", offsetof(struct igc_hw_stats, tpr)},
156         {"tx_total_packets", offsetof(struct igc_hw_stats, tpt)},
157         {"rx_total_bytes", offsetof(struct igc_hw_stats, tor)},
158         {"tx_total_bytes", offsetof(struct igc_hw_stats, tot)},
159         {"tx_size_64_packets", offsetof(struct igc_hw_stats, ptc64)},
160         {"tx_size_65_to_127_packets", offsetof(struct igc_hw_stats, ptc127)},
161         {"tx_size_128_to_255_packets", offsetof(struct igc_hw_stats, ptc255)},
162         {"tx_size_256_to_511_packets", offsetof(struct igc_hw_stats, ptc511)},
163         {"tx_size_512_to_1023_packets", offsetof(struct igc_hw_stats,
164                 ptc1023)},
165         {"tx_size_1024_to_max_packets", offsetof(struct igc_hw_stats,
166                 ptc1522)},
167         {"tx_multicast_packets", offsetof(struct igc_hw_stats, mptc)},
168         {"tx_broadcast_packets", offsetof(struct igc_hw_stats, bptc)},
169         {"tx_tso_packets", offsetof(struct igc_hw_stats, tsctc)},
170         {"rx_sent_to_host_packets", offsetof(struct igc_hw_stats, rpthc)},
171         {"tx_sent_by_host_packets", offsetof(struct igc_hw_stats, hgptc)},
172         {"interrupt_assert_count", offsetof(struct igc_hw_stats, iac)},
173         {"rx_descriptor_lower_threshold",
174                 offsetof(struct igc_hw_stats, icrxdmtc)},
175 };
176
177 #define IGC_NB_XSTATS (sizeof(rte_igc_stats_strings) / \
178                 sizeof(rte_igc_stats_strings[0]))
179
180 static int eth_igc_configure(struct rte_eth_dev *dev);
181 static int eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete);
182 static void eth_igc_stop(struct rte_eth_dev *dev);
183 static int eth_igc_start(struct rte_eth_dev *dev);
184 static int eth_igc_set_link_up(struct rte_eth_dev *dev);
185 static int eth_igc_set_link_down(struct rte_eth_dev *dev);
186 static void eth_igc_close(struct rte_eth_dev *dev);
187 static int eth_igc_reset(struct rte_eth_dev *dev);
188 static int eth_igc_promiscuous_enable(struct rte_eth_dev *dev);
189 static int eth_igc_promiscuous_disable(struct rte_eth_dev *dev);
190 static int eth_igc_fw_version_get(struct rte_eth_dev *dev,
191                                 char *fw_version, size_t fw_size);
192 static int eth_igc_infos_get(struct rte_eth_dev *dev,
193                         struct rte_eth_dev_info *dev_info);
194 static int eth_igc_led_on(struct rte_eth_dev *dev);
195 static int eth_igc_led_off(struct rte_eth_dev *dev);
196 static const uint32_t *eth_igc_supported_ptypes_get(struct rte_eth_dev *dev);
197 static int eth_igc_rar_set(struct rte_eth_dev *dev,
198                 struct rte_ether_addr *mac_addr, uint32_t index, uint32_t pool);
199 static void eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index);
200 static int eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
201                         struct rte_ether_addr *addr);
202 static int eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
203                          struct rte_ether_addr *mc_addr_set,
204                          uint32_t nb_mc_addr);
205 static int eth_igc_allmulticast_enable(struct rte_eth_dev *dev);
206 static int eth_igc_allmulticast_disable(struct rte_eth_dev *dev);
207 static int eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
208 static int eth_igc_stats_get(struct rte_eth_dev *dev,
209                         struct rte_eth_stats *rte_stats);
210 static int eth_igc_xstats_get(struct rte_eth_dev *dev,
211                         struct rte_eth_xstat *xstats, unsigned int n);
212 static int eth_igc_xstats_get_by_id(struct rte_eth_dev *dev,
213                                 const uint64_t *ids,
214                                 uint64_t *values, unsigned int n);
215 static int eth_igc_xstats_get_names(struct rte_eth_dev *dev,
216                                 struct rte_eth_xstat_name *xstats_names,
217                                 unsigned int size);
218 static int eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
219                 struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
220                 unsigned int limit);
221 static int eth_igc_xstats_reset(struct rte_eth_dev *dev);
222 static int
223 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
224         uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx);
225 static int
226 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
227 static int
228 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
229 static int
230 eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
231 static int
232 eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf);
233 static int eth_igc_rss_reta_update(struct rte_eth_dev *dev,
234                         struct rte_eth_rss_reta_entry64 *reta_conf,
235                         uint16_t reta_size);
236 static int eth_igc_rss_reta_query(struct rte_eth_dev *dev,
237                        struct rte_eth_rss_reta_entry64 *reta_conf,
238                        uint16_t reta_size);
239 static int eth_igc_rss_hash_update(struct rte_eth_dev *dev,
240                         struct rte_eth_rss_conf *rss_conf);
241 static int eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
242                         struct rte_eth_rss_conf *rss_conf);
243 static int
244 eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on);
245 static int eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask);
246 static int eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
247                       enum rte_vlan_type vlan_type, uint16_t tpid);
248
249 static const struct eth_dev_ops eth_igc_ops = {
250         .dev_configure          = eth_igc_configure,
251         .link_update            = eth_igc_link_update,
252         .dev_stop               = eth_igc_stop,
253         .dev_start              = eth_igc_start,
254         .dev_close              = eth_igc_close,
255         .dev_reset              = eth_igc_reset,
256         .dev_set_link_up        = eth_igc_set_link_up,
257         .dev_set_link_down      = eth_igc_set_link_down,
258         .promiscuous_enable     = eth_igc_promiscuous_enable,
259         .promiscuous_disable    = eth_igc_promiscuous_disable,
260         .allmulticast_enable    = eth_igc_allmulticast_enable,
261         .allmulticast_disable   = eth_igc_allmulticast_disable,
262         .fw_version_get         = eth_igc_fw_version_get,
263         .dev_infos_get          = eth_igc_infos_get,
264         .dev_led_on             = eth_igc_led_on,
265         .dev_led_off            = eth_igc_led_off,
266         .dev_supported_ptypes_get = eth_igc_supported_ptypes_get,
267         .mtu_set                = eth_igc_mtu_set,
268         .mac_addr_add           = eth_igc_rar_set,
269         .mac_addr_remove        = eth_igc_rar_clear,
270         .mac_addr_set           = eth_igc_default_mac_addr_set,
271         .set_mc_addr_list       = eth_igc_set_mc_addr_list,
272
273         .rx_queue_setup         = eth_igc_rx_queue_setup,
274         .rx_queue_release       = eth_igc_rx_queue_release,
275         .rx_queue_count         = eth_igc_rx_queue_count,
276         .rx_descriptor_done     = eth_igc_rx_descriptor_done,
277         .rx_descriptor_status   = eth_igc_rx_descriptor_status,
278         .tx_descriptor_status   = eth_igc_tx_descriptor_status,
279         .tx_queue_setup         = eth_igc_tx_queue_setup,
280         .tx_queue_release       = eth_igc_tx_queue_release,
281         .tx_done_cleanup        = eth_igc_tx_done_cleanup,
282         .rxq_info_get           = eth_igc_rxq_info_get,
283         .txq_info_get           = eth_igc_txq_info_get,
284         .stats_get              = eth_igc_stats_get,
285         .xstats_get             = eth_igc_xstats_get,
286         .xstats_get_by_id       = eth_igc_xstats_get_by_id,
287         .xstats_get_names_by_id = eth_igc_xstats_get_names_by_id,
288         .xstats_get_names       = eth_igc_xstats_get_names,
289         .stats_reset            = eth_igc_xstats_reset,
290         .xstats_reset           = eth_igc_xstats_reset,
291         .queue_stats_mapping_set = eth_igc_queue_stats_mapping_set,
292         .rx_queue_intr_enable   = eth_igc_rx_queue_intr_enable,
293         .rx_queue_intr_disable  = eth_igc_rx_queue_intr_disable,
294         .flow_ctrl_get          = eth_igc_flow_ctrl_get,
295         .flow_ctrl_set          = eth_igc_flow_ctrl_set,
296         .reta_update            = eth_igc_rss_reta_update,
297         .reta_query             = eth_igc_rss_reta_query,
298         .rss_hash_update        = eth_igc_rss_hash_update,
299         .rss_hash_conf_get      = eth_igc_rss_hash_conf_get,
300         .vlan_filter_set        = eth_igc_vlan_filter_set,
301         .vlan_offload_set       = eth_igc_vlan_offload_set,
302         .vlan_tpid_set          = eth_igc_vlan_tpid_set,
303         .vlan_strip_queue_set   = eth_igc_vlan_strip_queue_set,
304         .filter_ctrl            = eth_igc_filter_ctrl,
305 };
306
307 /*
308  * multiple queue mode checking
309  */
310 static int
311 igc_check_mq_mode(struct rte_eth_dev *dev)
312 {
313         enum rte_eth_rx_mq_mode rx_mq_mode = dev->data->dev_conf.rxmode.mq_mode;
314         enum rte_eth_tx_mq_mode tx_mq_mode = dev->data->dev_conf.txmode.mq_mode;
315
316         if (RTE_ETH_DEV_SRIOV(dev).active != 0) {
317                 PMD_INIT_LOG(ERR, "SRIOV is not supported.");
318                 return -EINVAL;
319         }
320
321         if (rx_mq_mode != ETH_MQ_RX_NONE &&
322                 rx_mq_mode != ETH_MQ_RX_RSS) {
323                 /* RSS together with VMDq not supported */
324                 PMD_INIT_LOG(ERR, "RX mode %d is not supported.",
325                                 rx_mq_mode);
326                 return -EINVAL;
327         }
328
329         /* To not break software that sets an invalid mode, only display
330          * a warning if an invalid mode is used.
331          */
332         if (tx_mq_mode != ETH_MQ_TX_NONE)
333                 PMD_INIT_LOG(WARNING,
334                         "TX mode %d is not supported. It is meaningless for this driver and is ignored",
335                         tx_mq_mode);
336
337         return 0;
338 }
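/*
 * Application-side sketch (illustrative, not part of this file): a
 * configuration that passes the check above enables RSS on Rx and leaves
 * Tx multi-queue mode off:
 *
 *	struct rte_eth_conf conf = {
 *		.rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *		.txmode = { .mq_mode = ETH_MQ_TX_NONE },
 *	};
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */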
339
340 static int
341 eth_igc_configure(struct rte_eth_dev *dev)
342 {
343         struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
344         int ret;
345
346         PMD_INIT_FUNC_TRACE();
347
348         ret  = igc_check_mq_mode(dev);
349         if (ret != 0)
350                 return ret;
351
352         intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
353         return 0;
354 }
355
356 static int
357 eth_igc_set_link_up(struct rte_eth_dev *dev)
358 {
359         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
360
361         if (hw->phy.media_type == igc_media_type_copper)
362                 igc_power_up_phy(hw);
363         else
364                 igc_power_up_fiber_serdes_link(hw);
365         return 0;
366 }
367
368 static int
369 eth_igc_set_link_down(struct rte_eth_dev *dev)
370 {
371         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
372
373         if (hw->phy.media_type == igc_media_type_copper)
374                 igc_power_down_phy(hw);
375         else
376                 igc_shutdown_fiber_serdes_link(hw);
377         return 0;
378 }
379
380 /*
381  * disable other interrupt
382  */
383 static void
384 igc_intr_other_disable(struct rte_eth_dev *dev)
385 {
386         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
387         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
388         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
389
390         if (rte_intr_allow_others(intr_handle) &&
391                 dev->data->dev_conf.intr_conf.lsc) {
392                 IGC_WRITE_REG(hw, IGC_EIMC, 1u << IGC_MSIX_OTHER_INTR_VEC);
393         }
394
395         IGC_WRITE_REG(hw, IGC_IMC, ~0);
396         IGC_WRITE_FLUSH(hw);
397 }
398
399 /*
400  * enable other interrupt
401  */
402 static inline void
403 igc_intr_other_enable(struct rte_eth_dev *dev)
404 {
405         struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
406         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
407         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
408         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
409
410         if (rte_intr_allow_others(intr_handle) &&
411                 dev->data->dev_conf.intr_conf.lsc) {
412                 IGC_WRITE_REG(hw, IGC_EIMS, 1u << IGC_MSIX_OTHER_INTR_VEC);
413         }
414
415         IGC_WRITE_REG(hw, IGC_IMS, intr->mask);
416         IGC_WRITE_FLUSH(hw);
417 }
418
419 /*
420  * It reads ICR and gets interrupt causes, check it and set a bit flag
421  * to update link status.
422  */
423 static void
424 eth_igc_interrupt_get_status(struct rte_eth_dev *dev)
425 {
426         uint32_t icr;
427         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
428         struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
429
430         /* read and clear the interrupt cause registers (clear-on-read) */
431         icr = IGC_READ_REG(hw, IGC_ICR);
432
433         intr->flags = 0;
434         if (icr & IGC_ICR_LSC)
435                 intr->flags |= IGC_FLAG_NEED_LINK_UPDATE;
436 }
437
438 /* return 0 means link status changed, -1 means not changed */
439 static int
440 eth_igc_link_update(struct rte_eth_dev *dev, int wait_to_complete)
441 {
442         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
443         struct rte_eth_link link;
444         int link_check, count;
445
446         link_check = 0;
447         hw->mac.get_link_status = 1;
448
449         /* possible wait-to-complete in up to 9 seconds */
450         for (count = 0; count < IGC_LINK_UPDATE_CHECK_TIMEOUT; count++) {
451                 /* Read the real link status */
452                 switch (hw->phy.media_type) {
453                 case igc_media_type_copper:
454                         /* Do the work to read phy */
455                         igc_check_for_link(hw);
456                         link_check = !hw->mac.get_link_status;
457                         break;
458
459                 case igc_media_type_fiber:
460                         igc_check_for_link(hw);
461                         link_check = (IGC_READ_REG(hw, IGC_STATUS) &
462                                       IGC_STATUS_LU);
463                         break;
464
465                 case igc_media_type_internal_serdes:
466                         igc_check_for_link(hw);
467                         link_check = hw->mac.serdes_has_link;
468                         break;
469
470                 default:
471                         break;
472                 }
473                 if (link_check || wait_to_complete == 0)
474                         break;
475                 rte_delay_ms(IGC_LINK_UPDATE_CHECK_INTERVAL);
476         }
477         memset(&link, 0, sizeof(link));
478
479         /* Now we check if a transition has happened */
480         if (link_check) {
481                 uint16_t duplex, speed;
482                 hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
483                 link.link_duplex = (duplex == FULL_DUPLEX) ?
484                                 ETH_LINK_FULL_DUPLEX :
485                                 ETH_LINK_HALF_DUPLEX;
486                 link.link_speed = speed;
487                 link.link_status = ETH_LINK_UP;
488                 link.link_autoneg = !(dev->data->dev_conf.link_speeds &
489                                 ETH_LINK_SPEED_FIXED);
490
491                 if (speed == SPEED_2500) {
492                         uint32_t tipg = IGC_READ_REG(hw, IGC_TIPG);
493                         if ((tipg & IGC_TIPG_IPGT_MASK) != 0x0b) {
494                                 tipg &= ~IGC_TIPG_IPGT_MASK;
495                                 tipg |= 0x0b;
496                                 IGC_WRITE_REG(hw, IGC_TIPG, tipg);
497                         }
498                 }
499         } else {
500                 link.link_speed = 0;
501                 link.link_duplex = ETH_LINK_HALF_DUPLEX;
502                 link.link_status = ETH_LINK_DOWN;
503                 link.link_autoneg = ETH_LINK_FIXED;
504         }
505
506         return rte_eth_linkstatus_set(dev, &link);
507 }
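/*
 * Usage sketch (illustrative): applications normally reach this callback
 * through the ethdev API, e.g.
 *
 *	struct rte_eth_link link;
 *	rte_eth_link_get_nowait(port_id, &link);	// wait_to_complete = 0
 */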
508
509 /*
510  * It executes link_update after knowing an interrupt is present.
511  */
512 static void
513 eth_igc_interrupt_action(struct rte_eth_dev *dev)
514 {
515         struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
516         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
517         struct rte_eth_link link;
518         int ret;
519
520         if (intr->flags & IGC_FLAG_NEED_LINK_UPDATE) {
521                 intr->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
522
523                 /* set get_link_status to check register later */
524                 ret = eth_igc_link_update(dev, 0);
525
526                 /* check if link has changed */
527                 if (ret < 0)
528                         return;
529
530                 rte_eth_linkstatus_get(dev, &link);
531                 if (link.link_status)
532                         PMD_DRV_LOG(INFO,
533                                 " Port %d: Link Up - speed %u Mbps - %s",
534                                 dev->data->port_id,
535                                 (unsigned int)link.link_speed,
536                                 link.link_duplex == ETH_LINK_FULL_DUPLEX ?
537                                 "full-duplex" : "half-duplex");
538                 else
539                         PMD_DRV_LOG(INFO, " Port %d: Link Down",
540                                 dev->data->port_id);
541
542                 PMD_DRV_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
543                                 pci_dev->addr.domain,
544                                 pci_dev->addr.bus,
545                                 pci_dev->addr.devid,
546                                 pci_dev->addr.function);
547                 _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC,
548                                 NULL);
549         }
550 }
551
552 /*
553  * Interrupt handler, which shall be registered beforehand.
554  *
555  * @param
556  *  The address of the parameter (struct rte_eth_dev *) registered before.
559  */
560 static void
561 eth_igc_interrupt_handler(void *param)
562 {
563         struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
564
565         eth_igc_interrupt_get_status(dev);
566         eth_igc_interrupt_action(dev);
567 }
568
569 static void igc_read_queue_stats_register(struct rte_eth_dev *dev);
570
571 /*
572  * Update the queue statistics every IGC_ALARM_INTERVAL microseconds.
573  * @param
574  *  The address of parameter (struct rte_eth_dev *) registered before.
575  */
576 static void
577 igc_update_queue_stats_handler(void *param)
578 {
579         struct rte_eth_dev *dev = param;
580         igc_read_queue_stats_register(dev);
581         rte_eal_alarm_set(IGC_ALARM_INTERVAL,
582                         igc_update_queue_stats_handler, dev);
583 }
584
585 /*
586  * rx,tx enable/disable
587  */
588 static void
589 eth_igc_rxtx_control(struct rte_eth_dev *dev, bool enable)
590 {
591         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
592         uint32_t tctl, rctl;
593
594         tctl = IGC_READ_REG(hw, IGC_TCTL);
595         rctl = IGC_READ_REG(hw, IGC_RCTL);
596
597         if (enable) {
598                 /* enable Tx/Rx */
599                 tctl |= IGC_TCTL_EN;
600                 rctl |= IGC_RCTL_EN;
601         } else {
602                 /* disable Tx/Rx */
603                 tctl &= ~IGC_TCTL_EN;
604                 rctl &= ~IGC_RCTL_EN;
605         }
606         IGC_WRITE_REG(hw, IGC_TCTL, tctl);
607         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
608         IGC_WRITE_FLUSH(hw);
609 }
610
611 /*
612  *  This routine disables all traffic on the adapter by issuing a
613  *  global reset on the MAC.
614  */
615 static void
616 eth_igc_stop(struct rte_eth_dev *dev)
617 {
618         struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
619         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
620         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
621         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
622         struct rte_eth_link link;
623
624         adapter->stopped = 1;
625
626         /* disable receive and transmit */
627         eth_igc_rxtx_control(dev, false);
628
629         /* disable all MSI-X interrupts */
630         IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
631         IGC_WRITE_FLUSH(hw);
632
633         /* clear all MSI-X interrupts */
634         IGC_WRITE_REG(hw, IGC_EICR, 0x1f);
635
636         igc_intr_other_disable(dev);
637
638         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
639
640         /* disable intr eventfd mapping */
641         rte_intr_disable(intr_handle);
642
643         igc_reset_hw(hw);
644
645         /* disable all wake up */
646         IGC_WRITE_REG(hw, IGC_WUC, 0);
647
648         /* disable checking EEE operation in MAC loopback mode */
649         igc_read_reg_check_clear_bits(hw, IGC_EEER, IGC_EEER_EEE_FRC_AN);
650
651         /* Set bit for Go Link disconnect */
652         igc_read_reg_check_set_bits(hw, IGC_82580_PHY_POWER_MGMT,
653                         IGC_82580_PM_GO_LINKD);
654
655         /* Power down the PHY. Needed to make the link go down */
656         eth_igc_set_link_down(dev);
657
658         igc_dev_clear_queues(dev);
659
660         /* clear the recorded link status */
661         memset(&link, 0, sizeof(link));
662         rte_eth_linkstatus_set(dev, &link);
663
664         if (!rte_intr_allow_others(intr_handle))
665                 /* resume to the default handler */
666                 rte_intr_callback_register(intr_handle,
667                                            eth_igc_interrupt_handler,
668                                            (void *)dev);
669
670         /* Clean datapath event and queue/vec mapping */
671         rte_intr_efd_disable(intr_handle);
672         if (intr_handle->intr_vec != NULL) {
673                 rte_free(intr_handle->intr_vec);
674                 intr_handle->intr_vec = NULL;
675         }
676 }
677
678 /*
679  * write interrupt vector allocation register
680  * @hw
681  *  board private structure
682  * @queue_index
683  *  queue index, valid 0,1,2,3
684  * @tx
685  *  tx:1, rx:0
686  * @msix_vector
687  *  msix-vector, valid 0,1,2,3,4
688  */
689 static void
690 igc_write_ivar(struct igc_hw *hw, uint8_t queue_index,
691                 bool tx, uint8_t msix_vector)
692 {
693         uint8_t offset = 0;
694         uint8_t reg_index = queue_index >> 1;
695         uint32_t val;
696
697         /*
698          * IVAR(0)
699          * bit31...24   bit23...16      bit15...8       bit7...0
700          * TX1          RX1             TX0             RX0
701          *
702          * IVAR(1)
703          * bit31...24   bit23...16      bit15...8       bit7...0
704          * TX3          RX3             TX2             RX2
705          */
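        /*
         * Worked example (illustrative): queue_index = 2 with tx = true
         * gives reg_index = 1 and offset = 8, i.e. the TX2 field
         * (bits 15:8) of IVAR(1).
         */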
706
707         if (tx)
708                 offset = 8;
709
710         if (queue_index & 1)
711                 offset += 16;
712
713         val = IGC_READ_REG_ARRAY(hw, IGC_IVAR0, reg_index);
714
715         /* clear bits */
716         val &= ~((uint32_t)0xFF << offset);
717
718         /* write vector and valid bit */
719         val |= (uint32_t)(msix_vector | IGC_IVAR_VALID) << offset;
720
721         IGC_WRITE_REG_ARRAY(hw, IGC_IVAR0, reg_index, val);
722 }
723
724 /* Sets up the hardware to generate MSI-X interrupts properly
725  * @hw
726  *  board private structure
727  */
728 static void
729 igc_configure_msix_intr(struct rte_eth_dev *dev)
730 {
731         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
732         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
733         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
734
735         uint32_t intr_mask;
736         uint32_t vec = IGC_MISC_VEC_ID;
737         uint32_t base = IGC_MISC_VEC_ID;
738         uint32_t misc_shift = 0;
739         int i;
740
741         /* won't configure msix register if no mapping is done
742          * between intr vector and event fd
743          */
744         if (!rte_intr_dp_is_en(intr_handle))
745                 return;
746
747         if (rte_intr_allow_others(intr_handle)) {
748                 base = IGC_RX_VEC_START;
749                 vec = base;
750                 misc_shift = 1;
751         }
752
753         /* turn on MSI-X capability first */
754         IGC_WRITE_REG(hw, IGC_GPIE, IGC_GPIE_MSIX_MODE |
755                                 IGC_GPIE_PBA | IGC_GPIE_EIAME |
756                                 IGC_GPIE_NSICR);
757         intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) <<
758                 misc_shift;
759
760         if (dev->data->dev_conf.intr_conf.lsc)
761                 intr_mask |= (1u << IGC_MSIX_OTHER_INTR_VEC);
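        /*
         * Worked example (illustrative): with 4 Rx event fds and
         * misc_shift = 1, intr_mask = 0xf << 1 = 0x1e; OR-ing in the
         * other-cause bit (vector 0) gives 0x1f.
         */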
762
763         /* enable msix auto-clear */
764         igc_read_reg_check_set_bits(hw, IGC_EIAC, intr_mask);
765
766         /* set other cause interrupt vector */
767         igc_read_reg_check_set_bits(hw, IGC_IVAR_MISC,
768                 (uint32_t)(IGC_MSIX_OTHER_INTR_VEC | IGC_IVAR_VALID) << 8);
769
770         /* enable auto-mask */
771         igc_read_reg_check_set_bits(hw, IGC_EIAM, intr_mask);
772
773         for (i = 0; i < dev->data->nb_rx_queues; i++) {
774                 igc_write_ivar(hw, i, 0, vec);
775                 intr_handle->intr_vec[i] = vec;
776                 if (vec < base + intr_handle->nb_efd - 1)
777                         vec++;
778         }
779
780         IGC_WRITE_FLUSH(hw);
781 }
782
783 /**
784  * It sets or clears the link-status-change bit in the interrupt mask.
785  *
786  * @dev
787  *  Pointer to struct rte_eth_dev.
788  * @on
789  *  Enable or Disable
790  */
791 static void
792 igc_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on)
793 {
794         struct igc_interrupt *intr = IGC_DEV_PRIVATE_INTR(dev);
795
796         if (on)
797                 intr->mask |= IGC_ICR_LSC;
798         else
799                 intr->mask &= ~IGC_ICR_LSC;
800 }
801
802 /*
803  * It enables the Rx queue interrupts.
804  * It will be called only once, during NIC initialization.
805  */
806 static void
807 igc_rxq_interrupt_setup(struct rte_eth_dev *dev)
808 {
809         uint32_t mask;
810         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
811         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
812         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
813         int misc_shift = rte_intr_allow_others(intr_handle) ? 1 : 0;
814
815         /* won't configure msix register if no mapping is done
816          * between intr vector and event fd
817          */
818         if (!rte_intr_dp_is_en(intr_handle))
819                 return;
820
821         mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift;
822         IGC_WRITE_REG(hw, IGC_EIMS, mask);
823 }
824
825 /*
826  *  Get the hardware Rx buffer size (the RXPBS size field is in KB, hence the shift by 10).
827  */
828 static inline int
829 igc_get_rx_buffer_size(struct igc_hw *hw)
830 {
831         return (IGC_READ_REG(hw, IGC_RXPBS) & 0x3f) << 10;
832 }
833
834 /*
835  * igc_hw_control_acquire sets CTRL_EXT:DRV_LOAD bit.
836  * For ASF and Pass Through versions of f/w this means
837  * that the driver is loaded.
838  */
839 static void
840 igc_hw_control_acquire(struct igc_hw *hw)
841 {
842         uint32_t ctrl_ext;
843
844         /* Let firmware know the driver has taken over */
845         ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
846         IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
847 }
848
849 /*
850  * igc_hw_control_release resets CTRL_EXT:DRV_LOAD bit.
851  * For ASF and Pass Through versions of f/w this means that the
852  * driver is no longer loaded.
853  */
854 static void
855 igc_hw_control_release(struct igc_hw *hw)
856 {
857         uint32_t ctrl_ext;
858
859         /* Let firmware take over control of the hardware */
860         ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
861         IGC_WRITE_REG(hw, IGC_CTRL_EXT,
862                         ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
863 }
864
865 static int
866 igc_hardware_init(struct igc_hw *hw)
867 {
868         uint32_t rx_buf_size;
869         int diag;
870
871         /* Let the firmware know the OS is in control */
872         igc_hw_control_acquire(hw);
873
874         /* Issue a global reset */
875         igc_reset_hw(hw);
876
877         /* disable all wake up */
878         IGC_WRITE_REG(hw, IGC_WUC, 0);
879
880         /*
881          * Hardware flow control
882          * - High water mark should allow for at least two standard size (1518)
883          *   frames to be received after sending an XOFF.
884          * - Low water mark works best when it is very near the high water mark.
885          *   This allows the receiver to restart by sending XON when it has
886          *   drained a bit. Here we use an arbitrary value of 1500 which will
887          *   restart after one full frame is pulled from the buffer. There
888          *   could be several smaller frames in the buffer and if so they will
889          *   not trigger the XON until their total number reduces the buffer
890          *   by 1500.
891          */
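        /*
         * Worked example (illustrative): with a 32 KB Rx packet buffer,
         * high_water = 32768 - 2 * 1518 = 29732 bytes and
         * low_water = 29732 - 1500 = 28232 bytes.
         */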
892         rx_buf_size = igc_get_rx_buffer_size(hw);
893         hw->fc.high_water = rx_buf_size - (RTE_ETHER_MAX_LEN * 2);
894         hw->fc.low_water = hw->fc.high_water - 1500;
895         hw->fc.pause_time = IGC_FC_PAUSE_TIME;
896         hw->fc.send_xon = 1;
897         hw->fc.requested_mode = igc_fc_full;
898
899         diag = igc_init_hw(hw);
900         if (diag < 0)
901                 return diag;
902
903         igc_get_phy_info(hw);
904         igc_check_for_link(hw);
905
906         return 0;
907 }
908
909 static int
910 eth_igc_start(struct rte_eth_dev *dev)
911 {
912         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
913         struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
914         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
915         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
916         uint32_t *speeds;
917         int ret;
918
919         PMD_INIT_FUNC_TRACE();
920
921         /* disable all MSI-X interrupts */
922         IGC_WRITE_REG(hw, IGC_EIMC, 0x1f);
923         IGC_WRITE_FLUSH(hw);
924
925         /* clear all MSI-X interrupts */
926         IGC_WRITE_REG(hw, IGC_EICR, 0x1f);
927
928         /* disable uio/vfio intr/eventfd mapping */
929         if (!adapter->stopped)
930                 rte_intr_disable(intr_handle);
931
932         /* Power up the PHY. Needed to make the link go up */
933         eth_igc_set_link_up(dev);
934
935         /* Put the address into the Receive Address Array */
936         igc_rar_set(hw, hw->mac.addr, 0);
937
938         /* Initialize the hardware */
939         if (igc_hardware_init(hw)) {
940                 PMD_DRV_LOG(ERR, "Unable to initialize the hardware");
941                 return -EIO;
942         }
943         adapter->stopped = 0;
944
945         /* check and configure queue intr-vector mapping */
946         if (rte_intr_cap_multiple(intr_handle) &&
947                 dev->data->dev_conf.intr_conf.rxq) {
948                 uint32_t intr_vector = dev->data->nb_rx_queues;
949                 if (rte_intr_efd_enable(intr_handle, intr_vector))
950                         return -1;
951         }
952
953         if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
954                 intr_handle->intr_vec = rte_zmalloc("intr_vec",
955                         dev->data->nb_rx_queues * sizeof(int), 0);
956                 if (intr_handle->intr_vec == NULL) {
957                         PMD_DRV_LOG(ERR,
958                                 "Failed to allocate %d rx_queues intr_vec",
959                                 dev->data->nb_rx_queues);
960                         return -ENOMEM;
961                 }
962         }
963
964         /* configure msix for rx interrupt */
965         igc_configure_msix_intr(dev);
966
967         igc_tx_init(dev);
968
969         /* This can fail when allocating mbufs for descriptor rings */
970         ret = igc_rx_init(dev);
971         if (ret) {
972                 PMD_DRV_LOG(ERR, "Unable to initialize RX hardware");
973                 igc_dev_clear_queues(dev);
974                 return ret;
975         }
976
977         igc_clear_hw_cntrs_base_generic(hw);
978
979         /* VLAN Offload Settings */
980         eth_igc_vlan_offload_set(dev,
981                 ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
982                 ETH_VLAN_EXTEND_MASK);
983
984         /* Setup link speed and duplex */
985         speeds = &dev->data->dev_conf.link_speeds;
986         if (*speeds == ETH_LINK_SPEED_AUTONEG) {
987                 hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
988                 hw->mac.autoneg = 1;
989         } else {
990                 int num_speeds = 0;
991                 bool autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0;
992
993                 /* Reset */
994                 hw->phy.autoneg_advertised = 0;
995
996                 if (*speeds & ~(ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
997                                 ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
998                                 ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G |
999                                 ETH_LINK_SPEED_FIXED)) {
1000                         num_speeds = -1;
1001                         goto error_invalid_config;
1002                 }
1003                 if (*speeds & ETH_LINK_SPEED_10M_HD) {
1004                         hw->phy.autoneg_advertised |= ADVERTISE_10_HALF;
1005                         num_speeds++;
1006                 }
1007                 if (*speeds & ETH_LINK_SPEED_10M) {
1008                         hw->phy.autoneg_advertised |= ADVERTISE_10_FULL;
1009                         num_speeds++;
1010                 }
1011                 if (*speeds & ETH_LINK_SPEED_100M_HD) {
1012                         hw->phy.autoneg_advertised |= ADVERTISE_100_HALF;
1013                         num_speeds++;
1014                 }
1015                 if (*speeds & ETH_LINK_SPEED_100M) {
1016                         hw->phy.autoneg_advertised |= ADVERTISE_100_FULL;
1017                         num_speeds++;
1018                 }
1019                 if (*speeds & ETH_LINK_SPEED_1G) {
1020                         hw->phy.autoneg_advertised |= ADVERTISE_1000_FULL;
1021                         num_speeds++;
1022                 }
1023                 if (*speeds & ETH_LINK_SPEED_2_5G) {
1024                         hw->phy.autoneg_advertised |= ADVERTISE_2500_FULL;
1025                         num_speeds++;
1026                 }
1027                 if (num_speeds == 0 || (!autoneg && num_speeds > 1))
1028                         goto error_invalid_config;
1029
1030                 /* Set/reset the mac.autoneg based on the link speed,
1031                  * fixed or not
1032                  */
1033                 if (!autoneg) {
1034                         hw->mac.autoneg = 0;
1035                         hw->mac.forced_speed_duplex =
1036                                         hw->phy.autoneg_advertised;
1037                 } else {
1038                         hw->mac.autoneg = 1;
1039                 }
1040         }
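        /*
         * Application-side examples (illustrative): link_speeds =
         * ETH_LINK_SPEED_AUTONEG (0) advertises every supported speed,
         * while ETH_LINK_SPEED_FIXED | ETH_LINK_SPEED_1G forces 1 Gb/s;
         * exactly one speed may accompany the fixed flag.
         */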
1041
1042         igc_setup_link(hw);
1043
1044         if (rte_intr_allow_others(intr_handle)) {
1045                 /* check if lsc interrupt is enabled */
1046                 if (dev->data->dev_conf.intr_conf.lsc)
1047                         igc_lsc_interrupt_setup(dev, 1);
1048                 else
1049                         igc_lsc_interrupt_setup(dev, 0);
1050         } else {
1051                 rte_intr_callback_unregister(intr_handle,
1052                                              eth_igc_interrupt_handler,
1053                                              (void *)dev);
1054                 if (dev->data->dev_conf.intr_conf.lsc)
1055                         PMD_DRV_LOG(INFO,
1056                                 "LSC interrupt cannot be enabled: no interrupt multiplexing");
1057         }
1058
1059         /* enable uio/vfio intr/eventfd mapping */
1060         rte_intr_enable(intr_handle);
1061
1062         rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1063                         igc_update_queue_stats_handler, dev);
1064
1065         /* check if rxq interrupt is enabled */
1066         if (dev->data->dev_conf.intr_conf.rxq &&
1067                         rte_intr_dp_is_en(intr_handle))
1068                 igc_rxq_interrupt_setup(dev);
1069
1070         /* resume enabled intr since hw reset */
1071         igc_intr_other_enable(dev);
1072
1073         eth_igc_rxtx_control(dev, true);
1074         eth_igc_link_update(dev, 0);
1075
1076         /* configure MAC-loopback mode */
1077         if (dev->data->dev_conf.lpbk_mode == 1) {
1078                 uint32_t reg_val;
1079
1080                 reg_val = IGC_READ_REG(hw, IGC_CTRL);
1081                 reg_val &= ~IGC_CTRL_SPEED_MASK;
1082                 reg_val |= IGC_CTRL_SLU | IGC_CTRL_FRCSPD |
1083                         IGC_CTRL_FRCDPX | IGC_CTRL_FD | IGC_CTRL_SPEED_2500;
1084                 IGC_WRITE_REG(hw, IGC_CTRL, reg_val);
1085
1086                 igc_read_reg_check_set_bits(hw, IGC_EEER, IGC_EEER_EEE_FRC_AN);
1087         }
1088
1089         return 0;
1090
1091 error_invalid_config:
1092         PMD_DRV_LOG(ERR, "Invalid advertised speeds (%u) for port %u",
1093                      dev->data->dev_conf.link_speeds, dev->data->port_id);
1094         igc_dev_clear_queues(dev);
1095         return -EINVAL;
1096 }
1097
1098 static int
1099 igc_reset_swfw_lock(struct igc_hw *hw)
1100 {
1101         int ret_val;
1102
1103         /*
1104          * Do mac ops initialization manually here, since we will need
1105          * some function pointers set by this call.
1106          */
1107         ret_val = igc_init_mac_params(hw);
1108         if (ret_val)
1109                 return ret_val;
1110
1111         /*
1112          * SMBI lock should not fail in this early stage. If this is the case,
1113          * it is due to an improper exit of the application.
1114          * So force the release of the faulty lock.
1115          */
1116         if (igc_get_hw_semaphore_generic(hw) < 0)
1117                 PMD_DRV_LOG(DEBUG, "SMBI lock released");
1118
1119         igc_put_hw_semaphore_generic(hw);
1120
1121         if (hw->mac.ops.acquire_swfw_sync != NULL) {
1122                 uint16_t mask;
1123
1124                 /*
1125                  * Phy lock should not fail in this early stage.
1126                  * If this is the case, it is due to an improper exit of the
1127                  * application. So force the release of the faulty lock.
1128                  */
1129                 mask = IGC_SWFW_PHY0_SM;
1130                 if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0) {
1131                         PMD_DRV_LOG(DEBUG, "SWFW phy%d lock released",
1132                                     hw->bus.func);
1133                 }
1134                 hw->mac.ops.release_swfw_sync(hw, mask);
1135
1136                 /*
1137                  * This one is more tricky since it is common to all ports; but
1138                  * swfw_sync retries last long enough (1s) to be almost sure
1139                  * that if lock can not be taken it is due to an improper lock
1140                  * of the semaphore.
1141                  */
1142                 mask = IGC_SWFW_EEP_SM;
1143                 if (hw->mac.ops.acquire_swfw_sync(hw, mask) < 0)
1144                         PMD_DRV_LOG(DEBUG, "SWFW common locks released");
1145
1146                 hw->mac.ops.release_swfw_sync(hw, mask);
1147         }
1148
1149         return IGC_SUCCESS;
1150 }
1151
1152 /*
1153  * free all rx/tx queues.
1154  */
1155 static void
1156 igc_dev_free_queues(struct rte_eth_dev *dev)
1157 {
1158         uint16_t i;
1159
1160         for (i = 0; i < dev->data->nb_rx_queues; i++) {
1161                 eth_igc_rx_queue_release(dev->data->rx_queues[i]);
1162                 dev->data->rx_queues[i] = NULL;
1163         }
1164         dev->data->nb_rx_queues = 0;
1165
1166         for (i = 0; i < dev->data->nb_tx_queues; i++) {
1167                 eth_igc_tx_queue_release(dev->data->tx_queues[i]);
1168                 dev->data->tx_queues[i] = NULL;
1169         }
1170         dev->data->nb_tx_queues = 0;
1171 }
1172
1173 static void
1174 eth_igc_close(struct rte_eth_dev *dev)
1175 {
1176         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1177         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
1178         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1179         struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
1180         int retry = 0;
1181
1182         PMD_INIT_FUNC_TRACE();
1183
1184         if (!adapter->stopped)
1185                 eth_igc_stop(dev);
1186
1187         igc_flow_flush(dev, NULL);
1188         igc_clear_all_filter(dev);
1189
1190         igc_intr_other_disable(dev);
1191         do {
1192                 int ret = rte_intr_callback_unregister(intr_handle,
1193                                 eth_igc_interrupt_handler, dev);
1194                 if (ret >= 0 || ret == -ENOENT || ret == -EINVAL)
1195                         break;
1196
1197                 PMD_DRV_LOG(ERR, "intr callback unregister failed: %d", ret);
1198                 DELAY(200 * 1000); /* delay 200ms */
1199         } while (retry++ < 5);
1200
1201         igc_phy_hw_reset(hw);
1202         igc_hw_control_release(hw);
1203         igc_dev_free_queues(dev);
1204
1205         /* Reset any pending lock */
1206         igc_reset_swfw_lock(hw);
1207 }
1208
1209 static void
1210 igc_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev)
1211 {
1212         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1213
1214         hw->vendor_id = pci_dev->id.vendor_id;
1215         hw->device_id = pci_dev->id.device_id;
1216         hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;
1217         hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
1218 }
1219
1220 static int
1221 eth_igc_dev_init(struct rte_eth_dev *dev)
1222 {
1223         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
1224         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1225         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1226         int i, error = 0;
1227
1228         PMD_INIT_FUNC_TRACE();
1229         dev->dev_ops = &eth_igc_ops;
1230
1231         /*
1232          * for secondary processes, we don't initialize any further as primary
1233          * has already done this work. Only check we don't need a different
1234          * RX function.
1235          */
1236         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1237                 return 0;
1238
1239         rte_eth_copy_pci_info(dev, pci_dev);
1240
1241         hw->back = pci_dev;
1242         hw->hw_addr = (void *)pci_dev->mem_resource[0].addr;
1243
1244         igc_identify_hardware(dev, pci_dev);
1245         if (igc_setup_init_funcs(hw, false) != IGC_SUCCESS) {
1246                 error = -EIO;
1247                 goto err_late;
1248         }
1249
1250         igc_get_bus_info(hw);
1251
1252         /* Reset any pending lock */
1253         if (igc_reset_swfw_lock(hw) != IGC_SUCCESS) {
1254                 error = -EIO;
1255                 goto err_late;
1256         }
1257
1258         /* Finish initialization */
1259         if (igc_setup_init_funcs(hw, true) != IGC_SUCCESS) {
1260                 error = -EIO;
1261                 goto err_late;
1262         }
1263
1264         hw->mac.autoneg = 1;
1265         hw->phy.autoneg_wait_to_complete = 0;
1266         hw->phy.autoneg_advertised = IGC_ALL_SPEED_DUPLEX_2500;
1267
1268         /* Copper options */
1269         if (hw->phy.media_type == igc_media_type_copper) {
1270                 hw->phy.mdix = 0; /* AUTO_ALL_MODES */
1271                 hw->phy.disable_polarity_correction = 0;
1272                 hw->phy.ms_type = igc_ms_hw_default;
1273         }
1274
1275         /*
1276          * Start from a known state; this is important for reading the NVM
1277          * and MAC address afterwards.
1278          */
1279         igc_reset_hw(hw);
1280
1281         /* Make sure we have a good EEPROM before we read from it */
1282         if (igc_validate_nvm_checksum(hw) < 0) {
1283                 /*
1284                  * Some PCI-E parts fail the first check due to
1285                  * the link being in a sleep state; call it again, and
1286                  * if it fails a second time it's a real issue.
1287                  */
1288                 if (igc_validate_nvm_checksum(hw) < 0) {
1289                         PMD_INIT_LOG(ERR, "EEPROM checksum invalid");
1290                         error = -EIO;
1291                         goto err_late;
1292                 }
1293         }
1294
1295         /* Read the permanent MAC address out of the EEPROM */
1296         if (igc_read_mac_addr(hw) != 0) {
1297                 PMD_INIT_LOG(ERR, "EEPROM error while reading MAC address");
1298                 error = -EIO;
1299                 goto err_late;
1300         }
1301
1302         /* Allocate memory for storing MAC addresses */
1303         dev->data->mac_addrs = rte_zmalloc("igc",
1304                 RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count, 0);
1305         if (dev->data->mac_addrs == NULL) {
1306                 PMD_INIT_LOG(ERR, "Failed to allocate %d bytes for storing MAC",
1307                                 RTE_ETHER_ADDR_LEN * hw->mac.rar_entry_count);
1308                 error = -ENOMEM;
1309                 goto err_late;
1310         }
1311
1312         /* Copy the permanent MAC address */
1313         rte_ether_addr_copy((struct rte_ether_addr *)hw->mac.addr,
1314                         &dev->data->mac_addrs[0]);
1315
1316         /* Now initialize the hardware */
1317         if (igc_hardware_init(hw) != 0) {
1318                 PMD_INIT_LOG(ERR, "Hardware initialization failed");
1319                 rte_free(dev->data->mac_addrs);
1320                 dev->data->mac_addrs = NULL;
1321                 error = -ENODEV;
1322                 goto err_late;
1323         }
1324
1325         /* Pass the information to the rte_eth_dev_close() that it should also
1326          * release the private port resources.
1327          */
1328         dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
1329
1330         hw->mac.get_link_status = 1;
1331         igc->stopped = 0;
1332
1333         /* Indicate SOL/IDER usage */
1334         if (igc_check_reset_block(hw) < 0)
1335                 PMD_INIT_LOG(ERR,
1336                         "PHY reset is blocked due to SOL/IDER session.");
1337
1338         PMD_INIT_LOG(DEBUG, "port_id %d vendorID=0x%x deviceID=0x%x",
1339                         dev->data->port_id, pci_dev->id.vendor_id,
1340                         pci_dev->id.device_id);
1341
1342         rte_intr_callback_register(&pci_dev->intr_handle,
1343                         eth_igc_interrupt_handler, (void *)dev);
1344
1345         /* enable uio/vfio intr/eventfd mapping */
1346         rte_intr_enable(&pci_dev->intr_handle);
1347
1348         /* enable support intr */
1349         igc_intr_other_enable(dev);
1350
1351         /* initialize the per-queue statistics mapping */
1352         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1353                 igc->txq_stats_map[i] = -1;
1354                 igc->rxq_stats_map[i] = -1;
1355         }
1356
1357         igc_flow_init(dev);
1358         igc_clear_all_filter(dev);
1359         return 0;
1360
1361 err_late:
1362         igc_hw_control_release(hw);
1363         return error;
1364 }
1365
1366 static int
1367 eth_igc_dev_uninit(__rte_unused struct rte_eth_dev *eth_dev)
1368 {
1369         PMD_INIT_FUNC_TRACE();
1370
1371         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1372                 return 0;
1373
1374         eth_igc_close(eth_dev);
1375         return 0;
1376 }
1377
1378 static int
1379 eth_igc_reset(struct rte_eth_dev *dev)
1380 {
1381         int ret;
1382
1383         PMD_INIT_FUNC_TRACE();
1384
1385         ret = eth_igc_dev_uninit(dev);
1386         if (ret)
1387                 return ret;
1388
1389         return eth_igc_dev_init(dev);
1390 }
1391
1392 static int
1393 eth_igc_promiscuous_enable(struct rte_eth_dev *dev)
1394 {
1395         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1396         uint32_t rctl;
1397
1398         rctl = IGC_READ_REG(hw, IGC_RCTL);
1399         rctl |= (IGC_RCTL_UPE | IGC_RCTL_MPE);
1400         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1401         return 0;
1402 }
1403
1404 static int
1405 eth_igc_promiscuous_disable(struct rte_eth_dev *dev)
1406 {
1407         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1408         uint32_t rctl;
1409
1410         rctl = IGC_READ_REG(hw, IGC_RCTL);
1411         rctl &= (~IGC_RCTL_UPE);
1412         if (dev->data->all_multicast == 1)
1413                 rctl |= IGC_RCTL_MPE;
1414         else
1415                 rctl &= (~IGC_RCTL_MPE);
1416         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1417         return 0;
1418 }
1419
1420 static int
1421 eth_igc_allmulticast_enable(struct rte_eth_dev *dev)
1422 {
1423         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1424         uint32_t rctl;
1425
1426         rctl = IGC_READ_REG(hw, IGC_RCTL);
1427         rctl |= IGC_RCTL_MPE;
1428         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1429         return 0;
1430 }
1431
1432 static int
1433 eth_igc_allmulticast_disable(struct rte_eth_dev *dev)
1434 {
1435         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1436         uint32_t rctl;
1437
1438         if (dev->data->promiscuous == 1)
1439                 return 0;       /* must remain in all_multicast mode */
1440
1441         rctl = IGC_READ_REG(hw, IGC_RCTL);
1442         rctl &= (~IGC_RCTL_MPE);
1443         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1444         return 0;
1445 }
1446
1447 static int
1448 eth_igc_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
1449                        size_t fw_size)
1450 {
1451         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1452         struct igc_fw_version fw;
1453         int ret;
1454
1455         igc_get_fw_version(hw, &fw);
1456
1457         /* if the option ROM is valid, display its version too */
1458         if (fw.or_valid) {
1459                 ret = snprintf(fw_version, fw_size,
1460                          "%d.%d, 0x%08x, %d.%d.%d",
1461                          fw.eep_major, fw.eep_minor, fw.etrack_id,
1462                          fw.or_major, fw.or_build, fw.or_patch);
1463         /* no option ROM */
1464         } else {
1465                 if (fw.etrack_id != 0x0000) {
1466                         ret = snprintf(fw_version, fw_size,
1467                                  "%d.%d, 0x%08x",
1468                                  fw.eep_major, fw.eep_minor,
1469                                  fw.etrack_id);
1470                 } else {
1471                         ret = snprintf(fw_version, fw_size,
1472                                  "%d.%d.%d",
1473                                  fw.eep_major, fw.eep_minor,
1474                                  fw.eep_build);
1475                 }
1476         }
1477
1478         ret += 1; /* add one byte for the terminating '\0' */
1479         if (fw_size < (u32)ret)
1480                 return ret;
1481         else
1482                 return 0;
1483 }
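
     /*
      * A caller-side usage sketch (hedged; the buffer size is illustrative).
      * The callback above returns 0 on success, or the number of bytes needed
      * (including the terminating '\0') when fw_size is too small, so an
      * application can size the buffer in two calls:
      *
      *     char fw[32];
      *     int need = rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw));
      *     if (need > 0) {
      *         char *buf = malloc(need);
      *         if (buf != NULL)
      *             rte_eth_dev_fw_version_get(port_id, buf, need);
      *     }
      */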
1484
1485 static int
1486 eth_igc_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1487 {
1488         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1489
1490         dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */
1491         dev_info->max_rx_pktlen = MAX_RX_JUMBO_FRAME_SIZE;
1492         dev_info->max_mac_addrs = hw->mac.rar_entry_count;
1493         dev_info->rx_offload_capa = IGC_RX_OFFLOAD_ALL;
1494         dev_info->tx_offload_capa = IGC_TX_OFFLOAD_ALL;
1495         dev_info->rx_queue_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
1496
1497         dev_info->max_rx_queues = IGC_QUEUE_PAIRS_NUM;
1498         dev_info->max_tx_queues = IGC_QUEUE_PAIRS_NUM;
1499         dev_info->max_vmdq_pools = 0;
1500
1501         dev_info->hash_key_size = IGC_HKEY_MAX_INDEX * sizeof(uint32_t);
1502         dev_info->reta_size = ETH_RSS_RETA_SIZE_128;
1503         dev_info->flow_type_rss_offloads = IGC_RSS_OFFLOAD_ALL;
1504
1505         dev_info->default_rxconf = (struct rte_eth_rxconf) {
1506                 .rx_thresh = {
1507                         .pthresh = IGC_DEFAULT_RX_PTHRESH,
1508                         .hthresh = IGC_DEFAULT_RX_HTHRESH,
1509                         .wthresh = IGC_DEFAULT_RX_WTHRESH,
1510                 },
1511                 .rx_free_thresh = IGC_DEFAULT_RX_FREE_THRESH,
1512                 .rx_drop_en = 0,
1513                 .offloads = 0,
1514         };
1515
1516         dev_info->default_txconf = (struct rte_eth_txconf) {
1517                 .tx_thresh = {
1518                         .pthresh = IGC_DEFAULT_TX_PTHRESH,
1519                         .hthresh = IGC_DEFAULT_TX_HTHRESH,
1520                         .wthresh = IGC_DEFAULT_TX_WTHRESH,
1521                 },
1522                 .offloads = 0,
1523         };
1524
1525         dev_info->rx_desc_lim = rx_desc_lim;
1526         dev_info->tx_desc_lim = tx_desc_lim;
1527
1528         dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M |
1529                         ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M |
1530                         ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G;
1531
1532         dev_info->max_mtu = dev_info->max_rx_pktlen - IGC_ETH_OVERHEAD;
1533         dev_info->min_mtu = RTE_ETHER_MIN_MTU;
1534         return 0;
1535 }
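
     /*
      * Usage sketch from the application side (hedged; port_id is
      * illustrative):
      *
      *     struct rte_eth_dev_info info;
      *     if (rte_eth_dev_info_get(port_id, &info) == 0)
      *         printf("rx/tx queues: %u/%u, reta size: %u\n",
      *                info.max_rx_queues, info.max_tx_queues,
      *                info.reta_size);
      */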
1536
1537 static int
1538 eth_igc_led_on(struct rte_eth_dev *dev)
1539 {
1540         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1541
1542         return igc_led_on(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1543 }
1544
1545 static int
1546 eth_igc_led_off(struct rte_eth_dev *dev)
1547 {
1548         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1549
1550         return igc_led_off(hw) == IGC_SUCCESS ? 0 : -ENOTSUP;
1551 }
1552
1553 static const uint32_t *
1554 eth_igc_supported_ptypes_get(__rte_unused struct rte_eth_dev *dev)
1555 {
1556         static const uint32_t ptypes[] = {
1557                 /* refers to rx_desc_pkt_info_to_pkt_type() */
1558                 RTE_PTYPE_L2_ETHER,
1559                 RTE_PTYPE_L3_IPV4,
1560                 RTE_PTYPE_L3_IPV4_EXT,
1561                 RTE_PTYPE_L3_IPV6,
1562                 RTE_PTYPE_L3_IPV6_EXT,
1563                 RTE_PTYPE_L4_TCP,
1564                 RTE_PTYPE_L4_UDP,
1565                 RTE_PTYPE_L4_SCTP,
1566                 RTE_PTYPE_TUNNEL_IP,
1567                 RTE_PTYPE_INNER_L3_IPV6,
1568                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1569                 RTE_PTYPE_INNER_L4_TCP,
1570                 RTE_PTYPE_INNER_L4_UDP,
1571                 RTE_PTYPE_UNKNOWN
1572         };
1573
1574         return ptypes;
1575 }
1576
1577 static int
1578 eth_igc_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
1579 {
1580         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1581         uint32_t frame_size = mtu + IGC_ETH_OVERHEAD;
1582         uint32_t rctl;
1583
1584         /* if extended VLAN has been enabled */
1585         if (IGC_READ_REG(hw, IGC_CTRL_EXT) & IGC_CTRL_EXT_EXT_VLAN)
1586                 frame_size += VLAN_TAG_SIZE;
1587
1588         /* check that mtu is within the allowed range */
1589         if (mtu < RTE_ETHER_MIN_MTU ||
1590                 frame_size > MAX_RX_JUMBO_FRAME_SIZE)
1591                 return -EINVAL;
1592
1593         /*
1594          * refuse an MTU that requires scattered-packet support when this
1595          * feature has not been enabled before.
1596          */
1597         if (!dev->data->scattered_rx &&
1598             frame_size > dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM)
1599                 return -EINVAL;
1600
1601         rctl = IGC_READ_REG(hw, IGC_RCTL);
1602
1603         /* switch to jumbo mode if needed */
1604         if (mtu > RTE_ETHER_MTU) {
1605                 dev->data->dev_conf.rxmode.offloads |=
1606                         DEV_RX_OFFLOAD_JUMBO_FRAME;
1607                 rctl |= IGC_RCTL_LPE;
1608         } else {
1609                 dev->data->dev_conf.rxmode.offloads &=
1610                         ~DEV_RX_OFFLOAD_JUMBO_FRAME;
1611                 rctl &= ~IGC_RCTL_LPE;
1612         }
1613         IGC_WRITE_REG(hw, IGC_RCTL, rctl);
1614
1615         /* update max frame size */
1616         dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
1617
1618         IGC_WRITE_REG(hw, IGC_RLPML,
1619                         dev->data->dev_conf.rxmode.max_rx_pkt_len);
1620
1621         return 0;
1622 }
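
     /*
      * Worked example of the frame-size math above: a standard 1500-byte MTU
      * yields 1500 + 14 (Ethernet header) + 4 (CRC) + 4 (VLAN tag) = 1522
      * bytes, and extended (double) VLAN adds another 4 bytes, i.e. 1526.
      * Any MTU above RTE_ETHER_MTU (1500) switches the port into jumbo mode.
      */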
1623
1624 static int
1625 eth_igc_rar_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1626                 uint32_t index, uint32_t pool)
1627 {
1628         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1629
1630         igc_rar_set(hw, mac_addr->addr_bytes, index);
1631         RTE_SET_USED(pool);
1632         return 0;
1633 }
1634
1635 static void
1636 eth_igc_rar_clear(struct rte_eth_dev *dev, uint32_t index)
1637 {
1638         uint8_t addr[RTE_ETHER_ADDR_LEN];
1639         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1640
1641         memset(addr, 0, sizeof(addr));
1642         igc_rar_set(hw, addr, index);
1643 }
1644
1645 static int
1646 eth_igc_default_mac_addr_set(struct rte_eth_dev *dev,
1647                         struct rte_ether_addr *addr)
1648 {
1649         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1650         igc_rar_set(hw, addr->addr_bytes, 0);
1651         return 0;
1652 }
1653
1654 static int
1655 eth_igc_set_mc_addr_list(struct rte_eth_dev *dev,
1656                          struct rte_ether_addr *mc_addr_set,
1657                          uint32_t nb_mc_addr)
1658 {
1659         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1660         igc_update_mc_addr_list(hw, (u8 *)mc_addr_set, nb_mc_addr);
1661         return 0;
1662 }
1663
1664 /*
1665  * Read hardware registers
1666  */
1667 static void
1668 igc_read_stats_registers(struct igc_hw *hw, struct igc_hw_stats *stats)
1669 {
1670         int pause_frames;
1671
1672         uint64_t old_gprc  = stats->gprc;
1673         uint64_t old_gptc  = stats->gptc;
1674         uint64_t old_tpr   = stats->tpr;
1675         uint64_t old_tpt   = stats->tpt;
1676         uint64_t old_rpthc = stats->rpthc;
1677         uint64_t old_hgptc = stats->hgptc;
1678
1679         stats->crcerrs += IGC_READ_REG(hw, IGC_CRCERRS);
1680         stats->algnerrc += IGC_READ_REG(hw, IGC_ALGNERRC);
1681         stats->rxerrc += IGC_READ_REG(hw, IGC_RXERRC);
1682         stats->mpc += IGC_READ_REG(hw, IGC_MPC);
1683         stats->scc += IGC_READ_REG(hw, IGC_SCC);
1684         stats->ecol += IGC_READ_REG(hw, IGC_ECOL);
1685
1686         stats->mcc += IGC_READ_REG(hw, IGC_MCC);
1687         stats->latecol += IGC_READ_REG(hw, IGC_LATECOL);
1688         stats->colc += IGC_READ_REG(hw, IGC_COLC);
1689
1690         stats->dc += IGC_READ_REG(hw, IGC_DC);
1691         stats->tncrs += IGC_READ_REG(hw, IGC_TNCRS);
1692         stats->htdpmc += IGC_READ_REG(hw, IGC_HTDPMC);
1693         stats->rlec += IGC_READ_REG(hw, IGC_RLEC);
1694         stats->xonrxc += IGC_READ_REG(hw, IGC_XONRXC);
1695         stats->xontxc += IGC_READ_REG(hw, IGC_XONTXC);
1696
1697         /*
1698          * For watchdog management we need to know if we have been
1699          * paused during the last interval, so capture that here.
1700          */
1701         pause_frames = IGC_READ_REG(hw, IGC_XOFFRXC);
1702         stats->xoffrxc += pause_frames;
1703         stats->xofftxc += IGC_READ_REG(hw, IGC_XOFFTXC);
1704         stats->fcruc += IGC_READ_REG(hw, IGC_FCRUC);
1705         stats->prc64 += IGC_READ_REG(hw, IGC_PRC64);
1706         stats->prc127 += IGC_READ_REG(hw, IGC_PRC127);
1707         stats->prc255 += IGC_READ_REG(hw, IGC_PRC255);
1708         stats->prc511 += IGC_READ_REG(hw, IGC_PRC511);
1709         stats->prc1023 += IGC_READ_REG(hw, IGC_PRC1023);
1710         stats->prc1522 += IGC_READ_REG(hw, IGC_PRC1522);
1711         stats->gprc += IGC_READ_REG(hw, IGC_GPRC);
1712         stats->bprc += IGC_READ_REG(hw, IGC_BPRC);
1713         stats->mprc += IGC_READ_REG(hw, IGC_MPRC);
1714         stats->gptc += IGC_READ_REG(hw, IGC_GPTC);
1715
1716         /* For the 64-bit byte counters the low dword must be read first. */
1717         /* Both registers clear on the read of the high dword */
1718
1719         /* Workaround: CRC bytes are included in the counts; subtract 4 bytes/packet */
1720         stats->gorc += IGC_READ_REG(hw, IGC_GORCL);
1721         stats->gorc += ((uint64_t)IGC_READ_REG(hw, IGC_GORCH) << 32);
1722         stats->gorc -= (stats->gprc - old_gprc) * RTE_ETHER_CRC_LEN;
1723         stats->gotc += IGC_READ_REG(hw, IGC_GOTCL);
1724         stats->gotc += ((uint64_t)IGC_READ_REG(hw, IGC_GOTCH) << 32);
1725         stats->gotc -= (stats->gptc - old_gptc) * RTE_ETHER_CRC_LEN;
1726
1727         stats->rnbc += IGC_READ_REG(hw, IGC_RNBC);
1728         stats->ruc += IGC_READ_REG(hw, IGC_RUC);
1729         stats->rfc += IGC_READ_REG(hw, IGC_RFC);
1730         stats->roc += IGC_READ_REG(hw, IGC_ROC);
1731         stats->rjc += IGC_READ_REG(hw, IGC_RJC);
1732
1733         stats->mgprc += IGC_READ_REG(hw, IGC_MGTPRC);
1734         stats->mgpdc += IGC_READ_REG(hw, IGC_MGTPDC);
1735         stats->mgptc += IGC_READ_REG(hw, IGC_MGTPTC);
1736         stats->b2ospc += IGC_READ_REG(hw, IGC_B2OSPC);
1737         stats->b2ogprc += IGC_READ_REG(hw, IGC_B2OGPRC);
1738         stats->o2bgptc += IGC_READ_REG(hw, IGC_O2BGPTC);
1739         stats->o2bspc += IGC_READ_REG(hw, IGC_O2BSPC);
1740
1741         stats->tpr += IGC_READ_REG(hw, IGC_TPR);
1742         stats->tpt += IGC_READ_REG(hw, IGC_TPT);
1743
1744         stats->tor += IGC_READ_REG(hw, IGC_TORL);
1745         stats->tor += ((uint64_t)IGC_READ_REG(hw, IGC_TORH) << 32);
1746         stats->tor -= (stats->tpr - old_tpr) * RTE_ETHER_CRC_LEN;
1747         stats->tot += IGC_READ_REG(hw, IGC_TOTL);
1748         stats->tot += ((uint64_t)IGC_READ_REG(hw, IGC_TOTH) << 32);
1749         stats->tot -= (stats->tpt - old_tpt) * RTE_ETHER_CRC_LEN;
1750
1751         stats->ptc64 += IGC_READ_REG(hw, IGC_PTC64);
1752         stats->ptc127 += IGC_READ_REG(hw, IGC_PTC127);
1753         stats->ptc255 += IGC_READ_REG(hw, IGC_PTC255);
1754         stats->ptc511 += IGC_READ_REG(hw, IGC_PTC511);
1755         stats->ptc1023 += IGC_READ_REG(hw, IGC_PTC1023);
1756         stats->ptc1522 += IGC_READ_REG(hw, IGC_PTC1522);
1757         stats->mptc += IGC_READ_REG(hw, IGC_MPTC);
1758         stats->bptc += IGC_READ_REG(hw, IGC_BPTC);
1759         stats->tsctc += IGC_READ_REG(hw, IGC_TSCTC);
1760
1761         stats->iac += IGC_READ_REG(hw, IGC_IAC);
1762         stats->rpthc += IGC_READ_REG(hw, IGC_RPTHC);
1763         stats->hgptc += IGC_READ_REG(hw, IGC_HGPTC);
1764         stats->icrxdmtc += IGC_READ_REG(hw, IGC_ICRXDMTC);
1765
1766         /* Host to Card Statistics */
1767         stats->hgorc += IGC_READ_REG(hw, IGC_HGORCL);
1768         stats->hgorc += ((uint64_t)IGC_READ_REG(hw, IGC_HGORCH) << 32);
1769         stats->hgorc -= (stats->rpthc - old_rpthc) * RTE_ETHER_CRC_LEN;
1770         stats->hgotc += IGC_READ_REG(hw, IGC_HGOTCL);
1771         stats->hgotc += ((uint64_t)IGC_READ_REG(hw, IGC_HGOTCH) << 32);
1772         stats->hgotc -= (stats->hgptc - old_hgptc) * RTE_ETHER_CRC_LEN;
1773         stats->lenerrs += IGC_READ_REG(hw, IGC_LENERRS);
1774 }
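
     /*
      * A minimal sketch of the read order required by the 64-bit counter
      * pairs above, using a hypothetical helper (not part of this driver):
      *
      *     static uint64_t igc_read_counter64(struct igc_hw *hw,
      *                                        uint32_t lo_reg, uint32_t hi_reg)
      *     {
      *         uint64_t v = IGC_READ_REG(hw, lo_reg);         // low dword first
      *         v |= (uint64_t)IGC_READ_REG(hw, hi_reg) << 32; // clears the pair
      *         return v;
      *     }
      */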
1775
1776 /*
1777  * Write 0 to all queue status registers
1778  */
1779 static void
1780 igc_reset_queue_stats_register(struct igc_hw *hw)
1781 {
1782         int i;
1783
1784         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1785                 IGC_WRITE_REG(hw, IGC_PQGPRC(i), 0);
1786                 IGC_WRITE_REG(hw, IGC_PQGPTC(i), 0);
1787                 IGC_WRITE_REG(hw, IGC_PQGORC(i), 0);
1788                 IGC_WRITE_REG(hw, IGC_PQGOTC(i), 0);
1789                 IGC_WRITE_REG(hw, IGC_PQMPRC(i), 0);
1790                 IGC_WRITE_REG(hw, IGC_RQDPC(i), 0);
1791                 IGC_WRITE_REG(hw, IGC_TQDPC(i), 0);
1792         }
1793 }
1794
1795 /*
1796  * Read all hardware queue status registers
1797  */
1798 static void
1799 igc_read_queue_stats_register(struct rte_eth_dev *dev)
1800 {
1801         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1802         struct igc_hw_queue_stats *queue_stats =
1803                                 IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1804         int i;
1805
1806         /*
1807          * These registers are not cleared on read. Furthermore, each register
1808          * wraps around back to 0x00000000 on the increment after reaching
1809          * 0xFFFFFFFF and then continues normal count operation.
1810          */
1811         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1812                 union {
1813                         u64 ddword;
1814                         u32 dword[2];
1815                 } value;
1816                 u32 tmp;
1817
1818                 /*
1819                  * Read the register first; if the value is smaller than the
1820                  * previous reading, the register has wrapped around, so
1821                  * increment the high 4 bytes by 1 and replace the low 4
1822                  * bytes with the new value.
1823                  */
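                     /*
                      * Worked example (illustrative values): if the stored
                      * 64-bit total is 0x00000001FFFFFFF0 and the register now
                      * reads 0x00000005, the low dword went backwards, so the
                      * high dword becomes 0x00000002 and the low dword is
                      * replaced, giving 0x0000000200000005. The periodic
                      * IGC_ALARM_INTERVAL poll exists so that at most one wrap
                      * occurs between reads.
                      */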
1824                 tmp = IGC_READ_REG(hw, IGC_PQGPRC(i));
1825                 value.ddword = queue_stats->pqgprc[i];
1826                 if (value.dword[U32_0_IN_U64] > tmp)
1827                         value.dword[U32_1_IN_U64]++;
1828                 value.dword[U32_0_IN_U64] = tmp;
1829                 queue_stats->pqgprc[i] = value.ddword;
1830
1831                 tmp = IGC_READ_REG(hw, IGC_PQGPTC(i));
1832                 value.ddword = queue_stats->pqgptc[i];
1833                 if (value.dword[U32_0_IN_U64] > tmp)
1834                         value.dword[U32_1_IN_U64]++;
1835                 value.dword[U32_0_IN_U64] = tmp;
1836                 queue_stats->pqgptc[i] = value.ddword;
1837
1838                 tmp = IGC_READ_REG(hw, IGC_PQGORC(i));
1839                 value.ddword = queue_stats->pqgorc[i];
1840                 if (value.dword[U32_0_IN_U64] > tmp)
1841                         value.dword[U32_1_IN_U64]++;
1842                 value.dword[U32_0_IN_U64] = tmp;
1843                 queue_stats->pqgorc[i] = value.ddword;
1844
1845                 tmp = IGC_READ_REG(hw, IGC_PQGOTC(i));
1846                 value.ddword = queue_stats->pqgotc[i];
1847                 if (value.dword[U32_0_IN_U64] > tmp)
1848                         value.dword[U32_1_IN_U64]++;
1849                 value.dword[U32_0_IN_U64] = tmp;
1850                 queue_stats->pqgotc[i] = value.ddword;
1851
1852                 tmp = IGC_READ_REG(hw, IGC_PQMPRC(i));
1853                 value.ddword = queue_stats->pqmprc[i];
1854                 if (value.dword[U32_0_IN_U64] > tmp)
1855                         value.dword[U32_1_IN_U64]++;
1856                 value.dword[U32_0_IN_U64] = tmp;
1857                 queue_stats->pqmprc[i] = value.ddword;
1858
1859                 tmp = IGC_READ_REG(hw, IGC_RQDPC(i));
1860                 value.ddword = queue_stats->rqdpc[i];
1861                 if (value.dword[U32_0_IN_U64] > tmp)
1862                         value.dword[U32_1_IN_U64]++;
1863                 value.dword[U32_0_IN_U64] = tmp;
1864                 queue_stats->rqdpc[i] = value.ddword;
1865
1866                 tmp = IGC_READ_REG(hw, IGC_TQDPC(i));
1867                 value.ddword = queue_stats->tqdpc[i];
1868                 if (value.dword[U32_0_IN_U64] > tmp)
1869                         value.dword[U32_1_IN_U64]++;
1870                 value.dword[U32_0_IN_U64] = tmp;
1871                 queue_stats->tqdpc[i] = value.ddword;
1872         }
1873 }
1874
1875 static int
1876 eth_igc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *rte_stats)
1877 {
1878         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
1879         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1880         struct igc_hw_stats *stats = IGC_DEV_PRIVATE_STATS(dev);
1881         struct igc_hw_queue_stats *queue_stats =
1882                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1883         int i;
1884
1885         /*
1886          * Cancel the stats handler since it reads the queue stats registers
1887          */
1888         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1889
1890         /* Read the stats registers */
1891         igc_read_queue_stats_register(dev);
1892         igc_read_stats_registers(hw, stats);
1893
1894         if (rte_stats == NULL) {
1895                 /* Restart the queue stats handler */
1896                 rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1897                                 igc_update_queue_stats_handler, dev);
1898                 return -EINVAL;
1899         }
1900
1901         /* Rx Errors */
1902         rte_stats->imissed = stats->mpc;
1903         rte_stats->ierrors = stats->crcerrs +
1904                         stats->rlec + stats->ruc + stats->roc +
1905                         stats->rxerrc + stats->algnerrc;
1906
1907         /* Tx Errors */
1908         rte_stats->oerrors = stats->ecol + stats->latecol;
1909
1910         rte_stats->ipackets = stats->gprc;
1911         rte_stats->opackets = stats->gptc;
1912         rte_stats->ibytes   = stats->gorc;
1913         rte_stats->obytes   = stats->gotc;
1914
1915         /* Get per-queue statistics */
1916         for (i = 0; i < IGC_QUEUE_PAIRS_NUM; i++) {
1917                 /* Get TX queue statistics */
1918                 int map_id = igc->txq_stats_map[i];
1919                 if (map_id >= 0) {
1920                         rte_stats->q_opackets[map_id] += queue_stats->pqgptc[i];
1921                         rte_stats->q_obytes[map_id] += queue_stats->pqgotc[i];
1922                 }
1923                 /* Get RX queue statistics */
1924                 map_id = igc->rxq_stats_map[i];
1925                 if (map_id >= 0) {
1926                         rte_stats->q_ipackets[map_id] += queue_stats->pqgprc[i];
1927                         rte_stats->q_ibytes[map_id] += queue_stats->pqgorc[i];
1928                         rte_stats->q_errors[map_id] += queue_stats->rqdpc[i];
1929                 }
1930         }
1931
1932         /* Restart the queue stats handler */
1933         rte_eal_alarm_set(IGC_ALARM_INTERVAL,
1934                         igc_update_queue_stats_handler, dev);
1935         return 0;
1936 }
1937
1938 static int
1939 eth_igc_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1940                    unsigned int n)
1941 {
1942         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1943         struct igc_hw_stats *hw_stats =
1944                         IGC_DEV_PRIVATE_STATS(dev);
1945         unsigned int i;
1946
1947         igc_read_stats_registers(hw, hw_stats);
1948
1949         if (n < IGC_NB_XSTATS)
1950                 return IGC_NB_XSTATS;
1951
1952         /* If this is a reset, xstats is NULL and we have already cleared
1953          * the registers by reading them.
1954          */
1955         if (!xstats)
1956                 return 0;
1957
1958         /* Extended stats */
1959         for (i = 0; i < IGC_NB_XSTATS; i++) {
1960                 xstats[i].id = i;
1961                 xstats[i].value = *(uint64_t *)(((char *)hw_stats) +
1962                         rte_igc_stats_strings[i].offset);
1963         }
1964
1965         return IGC_NB_XSTATS;
1966 }
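
     /*
      * Typical two-call retrieval pattern from the application side (hedged
      * sketch; port_id is illustrative). A first call with a NULL array
      * returns the required count, matching the n < IGC_NB_XSTATS path above:
      *
      *     int n = rte_eth_xstats_get(port_id, NULL, 0);
      *     if (n > 0) {
      *         struct rte_eth_xstat *xs = malloc(n * sizeof(*xs));
      *         if (xs != NULL && rte_eth_xstats_get(port_id, xs, n) == n) {
      *             // xs[i].id and xs[i].value are valid here
      *         }
      *         free(xs);
      *     }
      */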
1967
1968 static int
1969 eth_igc_xstats_reset(struct rte_eth_dev *dev)
1970 {
1971         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
1972         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
1973         struct igc_hw_queue_stats *queue_stats =
1974                         IGC_DEV_PRIVATE_QUEUE_STATS(dev);
1975
1976         /* Cancel the queue stats handler to avoid conflicts */
1977         rte_eal_alarm_cancel(igc_update_queue_stats_handler, dev);
1978
1979         /* HW registers are cleared on read */
1980         igc_reset_queue_stats_register(hw);
1981         igc_read_stats_registers(hw, hw_stats);
1982
1983         /* Reset software totals */
1984         memset(hw_stats, 0, sizeof(*hw_stats));
1985         memset(queue_stats, 0, sizeof(*queue_stats));
1986
1987         /* Restart the queue status handler */
1988         rte_eal_alarm_set(IGC_ALARM_INTERVAL, igc_update_queue_stats_handler,
1989                         dev);
1990
1991         return 0;
1992 }
1993
1994 static int
1995 eth_igc_xstats_get_names(__rte_unused struct rte_eth_dev *dev,
1996         struct rte_eth_xstat_name *xstats_names, unsigned int size)
1997 {
1998         unsigned int i;
1999
2000         if (xstats_names == NULL)
2001                 return IGC_NB_XSTATS;
2002
2003         if (size < IGC_NB_XSTATS) {
2004                 PMD_DRV_LOG(ERR, "xstats names array too small!");
2005                 return IGC_NB_XSTATS;
2006         }
2007
2008         for (i = 0; i < IGC_NB_XSTATS; i++)
2009                 strlcpy(xstats_names[i].name, rte_igc_stats_strings[i].name,
2010                         sizeof(xstats_names[i].name));
2011
2012         return IGC_NB_XSTATS;
2013 }
2014
2015 static int
2016 eth_igc_xstats_get_names_by_id(struct rte_eth_dev *dev,
2017                 struct rte_eth_xstat_name *xstats_names, const uint64_t *ids,
2018                 unsigned int limit)
2019 {
2020         unsigned int i;
2021
2022         if (!ids)
2023                 return eth_igc_xstats_get_names(dev, xstats_names, limit);
2024
2025         for (i = 0; i < limit; i++) {
2026                 if (ids[i] >= IGC_NB_XSTATS) {
2027                         PMD_DRV_LOG(ERR, "id value isn't valid");
2028                         return -EINVAL;
2029                 }
2030                 strlcpy(xstats_names[i].name,
2031                         rte_igc_stats_strings[ids[i]].name,
2032                         sizeof(xstats_names[i].name));
2033         }
2034         return limit;
2035 }
2036
2037 static int
2038 eth_igc_xstats_get_by_id(struct rte_eth_dev *dev, const uint64_t *ids,
2039                 uint64_t *values, unsigned int n)
2040 {
2041         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2042         struct igc_hw_stats *hw_stats = IGC_DEV_PRIVATE_STATS(dev);
2043         unsigned int i;
2044
2045         igc_read_stats_registers(hw, hw_stats);
2046
2047         if (!ids) {
2048                 if (n < IGC_NB_XSTATS)
2049                         return IGC_NB_XSTATS;
2050
2051                 /* If this is a reset, xstats is NULL and we have already
2052                  * cleared the registers by reading them.
2053                  */
2054                 if (!values)
2055                         return 0;
2056
2057                 /* Extended stats */
2058                 for (i = 0; i < IGC_NB_XSTATS; i++)
2059                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2060                                         rte_igc_stats_strings[i].offset);
2061
2062                 return IGC_NB_XSTATS;
2063
2064         } else {
2065                 for (i = 0; i < n; i++) {
2066                         if (ids[i] >= IGC_NB_XSTATS) {
2067                                 PMD_DRV_LOG(ERR, "id value isn't valid");
2068                                 return -EINVAL;
2069                         }
2070                         values[i] = *(uint64_t *)(((char *)hw_stats) +
2071                                         rte_igc_stats_strings[ids[i]].offset);
2072                 }
2073                 return n;
2074         }
2075 }
2076
2077 static int
2078 eth_igc_queue_stats_mapping_set(struct rte_eth_dev *dev,
2079                 uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx)
2080 {
2081         struct igc_adapter *igc = IGC_DEV_PRIVATE(dev);
2082
2083         /* check that the queue id is valid */
2084         if (queue_id >= IGC_QUEUE_PAIRS_NUM) {
2085                 PMD_DRV_LOG(ERR, "queue id(%u) error, max is %u",
2086                         queue_id, IGC_QUEUE_PAIRS_NUM - 1);
2087                 return -EINVAL;
2088         }
2089
2090         /* store the stats mapping id */
2091         if (is_rx)
2092                 igc->rxq_stats_map[queue_id] = stat_idx;
2093         else
2094                 igc->txq_stats_map[queue_id] = stat_idx;
2095
2096         return 0;
2097 }
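
     /*
      * Usage sketch (application side, hedged; queue and stat ids are
      * illustrative): mapping RX/TX queue 2 to stats slot 0 makes
      * rte_eth_stats_get() report that queue in q_ipackets[0]/q_opackets[0]:
      *
      *     rte_eth_dev_set_rx_queue_stats_mapping(port_id, 2, 0);
      *     rte_eth_dev_set_tx_queue_stats_mapping(port_id, 2, 0);
      */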
2098
2099 static int
2100 eth_igc_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
2101 {
2102         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2103         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2104         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2105         uint32_t vec = IGC_MISC_VEC_ID;
2106
2107         if (rte_intr_allow_others(intr_handle))
2108                 vec = IGC_RX_VEC_START;
2109
2110         uint32_t mask = 1u << (queue_id + vec);
2111
2112         IGC_WRITE_REG(hw, IGC_EIMC, mask);
2113         IGC_WRITE_FLUSH(hw);
2114
2115         return 0;
2116 }
2117
2118 static int
2119 eth_igc_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
2120 {
2121         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2122         struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
2123         struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
2124         uint32_t vec = IGC_MISC_VEC_ID;
2125
2126         if (rte_intr_allow_others(intr_handle))
2127                 vec = IGC_RX_VEC_START;
2128
2129         uint32_t mask = 1u << (queue_id + vec);
2130
2131         IGC_WRITE_REG(hw, IGC_EIMS, mask);
2132         IGC_WRITE_FLUSH(hw);
2133
2134         rte_intr_enable(intr_handle);
2135
2136         return 0;
2137 }
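
     /*
      * Worked example of the mask math above (assuming the usual case where
      * rte_intr_allow_others() is true and RTE_INTR_VEC_RXTX_OFFSET is 1):
      * enabling the interrupt of RX queue 2 gives vec = 1 and
      * mask = 1u << (2 + 1) = 0x8, which is written to EIMS (or to EIMC when
      * disabling).
      */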
2138
2139 static int
2140 eth_igc_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2141 {
2142         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2143         uint32_t ctrl;
2144         int tx_pause;
2145         int rx_pause;
2146
2147         fc_conf->pause_time = hw->fc.pause_time;
2148         fc_conf->high_water = hw->fc.high_water;
2149         fc_conf->low_water = hw->fc.low_water;
2150         fc_conf->send_xon = hw->fc.send_xon;
2151         fc_conf->autoneg = hw->mac.autoneg;
2152
2153         /*
2154          * Return the rx_pause and tx_pause status according to the actual
2155          * setting of the TFCE and RFCE bits in the CTRL register.
2156          */
2157         ctrl = IGC_READ_REG(hw, IGC_CTRL);
2158         if (ctrl & IGC_CTRL_TFCE)
2159                 tx_pause = 1;
2160         else
2161                 tx_pause = 0;
2162
2163         if (ctrl & IGC_CTRL_RFCE)
2164                 rx_pause = 1;
2165         else
2166                 rx_pause = 0;
2167
2168         if (rx_pause && tx_pause)
2169                 fc_conf->mode = RTE_FC_FULL;
2170         else if (rx_pause)
2171                 fc_conf->mode = RTE_FC_RX_PAUSE;
2172         else if (tx_pause)
2173                 fc_conf->mode = RTE_FC_TX_PAUSE;
2174         else
2175                 fc_conf->mode = RTE_FC_NONE;
2176
2177         return 0;
2178 }
2179
2180 static int
2181 eth_igc_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
2182 {
2183         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2184         uint32_t rx_buf_size;
2185         uint32_t max_high_water;
2186         uint32_t rctl;
2187         int err;
2188
2189         if (fc_conf->autoneg != hw->mac.autoneg)
2190                 return -ENOTSUP;
2191
2192         rx_buf_size = igc_get_rx_buffer_size(hw);
2193         PMD_DRV_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size);
2194
2195         /* Reserve at least one Ethernet frame for the watermark */
2196         max_high_water = rx_buf_size - RTE_ETHER_MAX_LEN;
2197         if (fc_conf->high_water > max_high_water ||
2198                 fc_conf->high_water < fc_conf->low_water) {
2199                 PMD_DRV_LOG(ERR,
2200                         "Incorrect high(%u)/low(%u) water value, max is %u",
2201                         fc_conf->high_water, fc_conf->low_water,
2202                         max_high_water);
2203                 return -EINVAL;
2204         }
2205
2206         switch (fc_conf->mode) {
2207         case RTE_FC_NONE:
2208                 hw->fc.requested_mode = igc_fc_none;
2209                 break;
2210         case RTE_FC_RX_PAUSE:
2211                 hw->fc.requested_mode = igc_fc_rx_pause;
2212                 break;
2213         case RTE_FC_TX_PAUSE:
2214                 hw->fc.requested_mode = igc_fc_tx_pause;
2215                 break;
2216         case RTE_FC_FULL:
2217                 hw->fc.requested_mode = igc_fc_full;
2218                 break;
2219         default:
2220                 PMD_DRV_LOG(ERR, "unsupported fc mode: %u", fc_conf->mode);
2221                 return -EINVAL;
2222         }
2223
2224         hw->fc.pause_time     = fc_conf->pause_time;
2225         hw->fc.high_water     = fc_conf->high_water;
2226         hw->fc.low_water      = fc_conf->low_water;
2227         hw->fc.send_xon       = fc_conf->send_xon;
2228
2229         err = igc_setup_link_generic(hw);
2230         if (err == IGC_SUCCESS) {
2231                 /*
2232                  * check if we want to forward MAC control frames - the driver
2233                  * doesn't have a native capability to do that, so we write the
2234                  * registers ourselves
2235                  */
2236                 rctl = IGC_READ_REG(hw, IGC_RCTL);
2237
2238                 /* set or clear the RCTL.PMCF bit depending on the configuration */
2239                 if (fc_conf->mac_ctrl_frame_fwd != 0)
2240                         rctl |= IGC_RCTL_PMCF;
2241                 else
2242                         rctl &= ~IGC_RCTL_PMCF;
2243
2244                 IGC_WRITE_REG(hw, IGC_RCTL, rctl);
2245                 IGC_WRITE_FLUSH(hw);
2246
2247                 return 0;
2248         }
2249
2250         PMD_DRV_LOG(ERR, "igc_setup_link_generic = 0x%x", err);
2251         return -EIO;
2252 }
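
     /*
      * Configuration sketch (application side, hedged; the watermark values
      * are illustrative and must pass the checks above, i.e. low_water <=
      * high_water <= rx_buf_size - RTE_ETHER_MAX_LEN, and autoneg must match
      * the current MAC setting):
      *
      *     struct rte_eth_fc_conf fc = {
      *         .mode = RTE_FC_FULL,
      *         .high_water = 0x5000,
      *         .low_water = 0x3000,
      *         .pause_time = 0x680,
      *         .send_xon = 1,
      *         .autoneg = 1,
      *     };
      *     rte_eth_dev_flow_ctrl_set(port_id, &fc);
      */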
2253
2254 static int
2255 eth_igc_rss_reta_update(struct rte_eth_dev *dev,
2256                         struct rte_eth_rss_reta_entry64 *reta_conf,
2257                         uint16_t reta_size)
2258 {
2259         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2260         uint16_t i;
2261
2262         if (reta_size != ETH_RSS_RETA_SIZE_128) {
2263                 PMD_DRV_LOG(ERR,
2264                         "The size of the RSS redirection table configured (%d) doesn't match the number supported by hardware (%d)",
2265                         reta_size, ETH_RSS_RETA_SIZE_128);
2266                 return -EINVAL;
2267         }
2268
2269         RTE_BUILD_BUG_ON(ETH_RSS_RETA_SIZE_128 % IGC_RSS_RDT_REG_SIZE);
2270
2271         /* set redirection table */
2272         for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2273                 union igc_rss_reta_reg reta, reg;
2274                 uint16_t idx, shift;
2275                 uint8_t j, mask;
2276
2277                 idx = i / RTE_RETA_GROUP_SIZE;
2278                 shift = i % RTE_RETA_GROUP_SIZE;
2279                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2280                                 IGC_RSS_RDT_REG_SIZE_MASK);
2281
2282                 /* if no need to update the register */
2283                 if (!mask ||
2284                     shift > (RTE_RETA_GROUP_SIZE - IGC_RSS_RDT_REG_SIZE))
2285                         continue;
2286
2287                 /* check the mask to see whether the register value must be read first */
2288                 if (mask == IGC_RSS_RDT_REG_SIZE_MASK)
2289                         reg.dword = 0;
2290                 else
2291                         reg.dword = IGC_READ_REG_LE_VALUE(hw,
2292                                         IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2293
2294                 /* update the register */
2295                 RTE_BUILD_BUG_ON(sizeof(reta.bytes) != IGC_RSS_RDT_REG_SIZE);
2296                 for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2297                         if (mask & (1u << j))
2298                                 reta.bytes[j] =
2299                                         (uint8_t)reta_conf[idx].reta[shift + j];
2300                         else
2301                                 reta.bytes[j] = reg.bytes[j];
2302                 }
2303                 IGC_WRITE_REG_LE_VALUE(hw,
2304                         IGC_RETA(i / IGC_RSS_RDT_REG_SIZE), reta.dword);
2305         }
2306
2307         return 0;
2308 }
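
     /*
      * Worked example of the index math above (RTE_RETA_GROUP_SIZE is 64;
      * IGC_RSS_RDT_REG_SIZE is assumed to be 4 entries per 32-bit register):
      * for i = 72, idx = 72 / 64 = 1 and shift = 72 % 64 = 8, so the 4-entry
      * mask comes from bits 8..11 of reta_conf[1].mask and the entries are
      * written to RETA register 72 / 4 = 18.
      */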
2309
2310 static int
2311 eth_igc_rss_reta_query(struct rte_eth_dev *dev,
2312                        struct rte_eth_rss_reta_entry64 *reta_conf,
2313                        uint16_t reta_size)
2314 {
2315         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2316         uint16_t i;
2317
2318         if (reta_size != ETH_RSS_RETA_SIZE_128) {
2319                 PMD_DRV_LOG(ERR,
2320                         "The size of the RSS redirection table configured (%d) doesn't match the number supported by hardware (%d)",
2321                         reta_size, ETH_RSS_RETA_SIZE_128);
2322                 return -EINVAL;
2323         }
2324
2325         RTE_BUILD_BUG_ON(ETH_RSS_RETA_SIZE_128 % IGC_RSS_RDT_REG_SIZE);
2326
2327         /* read redirection table */
2328         for (i = 0; i < ETH_RSS_RETA_SIZE_128; i += IGC_RSS_RDT_REG_SIZE) {
2329                 union igc_rss_reta_reg reta;
2330                 uint16_t idx, shift;
2331                 uint8_t j, mask;
2332
2333                 idx = i / RTE_RETA_GROUP_SIZE;
2334                 shift = i % RTE_RETA_GROUP_SIZE;
2335                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
2336                                 IGC_RSS_RDT_REG_SIZE_MASK);
2337
2338                 /* if no need to read the register */
2339                 if (!mask ||
2340                     shift > (RTE_RETA_GROUP_SIZE - IGC_RSS_RDT_REG_SIZE))
2341                         continue;
2342
2343                 /* read register and get the queue index */
2344                 RTE_BUILD_BUG_ON(sizeof(reta.bytes) != IGC_RSS_RDT_REG_SIZE);
2345                 reta.dword = IGC_READ_REG_LE_VALUE(hw,
2346                                 IGC_RETA(i / IGC_RSS_RDT_REG_SIZE));
2347                 for (j = 0; j < IGC_RSS_RDT_REG_SIZE; j++) {
2348                         if (mask & (1u << j))
2349                                 reta_conf[idx].reta[shift + j] = reta.bytes[j];
2350                 }
2351         }
2352
2353         return 0;
2354 }
2355
2356 static int
2357 eth_igc_rss_hash_update(struct rte_eth_dev *dev,
2358                         struct rte_eth_rss_conf *rss_conf)
2359 {
2360         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2361         igc_hw_rss_hash_set(hw, rss_conf);
2362         return 0;
2363 }
2364
2365 static int
2366 eth_igc_rss_hash_conf_get(struct rte_eth_dev *dev,
2367                         struct rte_eth_rss_conf *rss_conf)
2368 {
2369         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2370         uint32_t *hash_key = (uint32_t *)rss_conf->rss_key;
2371         uint32_t mrqc;
2372         uint64_t rss_hf;
2373
2374         if (hash_key != NULL) {
2375                 int i;
2376
2377                 /* if not enough space to store the hash key */
2378                 if (rss_conf->rss_key_len != IGC_HKEY_SIZE) {
2379                         PMD_DRV_LOG(ERR,
2380                                 "RSS hash key size %u in the parameter doesn't match the hardware hash key size %u",
2381                                 rss_conf->rss_key_len, IGC_HKEY_SIZE);
2382                         return -EINVAL;
2383                 }
2384
2385                 /* read RSS key from register */
2386                 for (i = 0; i < IGC_HKEY_MAX_INDEX; i++)
2387                         hash_key[i] = IGC_READ_REG_LE_VALUE(hw, IGC_RSSRK(i));
2388         }
2389
2390         /* get RSS functions configured in MRQC register */
2391         mrqc = IGC_READ_REG(hw, IGC_MRQC);
2392         if ((mrqc & IGC_MRQC_ENABLE_RSS_4Q) == 0)
2393                 return 0;
2394
2395         rss_hf = 0;
2396         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4)
2397                 rss_hf |= ETH_RSS_IPV4;
2398         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_TCP)
2399                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2400         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6)
2401                 rss_hf |= ETH_RSS_IPV6;
2402         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_EX)
2403                 rss_hf |= ETH_RSS_IPV6_EX;
2404         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP)
2405                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2406         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_TCP_EX)
2407                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2408         if (mrqc & IGC_MRQC_RSS_FIELD_IPV4_UDP)
2409                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2410         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP)
2411                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2412         if (mrqc & IGC_MRQC_RSS_FIELD_IPV6_UDP_EX)
2413                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2414
2415         rss_conf->rss_hf |= rss_hf;
2416         return 0;
2417 }
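
     /*
      * Query sketch (application side, hedged; assumes the 40-byte hash key
      * implied by IGC_HKEY_MAX_INDEX * sizeof(uint32_t)):
      *
      *     uint8_t key[40];
      *     struct rte_eth_rss_conf conf = {
      *         .rss_key = key,
      *         .rss_key_len = sizeof(key),
      *     };
      *     if (rte_eth_dev_rss_hash_conf_get(port_id, &conf) == 0) {
      *         // conf.rss_hf now reflects the MRQC configuration
      *     }
      */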
2418
2419 static int
2420 eth_igc_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2421 {
2422         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2423         struct igc_vfta *shadow_vfta = IGC_DEV_PRIVATE_VFTA(dev);
2424         uint32_t vfta;
2425         uint32_t vid_idx;
2426         uint32_t vid_bit;
2427
2428         vid_idx = (vlan_id >> IGC_VFTA_ENTRY_SHIFT) & IGC_VFTA_ENTRY_MASK;
2429         vid_bit = 1u << (vlan_id & IGC_VFTA_ENTRY_BIT_SHIFT_MASK);
2430         vfta = shadow_vfta->vfta[vid_idx];
2431         if (on)
2432                 vfta |= vid_bit;
2433         else
2434                 vfta &= ~vid_bit;
2435         IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, vid_idx, vfta);
2436
2437         /* update local VFTA copy */
2438         shadow_vfta->vfta[vid_idx] = vfta;
2439
2440         return 0;
2441 }
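
     /*
      * Worked example of the VFTA indexing above (assuming 32-bit VFTA
      * entries, i.e. IGC_VFTA_ENTRY_SHIFT == 5): vlan_id 100 gives
      * vid_idx = 100 >> 5 = 3 and vid_bit = 1u << (100 & 0x1F) = 1u << 4,
      * so bit 4 of VFTA[3] controls whether VLAN 100 passes the filter.
      */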
2442
2443 static void
2444 igc_vlan_hw_filter_disable(struct rte_eth_dev *dev)
2445 {
2446         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2447         igc_read_reg_check_clear_bits(hw, IGC_RCTL,
2448                         IGC_RCTL_CFIEN | IGC_RCTL_VFE);
2449 }
2450
2451 static void
2452 igc_vlan_hw_filter_enable(struct rte_eth_dev *dev)
2453 {
2454         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2455         struct igc_vfta *shadow_vfta = IGC_DEV_PRIVATE_VFTA(dev);
2456         uint32_t reg_val;
2457         int i;
2458
2459         /* Filter Table Enable, CFI not used for packet acceptance */
2460         reg_val = IGC_READ_REG(hw, IGC_RCTL);
2461         reg_val &= ~IGC_RCTL_CFIEN;
2462         reg_val |= IGC_RCTL_VFE;
2463         IGC_WRITE_REG(hw, IGC_RCTL, reg_val);
2464
2465         /* restore VFTA table */
2466         for (i = 0; i < IGC_VFTA_SIZE; i++)
2467                 IGC_WRITE_REG_ARRAY(hw, IGC_VFTA, i, shadow_vfta->vfta[i]);
2468 }
2469
2470 static void
2471 igc_vlan_hw_strip_disable(struct rte_eth_dev *dev)
2472 {
2473         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2474
2475         igc_read_reg_check_clear_bits(hw, IGC_CTRL, IGC_CTRL_VME);
2476 }
2477
2478 static void
2479 igc_vlan_hw_strip_enable(struct rte_eth_dev *dev)
2480 {
2481         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2482
2483         igc_read_reg_check_set_bits(hw, IGC_CTRL, IGC_CTRL_VME);
2484 }
2485
2486 static int
2487 igc_vlan_hw_extend_disable(struct rte_eth_dev *dev)
2488 {
2489         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2490         uint32_t ctrl_ext;
2491
2492         ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
2493
2494         /* if extended VLAN hasn't been enabled */
2495         if ((ctrl_ext & IGC_CTRL_EXT_EXT_VLAN) == 0)
2496                 return 0;
2497
2498         if ((dev->data->dev_conf.rxmode.offloads &
2499                         DEV_RX_OFFLOAD_JUMBO_FRAME) == 0)
2500                 goto write_ext_vlan;
2501
2502         /* Update maximum packet length */
2503         if (dev->data->dev_conf.rxmode.max_rx_pkt_len <
2504                 RTE_ETHER_MIN_MTU + VLAN_TAG_SIZE) {
2505                 PMD_DRV_LOG(ERR, "Maximum packet length %u error, min is %u",
2506                         dev->data->dev_conf.rxmode.max_rx_pkt_len,
2507                         VLAN_TAG_SIZE + RTE_ETHER_MIN_MTU);
2508                 return -EINVAL;
2509         }
2510         dev->data->dev_conf.rxmode.max_rx_pkt_len -= VLAN_TAG_SIZE;
2511         IGC_WRITE_REG(hw, IGC_RLPML,
2512                 dev->data->dev_conf.rxmode.max_rx_pkt_len);
2513
2514 write_ext_vlan:
2515         IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext & ~IGC_CTRL_EXT_EXT_VLAN);
2516         return 0;
2517 }
2518
2519 static int
2520 igc_vlan_hw_extend_enable(struct rte_eth_dev *dev)
2521 {
2522         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2523         uint32_t ctrl_ext;
2524
2525         ctrl_ext = IGC_READ_REG(hw, IGC_CTRL_EXT);
2526
2527         /* if extended VLAN has been enabled */
2528         if (ctrl_ext & IGC_CTRL_EXT_EXT_VLAN)
2529                 return 0;
2530
2531         if ((dev->data->dev_conf.rxmode.offloads &
2532                         DEV_RX_OFFLOAD_JUMBO_FRAME) == 0)
2533                 goto write_ext_vlan;
2534
2535         /* Update maximum packet length */
2536         if (dev->data->dev_conf.rxmode.max_rx_pkt_len >
2537                 MAX_RX_JUMBO_FRAME_SIZE - VLAN_TAG_SIZE) {
2538                 PMD_DRV_LOG(ERR, "Maximum packet length %u error, max is %u",
2539                         dev->data->dev_conf.rxmode.max_rx_pkt_len +
2540                         VLAN_TAG_SIZE, MAX_RX_JUMBO_FRAME_SIZE);
2541                 return -EINVAL;
2542         }
2543         dev->data->dev_conf.rxmode.max_rx_pkt_len += VLAN_TAG_SIZE;
2544         IGC_WRITE_REG(hw, IGC_RLPML,
2545                 dev->data->dev_conf.rxmode.max_rx_pkt_len);
2546
2547 write_ext_vlan:
2548         IGC_WRITE_REG(hw, IGC_CTRL_EXT, ctrl_ext | IGC_CTRL_EXT_EXT_VLAN);
2549         return 0;
2550 }
2551
2552 static int
2553 eth_igc_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2554 {
2555         struct rte_eth_rxmode *rxmode;
2556
2557         rxmode = &dev->data->dev_conf.rxmode;
2558         if (mask & ETH_VLAN_STRIP_MASK) {
2559                 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2560                         igc_vlan_hw_strip_enable(dev);
2561                 else
2562                         igc_vlan_hw_strip_disable(dev);
2563         }
2564
2565         if (mask & ETH_VLAN_FILTER_MASK) {
2566                 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2567                         igc_vlan_hw_filter_enable(dev);
2568                 else
2569                         igc_vlan_hw_filter_disable(dev);
2570         }
2571
2572         if (mask & ETH_VLAN_EXTEND_MASK) {
2573                 if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2574                         return igc_vlan_hw_extend_enable(dev);
2575                 else
2576                         return igc_vlan_hw_extend_disable(dev);
2577         }
2578
2579         return 0;
2580 }
2581
2582 static int
2583 eth_igc_vlan_tpid_set(struct rte_eth_dev *dev,
2584                       enum rte_vlan_type vlan_type,
2585                       uint16_t tpid)
2586 {
2587         struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
2588         uint32_t reg_val;
2589
2590         /* only the outer TPID of a double VLAN can be configured */
2591         if (vlan_type == ETH_VLAN_TYPE_OUTER) {
2592                 reg_val = IGC_READ_REG(hw, IGC_VET);
2593                 reg_val = (reg_val & (~IGC_VET_EXT)) |
2594                         ((uint32_t)tpid << IGC_VET_EXT_SHIFT);
2595                 IGC_WRITE_REG(hw, IGC_VET, reg_val);
2596
2597                 return 0;
2598         }
2599
2600         /* all other TPID values are read-only */
2601         PMD_DRV_LOG(ERR, "Not supported");
2602         return -ENOTSUP;
2603 }
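
     /*
      * Register layout note (sketch): VET keeps the inner TPID in its low 16
      * bits and the outer (extended) TPID in bits 31:16, which is why only
      * ETH_VLAN_TYPE_OUTER is writable here. For example, tpid = 0x88A8
      * (802.1ad) is stored as 0x88A8 << 16 in the upper half.
      */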
2604
2605 static int
2606 eth_igc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2607         struct rte_pci_device *pci_dev)
2608 {
2609         PMD_INIT_FUNC_TRACE();
2610         return rte_eth_dev_pci_generic_probe(pci_dev,
2611                 sizeof(struct igc_adapter), eth_igc_dev_init);
2612 }
2613
2614 static int
2615 eth_igc_pci_remove(struct rte_pci_device *pci_dev)
2616 {
2617         PMD_INIT_FUNC_TRACE();
2618         return rte_eth_dev_pci_generic_remove(pci_dev, eth_igc_dev_uninit);
2619 }
2620
2621 static struct rte_pci_driver rte_igc_pmd = {
2622         .id_table = pci_id_igc_map,
2623         .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
2624         .probe = eth_igc_pci_probe,
2625         .remove = eth_igc_pci_remove,
2626 };
2627
2628 RTE_PMD_REGISTER_PCI(net_igc, rte_igc_pmd);
2629 RTE_PMD_REGISTER_PCI_TABLE(net_igc, pci_id_igc_map);
2630 RTE_PMD_REGISTER_KMOD_DEP(net_igc, "* igb_uio | uio_pci_generic | vfio-pci");