kni: initial import
[dpdk.git] / lib / librte_eal / linuxapp / kni / ethtool / igb / igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#ifdef NETIF_F_TSO
#include <net/checksum.h>
#ifdef NETIF_F_TSO6
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#endif
#endif
#ifdef SIOCGMIIPHY
#include <linux/mii.h>
#endif
#ifdef SIOCETHTOOL
#include <linux/ethtool.h>
#endif
#include <linux/if_vlan.h>
#ifdef CONFIG_PM_RUNTIME
#include <linux/pm_runtime.h>
#endif /* CONFIG_PM_RUNTIME */

#include "igb.h"
#include "igb_vmdq.h"

#include <linux/uio_driver.h>

#define DRV_DEBUG
#define DRV_HW_PERF
#define VERSION_SUFFIX

#define MAJ 3
#define MIN 4
#define BUILD 8
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF
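/*
 * With the MAJ/MIN/BUILD values above and the suffix macros defined empty,
 * DRV_VERSION expands to the string "3.4.8".
 */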

char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) },
	/* required last entry */
	{0, }
};

//MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__));
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
#ifdef HAVE_HW_TIME_STAMP
static void igb_init_hw_timer(struct igb_adapter *adapter);
#endif
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static void igb_dma_err_task(struct work_struct *);
static void igb_dma_err_timer(unsigned long data);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
void igb_full_sync_mac_table(struct igb_adapter *adapter);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
#ifdef HAVE_VLAN_RX_REGISTER
static void igb_vlan_mode(struct net_device *, struct vlan_group *);
#endif
#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
#else
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
#endif
static void igb_restore_vlan(struct igb_adapter *);
void igb_rar_set(struct igb_adapter *adapter, u32 index);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static void igb_process_mdd_event(struct igb_adapter *);
#ifdef IFLA_VF_MAX
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#endif
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
#endif
#ifdef CONFIG_PM
#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
static int igb_suspend(struct device *dev);
static int igb_resume(struct device *dev);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif /* CONFIG_PM_RUNTIME */
static const struct dev_pm_ops igb_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
#ifdef CONFIG_PM_RUNTIME
	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
			igb_runtime_idle)
#endif /* CONFIG_PM_RUNTIME */
};
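/* igb_pm_ops is hooked up below via igb_driver.driver.pm when CONFIG_PM is set. */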
#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
#endif /* CONFIG_PM */
#ifndef USE_REBOOT_NOTIFIER
static void igb_shutdown(struct pci_dev *);
#else
static int igb_notify_reboot(struct notifier_block *, unsigned long, void *);
static struct notifier_block igb_notifier_reboot = {
	.notifier_call	= igb_notify_reboot,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif

#ifdef HAVE_PCI_ERS
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};
#endif

static void igb_init_fw(struct igb_adapter *adapter);
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
	.driver.pm = &igb_pm_ops,
#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
#endif /* CONFIG_PM */
#ifndef USE_REBOOT_NOTIFIER
	.shutdown = igb_shutdown,
#endif
#ifdef HAVE_PCI_ERS
	.err_handler = &igb_err_handler
#endif
};

//MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
//MODULE_LICENSE("GPL");
//MODULE_VERSION(DRV_VERSION);

static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie;
	u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
	u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
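	/*
	 * index selects the VFTA entry and mask the bit within it; e.g.
	 * assuming the usual 32-bit entries, vid 100 maps to entry 3, bit 4.
	 */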
	u32 vfta;

	/*
	 * if this is the management vlan the only option is to add it in so
	 * that the management pass through will continue to work
	 */
	if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
	    (vid == mng_cookie->vlan_id))
		add = TRUE;

	vfta = adapter->shadow_vfta[index];

	if (add)
		vfta |= mask;
	else
		vfta &= ~mask;

	e1000_write_vfta(hw, index, vfta);
	adapter->shadow_vfta[index] = vfta;
}

#ifdef HAVE_HW_TIME_STAMP
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on the lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
	 * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
	 */
	if (hw->mac.type >= e1000_82580) {
		stamp = E1000_READ_REG(hw, E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIML) << shift;
	stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << (shift + 32);
	return stamp;
}

#endif /* HAVE_HW_TIME_STAMP */
static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
//module_param(debug, int, 0);
//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)");

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;

	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);
#ifdef IGB_SYSFS
/* only use IGB_PROCFS if IGB_SYSFS is not defined */
#else
#ifdef IGB_PROCFS
	if (igb_procfs_topdir_init())
		printk(KERN_INFO "Procfs failed to initialize topdir\n");
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */

#ifdef IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
#ifdef USE_REBOOT_NOTIFIER
	if (ret >= 0) {
		register_reboot_notifier(&igb_notifier_reboot);
	}
#endif
	return ret;
}

#undef module_init
#define module_init(x) static int x(void) __attribute__((__unused__));
module_init(igb_init_module);
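/*
 * module_init is redefined above (module_exit gets the same treatment below)
 * so that igb_init_module/igb_exit_module are only declared and marked unused
 * rather than registered as module entry points; presumably the KNI module
 * calls into the driver directly.
 */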

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
#ifdef USE_REBOOT_NOTIFIER
	unregister_reboot_notifier(&igb_notifier_reboot);
#endif
	pci_unregister_driver(&igb_driver);

#ifdef IGB_SYSFS
/* only compile IGB_PROCFS if IGB_SYSFS is not defined */
#else
#ifdef IGB_PROCFS
	igb_procfs_topdir_exit();
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */
}

#undef module_exit
#define module_exit(x) static void x(void) __attribute__((__unused__));
module_exit(igb_exit_module);

#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
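/* Q_IDX_82576 maps queue i to its 82576 register index: 0->0, 1->8, 2->1, 3->9, ... */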
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if ((adapter->rss_queues > 1) && adapter->vmdq_pools) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
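		/* Fall through: map any remaining queues with the default scheme below. */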
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}

static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
#ifdef HAVE_DEVICE_NUMA_NODE
	int orig_node = adapter->node;
#endif /* HAVE_DEVICE_NUMA_NODE */

	for (i = 0; i < adapter->num_tx_queues; i++) {
#ifdef HAVE_DEVICE_NUMA_NODE
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
#endif /* HAVE_DEVICE_NUMA_NODE */
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = pci_dev_to_dev(adapter->pdev);
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
#ifdef HAVE_DEVICE_NUMA_NODE
	/* Restore the adapter's original node */
	adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */

	for (i = 0; i < adapter->num_rx_queues; i++) {
#ifdef HAVE_DEVICE_NUMA_NODE
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
#endif /* HAVE_DEVICE_NUMA_NODE */
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = pci_dev_to_dev(adapter->pdev);
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
		ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
#endif
#ifndef HAVE_NDO_SET_FEATURES
		/* enable rx checksum */
		set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);

#endif
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

		/* On i350, loopback VLAN packets have the tag byte-swapped. */
		if (adapter->hw.mac.type == e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

		adapter->rx_ring[i] = ring;
	}
#ifdef HAVE_DEVICE_NUMA_NODE
	/* Restore the adapter's original node */
	adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */

	igb_cache_ring_register(adapter);

	return E1000_SUCCESS;

err:
#ifdef HAVE_DEVICE_NUMA_NODE
	/* Restore the adapter's original node */
	adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */
	igb_free_queues(adapter);

	return -ENOMEM;
}

static void igb_configure_lli(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 port;

	/* LLI should only be enabled for MSI-X or MSI interrupts */
	if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI))
		return;

	if (adapter->lli_port) {
		/* use filter 0 for port */
		port = htons((u16)adapter->lli_port);
		E1000_WRITE_REG(hw, E1000_IMIR(0),
			(port | E1000_IMIR_PORT_IM_EN));
		E1000_WRITE_REG(hw, E1000_IMIREXT(0),
			(E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
	}

	if (adapter->flags & IGB_FLAG_LLI_PUSH) {
		/* use filter 1 for push flag */
		E1000_WRITE_REG(hw, E1000_IMIR(1),
			(E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
		E1000_WRITE_REG(hw, E1000_IMIREXT(1),
			(E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH));
	}

	if (adapter->lli_size) {
		/* use filter 2 for size */
		E1000_WRITE_REG(hw, E1000_IMIR(2),
			(E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
		E1000_WRITE_REG(hw, E1000_IMIREXT(2),
			(adapter->lli_size | E1000_IMIREXT_CTRL_BP));
	}

}

/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset in IVAR, should be a multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
}

#define IGB_N0_QUEUE -1
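/* IGB_N0_QUEUE marks a vector with no Rx or Tx ring assigned. */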
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
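		/* Example: rx_queue 9 -> row 9 & 0x7 = 1, column offset (9 & 0x8) << 1 = 16. */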
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
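		/* Example: rx_queue 5 -> row 5 >> 1 = 2, column offset (5 & 0x1) << 4 = 16. */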
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		/* enable MSI-X PBA support */
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++,
				      E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
				E1000_GPIE_PBA | E1000_GPIE_EIAME |
				E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	E1000_WRITE_FLUSH(hw);
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  &igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
#ifndef IGB_NO_LRO
		if (q_vector->lrolist) {
			__skb_queue_purge(&q_vector->lrolist->active);
			vfree(q_vector->lrolist);
			q_vector->lrolist = NULL;
		}
#endif
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}

/**
 * igb_process_mdd_event
 * @adapter: board private structure
 *
 * Identify a malicious VF, disable the VF TX/RX queues and log a message.
 */
static void igb_process_mdd_event(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 lvmmc, vfte, vfre, mdfb;
	u8 vf_queue;

	lvmmc = E1000_READ_REG(hw, E1000_LVMMC);
	vf_queue = lvmmc >> 29;

	/* VF index cannot be bigger or equal to VFs allocated */
	if (vf_queue >= adapter->vfs_allocated_count)
		return;

	netdev_info(adapter->netdev,
		    "VF %d misbehaved. VF queues are disabled. "
		    "VM misbehavior code is 0x%x\n", vf_queue, lvmmc);

	/* Disable VFTE and VFRE related bits */
	vfte = E1000_READ_REG(hw, E1000_VFTE);
	vfte &= ~(1 << vf_queue);
	E1000_WRITE_REG(hw, E1000_VFTE, vfte);

	vfre = E1000_READ_REG(hw, E1000_VFRE);
	vfre &= ~(1 << vf_queue);
	E1000_WRITE_REG(hw, E1000_VFRE, vfre);

	/* Disable MDFB related bit */
	mdfb = E1000_READ_REG(hw, E1000_MDFB);
	mdfb &= ~(1 << vf_queue);
	E1000_WRITE_REG(hw, E1000_MDFB, mdfb);

	/* Reset the specific VF */
	E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST);
}

/**
 * igb_disable_mdd
 * @adapter: board private structure
 *
 * Disable MDD behavior in the HW
 **/
static void igb_disable_mdd(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	if (hw->mac.type != e1000_i350)
		return;

	reg = E1000_READ_REG(hw, E1000_DTXCTL);
	reg &= (~E1000_DTXCTL_MDP_EN);
	E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
}

/**
 * igb_enable_mdd
 * @adapter: board private structure
 *
 * Enable the HW to detect a malicious driver and send an interrupt to
 * the driver.
 *
 * Only available on the i350 device
 **/
static void igb_enable_mdd(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	if (hw->mac.type != e1000_i350)
		return;

	reg = E1000_READ_REG(hw, E1000_DTXCTL);
	reg |= E1000_DTXCTL_MDP_EN;
	E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
}

/**
 * igb_reset_sriov_capability - disable SR-IOV if enabled
 *
 * Attempt to disable single root IO virtualization capabilities present in the
 * kernel.
 **/
static void igb_reset_sriov_capability(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;

	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		if (!igb_check_vf_assignment(adapter)) {
			/*
			 * disable iov and allow time for transactions to
			 * clear
			 */
			pci_disable_sriov(pdev);
			msleep(500);

			dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n");
		} else {
			dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled - "
					"VF(s) are assigned to guests!\n");
		}
		/* Disable Malicious Driver Detection */
		igb_disable_mdd(adapter);

		/* free vf data storage */
		kfree(adapter->vf_data);
		adapter->vf_data = NULL;

		/* switch rings back to PF ownership */
		E1000_WRITE_REG(hw, E1000_IOVCTL,
				E1000_IOVCTL_REUSE_VFQ);
		E1000_WRITE_FLUSH(hw);
		msleep(100);
	}

	adapter->vfs_allocated_count = 0;
}

/**
 * igb_set_sriov_capability - setup SR-IOV if supported
 *
 * Attempt to enable single root IO virtualization capabilities present in the
 * kernel.
 **/
static void igb_set_sriov_capability(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int old_vfs = 0;
	int i;

#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
	old_vfs = igb_find_enabled_vfs(adapter);
#endif
	if (old_vfs) {
		dev_info(pci_dev_to_dev(pdev),
				"%d pre-allocated VFs found - override "
				"max_vfs setting of %d\n", old_vfs,
				adapter->vfs_allocated_count);
		adapter->vfs_allocated_count = old_vfs;
	}
	/* no VFs requested, do nothing */
	if (!adapter->vfs_allocated_count)
		return;

	/* allocate vf data storage */
	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
				   sizeof(struct vf_data_storage),
				   GFP_KERNEL);

	if (adapter->vf_data) {
		if (!old_vfs) {
			if (pci_enable_sriov(pdev,
					adapter->vfs_allocated_count))
				goto err_out;
		}
		for (i = 0; i < adapter->vfs_allocated_count; i++)
			igb_vf_configure(adapter, i);

		/* DMA Coalescing is not supported in IOV mode. */
		if (adapter->hw.mac.type >= e1000_i350)
			adapter->dmac = IGB_DMAC_DISABLE;
		if (adapter->hw.mac.type < e1000_i350)
			adapter->flags |= IGB_FLAG_DETECT_BAD_DMA;
		return;

	}

err_out:
	kfree(adapter->vf_data);
	adapter->vf_data = NULL;
	adapter->vfs_allocated_count = 0;
	dev_warn(pci_dev_to_dev(pdev),
			"Failed to initialize SR-IOV virtualization\n");
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;

	if (adapter->vmdq_pools > 1)
		adapter->num_rx_queues += adapter->vmdq_pools - 1;

#ifdef HAVE_TX_MQ
	if (adapter->vmdq_pools)
		adapter->num_tx_queues = adapter->vmdq_pools;
	else
		adapter->num_tx_queues = adapter->num_rx_queues;
#else
	adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools);
#endif

	switch (adapter->int_mode) {
	case IGB_INT_MODE_MSIX:
		/* start with one vector for every rx queue */
		numvecs = adapter->num_rx_queues;

		/* if tx handler is separate add 1 for every tx queue */
		if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
			numvecs += adapter->num_tx_queues;

		/* store the number of vectors reserved for queues */
		adapter->num_q_vectors = numvecs;

		/* add 1 vector for link status interrupts */
		numvecs++;
		adapter->msix_entries = kcalloc(numvecs,
						sizeof(struct msix_entry),
						GFP_KERNEL);
		if (adapter->msix_entries) {
			for (i = 0; i < numvecs; i++)
				adapter->msix_entries[i].entry = i;

			err = pci_enable_msix(pdev,
					      adapter->msix_entries, numvecs);
			if (err == 0)
				break;
		}
		/* MSI-X failed, so fall through and try MSI */
		dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. "
			 "Falling back to MSI interrupts.\n");
		igb_reset_interrupt_capability(adapter);
	case IGB_INT_MODE_MSI:
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		else
			dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI "
				 "interrupts.  Falling back to legacy "
				 "interrupts.\n");
		/* Fall through */
	case IGB_INT_MODE_LEGACY:
		/* disable advanced features and set number of queues to 1 */
		igb_reset_sriov_capability(adapter);
		adapter->vmdq_pools = 0;
		adapter->rss_queues = 1;
		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
		adapter->num_rx_queues = 1;
		adapter->num_tx_queues = 1;
		adapter->num_q_vectors = 1;
		/* Don't do anything; this is system default */
		break;
	}

#ifdef HAVE_TX_MQ
	/* Notify the stack of the (possibly) reduced Tx Queue count. */
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
	adapter->netdev->egress_subqueue_count = adapter->num_tx_queues;
#else
	adapter->netdev->real_num_tx_queues =
			(adapter->vmdq_pools ? 1 : adapter->num_tx_queues);
#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
#endif /* HAVE_TX_MQ */
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
#ifdef HAVE_DEVICE_NUMA_NODE
	int orig_node = adapter->node;
#endif /* HAVE_DEVICE_NUMA_NODE */

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
#ifdef HAVE_DEVICE_NUMA_NODE
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
#endif /* HAVE_DEVICE_NUMA_NODE */
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
#ifndef IGB_NO_LRO
		if (v_idx < adapter->num_rx_queues) {
			int size = sizeof(struct igb_lro_list);
			q_vector->lrolist = vzalloc_node(size, q_vector->numa_node);
			if (!q_vector->lrolist)
				q_vector->lrolist = vzalloc(size);
			if (!q_vector->lrolist)
				goto err_out;
			__skb_queue_head_init(&q_vector->lrolist->active);
		}
#endif /* IGB_NO_LRO */
	}
#ifdef HAVE_DEVICE_NUMA_NODE
	/* Restore the adapter's original node */
	adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */

	return 0;

err_out:
#ifdef HAVE_DEVICE_NUMA_NODE
	/* Restore the adapter's original node */
	adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}

static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
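		/*
		 * Fewer vectors than rings: share a vector between an Rx ring
		 * and a Tx ring where possible, then map any remaining Tx rings.
		 */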
1237                 for (i = 0; i < adapter->num_rx_queues; i++) {
1238                         if (i < adapter->num_tx_queues)
1239                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1240                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1241                 }
1242                 for (; i < adapter->num_tx_queues; i++)
1243                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1244         }
1245         return 0;
1246 }
1247
1248 /**
1249  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1250  *
1251  * This function initializes the interrupts and allocates all of the queues.
1252  **/
1253 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1254 {
1255         struct pci_dev *pdev = adapter->pdev;
1256         int err;
1257
1258         igb_set_interrupt_capability(adapter);
1259
1260         err = igb_alloc_q_vectors(adapter);
1261         if (err) {
1262                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n");
1263                 goto err_alloc_q_vectors;
1264         }
1265
1266         err = igb_alloc_queues(adapter);
1267         if (err) {
1268                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
1269                 goto err_alloc_queues;
1270         }
1271
1272         err = igb_map_ring_to_vector(adapter);
1273         if (err) {
1274                 dev_err(pci_dev_to_dev(pdev), "Invalid q_vector to ring mapping\n");
1275                 goto err_map_queues;
1276         }
1277
1278
1279         return 0;
1280 err_map_queues:
1281         igb_free_queues(adapter);
1282 err_alloc_queues:
1283         igb_free_q_vectors(adapter);
1284 err_alloc_q_vectors:
1285         igb_reset_interrupt_capability(adapter);
1286         return err;
1287 }
1288
1289 /**
1290  * igb_request_irq - initialize interrupts
1291  *
1292  * Attempts to configure interrupts using the best available
1293  * capabilities of the hardware and kernel.
1294  **/
1295 static int igb_request_irq(struct igb_adapter *adapter)
1296 {
1297         struct net_device *netdev = adapter->netdev;
1298         struct pci_dev *pdev = adapter->pdev;
1299         int err = 0;
1300
1301         if (adapter->msix_entries) {
1302                 err = igb_request_msix(adapter);
1303                 if (!err)
1304                         goto request_done;
1305                 /* fall back to MSI */
1306                 igb_clear_interrupt_scheme(adapter);
1307                 igb_reset_sriov_capability(adapter);
1308                 if (!pci_enable_msi(pdev))
1309                         adapter->flags |= IGB_FLAG_HAS_MSI;
1310                 igb_free_all_tx_resources(adapter);
1311                 igb_free_all_rx_resources(adapter);
1312                 adapter->num_tx_queues = 1;
1313                 adapter->num_rx_queues = 1;
1314                 adapter->num_q_vectors = 1;
1315                 err = igb_alloc_q_vectors(adapter);
1316                 if (err) {
1317                         dev_err(pci_dev_to_dev(pdev),
1318                                 "Unable to allocate memory for vectors\n");
1319                         goto request_done;
1320                 }
1321                 err = igb_alloc_queues(adapter);
1322                 if (err) {
1323                         dev_err(pci_dev_to_dev(pdev),
1324                                 "Unable to allocate memory for queues\n");
1325                         igb_free_q_vectors(adapter);
1326                         goto request_done;
1327                 }
1328                 igb_setup_all_tx_resources(adapter);
1329                 igb_setup_all_rx_resources(adapter);
1330         }
1331
1332         igb_assign_vector(adapter->q_vector[0], 0);
1333
1334         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1335                 err = request_irq(pdev->irq, &igb_intr_msi, 0,
1336                                   netdev->name, adapter);
1337                 if (!err)
1338                         goto request_done;
1339
1340                 /* fall back to legacy interrupts */
1341                 igb_reset_interrupt_capability(adapter);
1342                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1343         }
1344
1345         err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED,
1346                           netdev->name, adapter);
1347
1348         if (err)
1349                 dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n",
1350                         err);
1351
1352 request_done:
1353         return err;
1354 }
1355
1356 static void igb_free_irq(struct igb_adapter *adapter)
1357 {
1358         if (adapter->msix_entries) {
1359                 int vector = 0, i;
1360
1361                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1362
1363                 for (i = 0; i < adapter->num_q_vectors; i++)
1364                         free_irq(adapter->msix_entries[vector++].vector,
1365                                  adapter->q_vector[i]);
1366         } else {
1367                 free_irq(adapter->pdev->irq, adapter);
1368         }
1369 }
1370
1371 /**
1372  * igb_irq_disable - Mask off interrupt generation on the NIC
1373  * @adapter: board private structure
1374  **/
1375 static void igb_irq_disable(struct igb_adapter *adapter)
1376 {
1377         struct e1000_hw *hw = &adapter->hw;
1378
1379         /*
1380          * we need to be careful when disabling interrupts.  The VFs are also
1381          * mapped into these registers and so clearing the bits can cause
1382          * issues on the VF drivers so we only need to clear what we set
1383          */
1384         if (adapter->msix_entries) {
1385                 u32 regval = E1000_READ_REG(hw, E1000_EIAM);
1386                 E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask);
1387                 E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask);
1388                 regval = E1000_READ_REG(hw, E1000_EIAC);
1389                 E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask);
1390         }
1391
1392         E1000_WRITE_REG(hw, E1000_IAM, 0);
1393         E1000_WRITE_REG(hw, E1000_IMC, ~0);
1394         E1000_WRITE_FLUSH(hw);
1395
1396         if (adapter->msix_entries) {
1397                 int vector = 0, i;
1398
1399                 synchronize_irq(adapter->msix_entries[vector++].vector);
1400
1401                 for (i = 0; i < adapter->num_q_vectors; i++)
1402                         synchronize_irq(adapter->msix_entries[vector++].vector);
1403         } else {
1404                 synchronize_irq(adapter->pdev->irq);
1405         }
1406 }
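
/*
 * Register roles, for reference (descriptive note): EIMS/EIMC set and
 * clear the extended interrupt mask, while EIAM/EIAC control the
 * auto-mask and auto-clear behaviour.  Only the eims_enable_mask bits
 * of the extended registers are touched here and in igb_irq_enable()
 * below, so interrupt causes owned by the VFs keep their configuration.
 */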
1407
1408 /**
1409  * igb_irq_enable - Enable default interrupt generation settings
1410  * @adapter: board private structure
1411  **/
1412 static void igb_irq_enable(struct igb_adapter *adapter)
1413 {
1414         struct e1000_hw *hw = &adapter->hw;
1415
1416         if (adapter->msix_entries) {
1417                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1418                 u32 regval = E1000_READ_REG(hw, E1000_EIAC);
1419                 E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask);
1420                 regval = E1000_READ_REG(hw, E1000_EIAM);
1421                 E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask);
1422                 E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask);
1423                 if (adapter->vfs_allocated_count) {
1424                         E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF);
1425                         ims |= E1000_IMS_VMMB;
1426                         /* For I350 devices, only enable MDD interrupts */
1427                         if ((adapter->mdd) &&
1428                             (adapter->hw.mac.type == e1000_i350))
1429                                 ims |= E1000_IMS_MDDET;
1430                 }
1431                 E1000_WRITE_REG(hw, E1000_IMS, ims);
1432         } else {
1433                 E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK |
1434                                 E1000_IMS_DRSTA);
1435                 E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK |
1436                                 E1000_IMS_DRSTA);
1437         }
1438 }
1439
1440 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1441 {
1442         struct e1000_hw *hw = &adapter->hw;
1443         u16 vid = adapter->hw.mng_cookie.vlan_id;
1444         u16 old_vid = adapter->mng_vlan_id;
1445
1446         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1447                 /* add VID to filter table */
1448                 igb_vfta_set(adapter, vid, TRUE);
1449                 adapter->mng_vlan_id = vid;
1450         } else {
1451                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1452         }
1453
1454         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1455             (vid != old_vid) &&
1456 #ifdef HAVE_VLAN_RX_REGISTER
1457             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1458 #else
1459             !test_bit(old_vid, adapter->active_vlans)) {
1460 #endif
1461                 /* remove VID from filter table */
1462                 igb_vfta_set(adapter, old_vid, FALSE);
1463         }
1464 }
1465
1466 /**
1467  * igb_release_hw_control - release control of the h/w to f/w
1468  * @adapter: address of board private structure
1469  *
1470  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1471  * For ASF and Pass Through versions of f/w this means that the
1472  * driver is no longer loaded.
1473  *
1474  **/
1475 static void igb_release_hw_control(struct igb_adapter *adapter)
1476 {
1477         struct e1000_hw *hw = &adapter->hw;
1478         u32 ctrl_ext;
1479
1480         /* Let firmware take over control of h/w */
1481         ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
1482         E1000_WRITE_REG(hw, E1000_CTRL_EXT,
1483                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1484 }
1485
1486 /**
1487  * igb_get_hw_control - get control of the h/w from f/w
1488  * @adapter: address of board private structure
1489  *
1490  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1491  * For ASF and Pass Through versions of f/w this means that
1492  * the driver is loaded.
1493  *
1494  **/
1495 static void igb_get_hw_control(struct igb_adapter *adapter)
1496 {
1497         struct e1000_hw *hw = &adapter->hw;
1498         u32 ctrl_ext;
1499
1500         /* Let firmware know the driver has taken over */
1501         ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
1502         E1000_WRITE_REG(hw, E1000_CTRL_EXT,
1503                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1504 }
1505
1506 /**
1507  * igb_configure - configure the hardware for RX and TX
1508  * @adapter: private board structure
1509  **/
1510 static void igb_configure(struct igb_adapter *adapter)
1511 {
1512         struct net_device *netdev = adapter->netdev;
1513         int i;
1514
1515         igb_get_hw_control(adapter);
1516         igb_set_rx_mode(netdev);
1517
1518         igb_restore_vlan(adapter);
1519
1520         igb_setup_tctl(adapter);
1521         igb_setup_mrqc(adapter);
1522         igb_setup_rctl(adapter);
1523
1524         igb_configure_tx(adapter);
1525         igb_configure_rx(adapter);
1526
1527         e1000_rx_fifo_flush_82575(&adapter->hw);
1528 #ifdef CONFIG_NETDEVICES_MULTIQUEUE
1529
1530         if (adapter->num_tx_queues > 1)
1531                 netdev->features |= NETIF_F_MULTI_QUEUE;
1532         else
1533                 netdev->features &= ~NETIF_F_MULTI_QUEUE;
1534 #endif
1535
1536         /* call igb_desc_unused which always leaves
1537          * at least 1 descriptor unused to make sure
1538          * next_to_use != next_to_clean */
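        /* Concretely (illustrative figure only): with a 256-entry ring at
         * most 255 buffers are ever posted, so next_to_use == next_to_clean
         * can only mean "ring empty" and never "ring completely full". */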
1539         for (i = 0; i < adapter->num_rx_queues; i++) {
1540                 struct igb_ring *ring = adapter->rx_ring[i];
1541                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1542         }
1543 }
1544
1545 /**
1546  * igb_power_up_link - Power up the phy/serdes link
1547  * @adapter: address of board private structure
1548  **/
1549 void igb_power_up_link(struct igb_adapter *adapter)
1550 {
1551         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1552                 e1000_power_up_phy(&adapter->hw);
1553         else
1554                 e1000_power_up_fiber_serdes_link(&adapter->hw);
1555
1556         e1000_phy_hw_reset(&adapter->hw);
1557 }
1558
1559 /**
1560  * igb_power_down_link - Power down the phy/serdes link
1561  * @adapter: address of board private structure
1562  */
1563 static void igb_power_down_link(struct igb_adapter *adapter)
1564 {
1565         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1566                 e1000_power_down_phy(&adapter->hw);
1567         else
1568                 e1000_shutdown_fiber_serdes_link(&adapter->hw);
1569 }
1570
1571 /**
1572  * igb_up - Open the interface and prepare it to handle traffic
1573  * @adapter: board private structure
1574  **/
1575 int igb_up(struct igb_adapter *adapter)
1576 {
1577         struct e1000_hw *hw = &adapter->hw;
1578         int i;
1579
1580         /* hardware has been reset, we need to reload some things */
1581         igb_configure(adapter);
1582
1583         clear_bit(__IGB_DOWN, &adapter->state);
1584
1585         for (i = 0; i < adapter->num_q_vectors; i++)
1586                 napi_enable(&(adapter->q_vector[i]->napi));
1587
1588         if (adapter->msix_entries)
1589                 igb_configure_msix(adapter);
1590         else
1591                 igb_assign_vector(adapter->q_vector[0], 0);
1592
1593         igb_configure_lli(adapter);
1594
1595         /* Clear any pending interrupts. */
1596         E1000_READ_REG(hw, E1000_ICR);
1597         igb_irq_enable(adapter);
1598
1599         /* notify VFs that reset has been completed */
1600         if (adapter->vfs_allocated_count) {
1601                 u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
1602                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1603                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
1604         }
1605
1606         netif_tx_start_all_queues(adapter->netdev);
1607
1608         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
1609                 schedule_work(&adapter->dma_err_task);
1610         /* start the watchdog. */
1611         hw->mac.get_link_status = 1;
1612         schedule_work(&adapter->watchdog_task);
1613
1614         return 0;
1615 }
1616
1617 void igb_down(struct igb_adapter *adapter)
1618 {
1619         struct net_device *netdev = adapter->netdev;
1620         struct e1000_hw *hw = &adapter->hw;
1621         u32 tctl, rctl;
1622         int i;
1623
1624         /* signal that we're down so the interrupt handler does not
1625          * reschedule our watchdog timer */
1626         set_bit(__IGB_DOWN, &adapter->state);
1627
1628         /* disable receives in the hardware */
1629         rctl = E1000_READ_REG(hw, E1000_RCTL);
1630         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
1631         /* flush and sleep below */
1632
1633         netif_tx_stop_all_queues(netdev);
1634
1635         /* disable transmits in the hardware */
1636         tctl = E1000_READ_REG(hw, E1000_TCTL);
1637         tctl &= ~E1000_TCTL_EN;
1638         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
1639         /* flush both disables and wait for them to finish */
1640         E1000_WRITE_FLUSH(hw);
1641         usleep_range(10000, 20000);
1642
1643         for (i = 0; i < adapter->num_q_vectors; i++)
1644                 napi_disable(&(adapter->q_vector[i]->napi));
1645
1646         igb_irq_disable(adapter);
1647
1648         del_timer_sync(&adapter->watchdog_timer);
1649         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
1650                 del_timer_sync(&adapter->dma_err_timer);
1651         del_timer_sync(&adapter->phy_info_timer);
1652
1653         netif_carrier_off(netdev);
1654
1655         /* record the stats before reset */
1656         igb_update_stats(adapter);
1657
1658         adapter->link_speed = 0;
1659         adapter->link_duplex = 0;
1660
1661 #ifdef HAVE_PCI_ERS
1662         if (!pci_channel_offline(adapter->pdev))
1663                 igb_reset(adapter);
1664 #else
1665         igb_reset(adapter);
1666 #endif
1667         igb_clean_all_tx_rings(adapter);
1668         igb_clean_all_rx_rings(adapter);
1669 #ifdef IGB_DCA
1670
1671         /* since we reset the hardware DCA settings were cleared */
1672         igb_setup_dca(adapter);
1673 #endif
1674 }
1675
1676 void igb_reinit_locked(struct igb_adapter *adapter)
1677 {
1678         WARN_ON(in_interrupt());
1679         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1680                 usleep_range(1000, 2000);
1681         igb_down(adapter);
1682         igb_up(adapter);
1683         clear_bit(__IGB_RESETTING, &adapter->state);
1684 }
1685
1686 void igb_reset(struct igb_adapter *adapter)
1687 {
1688         struct pci_dev *pdev = adapter->pdev;
1689         struct e1000_hw *hw = &adapter->hw;
1690         struct e1000_mac_info *mac = &hw->mac;
1691         struct e1000_fc_info *fc = &hw->fc;
1692         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1693         u16 hwm;
1694
1695         /* Repartition the PBA for MTUs greater than 9k.
1696          * CTRL.RST is required for the change to take effect.
1697          */
1698         switch (mac->type) {
1699         case e1000_i350:
1700         case e1000_82580:
1701                 pba = E1000_READ_REG(hw, E1000_RXPBS);
1702                 pba = e1000_rxpbs_adjust_82580(pba);
1703                 break;
1704         case e1000_82576:
1705                 pba = E1000_READ_REG(hw, E1000_RXPBS);
1706                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1707                 break;
1708         case e1000_82575:
1709         default:
1710                 pba = E1000_PBA_34K;
1711                 break;
1712         }
1713
1714         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1715             (mac->type < e1000_82576)) {
1716                 /* adjust PBA for jumbo frames */
1717                 E1000_WRITE_REG(hw, E1000_PBA, pba);
1718
1719                 /* To maintain wire speed transmits, the Tx FIFO should be
1720                  * large enough to accommodate two full transmit packets,
1721                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1722                  * the Rx FIFO should be large enough to accommodate at least
1723                  * one full receive packet and is similarly rounded up and
1724                  * expressed in KB. */
1725                 pba = E1000_READ_REG(hw, E1000_PBA);
1726                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1727                 tx_space = pba >> 16;
1728                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1729                 pba &= 0xffff;
1730                 /* the Tx FIFO also stores 16 bytes of information about each Tx
1731                  * packet, but don't include the Ethernet FCS because hardware appends it */
1732                 min_tx_space = (adapter->max_frame_size +
1733                                 sizeof(union e1000_adv_tx_desc) -
1734                                 ETH_FCS_LEN) * 2;
1735                 min_tx_space = ALIGN(min_tx_space, 1024);
1736                 min_tx_space >>= 10;
1737                 /* software strips receive CRC, so leave room for it */
1738                 min_rx_space = adapter->max_frame_size;
1739                 min_rx_space = ALIGN(min_rx_space, 1024);
1740                 min_rx_space >>= 10;
1741
1742                 /* If current Tx allocation is less than the min Tx FIFO size,
1743                  * and the min Tx FIFO size is less than the current Rx FIFO
1744                  * allocation, take space away from current Rx allocation */
1745                 if (tx_space < min_tx_space &&
1746                     ((min_tx_space - tx_space) < pba)) {
1747                         pba = pba - (min_tx_space - tx_space);
1748
1749                         /* if short on rx space, rx wins and must trump tx
1750                          * adjustment */
1751                         if (pba < min_rx_space)
1752                                 pba = min_rx_space;
1753                 }
1754                 E1000_WRITE_REG(hw, E1000_PBA, pba);
1755         }
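
        /*
         * Worked example for the sizing above (illustrative figures only,
         * and only reached on pre-82576 parts with jumbo frames): with a
         * 9000-byte MTU, igb_sw_init() sets max_frame_size to
         * 9000 + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN = 9022 bytes, so
         *
         *   min_tx_space = ALIGN((9022 + 16 - 4) * 2, 1024) >> 10 = 18 KB
         *   min_rx_space = ALIGN(9022, 1024) >> 10               =  9 KB
         *
         * and a Tx allocation below 18 KB is grown at the expense of the
         * Rx allocation, but never below the 9 KB Rx minimum.
         */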
1756
1757         /* flow control settings */
1758         /* The high water mark must be low enough to fit one full frame
1759          * (or the size used for early receive) above it in the Rx FIFO.
1760          * Set it to the lower of:
1761          * - 90% of the Rx FIFO size, or
1762          * - the full Rx FIFO size minus one full frame */
1763         hwm = min(((pba << 10) * 9 / 10),
1764                         ((pba << 10) - 2 * adapter->max_frame_size));
1765
1766         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1767         fc->low_water = fc->high_water - 16;
1768         fc->pause_time = 0xFFFF;
1769         fc->send_xon = 1;
1770         fc->current_mode = fc->requested_mode;
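
        /*
         * Worked example for the watermarks above (illustrative figures
         * only): with the default 34 KB packet buffer (E1000_PBA_34K) and
         * a standard 1522-byte max frame,
         *
         *   hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
         *       = min(31334, 31772) = 31334
         *
         * which fc->high_water above rounds down to the 16-byte
         * granularity the hardware expects (31328), with low_water
         * trailing it by 16.
         */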
1771
1772         /* disable receive for all VFs and wait one second */
1773         if (adapter->vfs_allocated_count) {
1774                 int i;
1775                 /*
1776                  * Clear all flags except indication that the PF has set
1777                  * the VF MAC addresses administratively
1778                  */
1779                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1780                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1781
1782                 /* ping all the active vfs to let them know we are going down */
1783                 igb_ping_all_vfs(adapter);
1784
1785                 /* disable transmits and receives */
1786                 E1000_WRITE_REG(hw, E1000_VFRE, 0);
1787                 E1000_WRITE_REG(hw, E1000_VFTE, 0);
1788         }
1789
1790         /* Allow time for pending master requests to run */
1791         e1000_reset_hw(hw);
1792         E1000_WRITE_REG(hw, E1000_WUC, 0);
1793
1794         if (e1000_init_hw(hw))
1795                 dev_err(pci_dev_to_dev(pdev), "Hardware Error\n");
1796
1797         igb_init_dmac(adapter, pba);
1798         /* Re-initialize the thermal sensor on i350 devices. */
1799         if (mac->type == e1000_i350 && hw->bus.func == 0) {
1800                 /*
1801                  * If present, re-initialize the external thermal sensor
1802                  * interface.
1803                  */
1804                 if (adapter->ets)
1805                         e1000_set_i2c_bb(hw);
1806                 e1000_init_thermal_sensor_thresh(hw);
1807         }
1808         if (!netif_running(adapter->netdev))
1809                 igb_power_down_link(adapter);
1810
1811         igb_update_mng_vlan(adapter);
1812
1813         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1814         E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1815
1816         e1000_get_phy_info(hw);
1817 }
1818
1819 #ifdef HAVE_NDO_SET_FEATURES
1820 static netdev_features_t igb_fix_features(struct net_device *netdev,
1821                                           netdev_features_t features)
1822 {
1823         /*
1824          * Since there is no support for enabling Tx VLAN accel separately,
1825          * make sure the Tx flag is cleared if the Rx flag is.
1826          */
1827         if (!(features & NETIF_F_HW_VLAN_RX))
1828                 features &= ~NETIF_F_HW_VLAN_TX;
1829
1830         /* If Rx checksum is disabled, then LRO should also be disabled */
1831         if (!(features & NETIF_F_RXCSUM))
1832                 features &= ~NETIF_F_LRO;
1833
1834         return features;
1835 }
1836
1837 static int igb_set_features(struct net_device *netdev,
1838                             netdev_features_t features)
1839 {
1840         u32 changed = netdev->features ^ features;
1841
1842         if (changed & NETIF_F_HW_VLAN_RX)
1843                 igb_vlan_mode(netdev, features);
1844
1845         return 0;
1846 }
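/*
 * Illustrative usage of the two hooks above (assumes the standard
 * ethtool -K feature names): a request such as
 *
 *   ethtool -K eth0 rxvlan off
 *
 * is first filtered through igb_fix_features(), which also drops the Tx
 * VLAN acceleration bit because the driver cannot enable the two
 * separately, and the result is then applied by igb_set_features() via
 * igb_vlan_mode().  Turning off "rx" (NETIF_F_RXCSUM) likewise clears
 * the LRO bit.
 */
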
1847
1848 #endif /* HAVE_NDO_SET_FEATURES */
1849 #ifdef HAVE_NET_DEVICE_OPS
1850 static const struct net_device_ops igb_netdev_ops = {
1851         .ndo_open               = igb_open,
1852         .ndo_stop               = igb_close,
1853         .ndo_start_xmit         = igb_xmit_frame,
1854         .ndo_get_stats          = igb_get_stats,
1855         .ndo_set_rx_mode        = igb_set_rx_mode,
1856         .ndo_set_mac_address    = igb_set_mac,
1857         .ndo_change_mtu         = igb_change_mtu,
1858         .ndo_do_ioctl           = igb_ioctl,
1859         .ndo_tx_timeout         = igb_tx_timeout,
1860         .ndo_validate_addr      = eth_validate_addr,
1861         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1862         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1863 #ifdef IFLA_VF_MAX
1864         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1865         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1866         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1867         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1868 #endif
1869 #ifdef CONFIG_NET_POLL_CONTROLLER
1870         .ndo_poll_controller    = igb_netpoll,
1871 #endif
1872 #ifdef HAVE_NDO_SET_FEATURES
1873         .ndo_fix_features       = igb_fix_features,
1874         .ndo_set_features       = igb_set_features,
1875 #endif
1876 #ifdef HAVE_VLAN_RX_REGISTER
1877         .ndo_vlan_rx_register   = igb_vlan_mode,
1878 #endif
1879 };
1880
1881 #ifdef CONFIG_IGB_VMDQ_NETDEV
1882 static const struct net_device_ops igb_vmdq_ops = {
1883         .ndo_open               = &igb_vmdq_open,
1884         .ndo_stop               = &igb_vmdq_close,
1885         .ndo_start_xmit         = &igb_vmdq_xmit_frame,
1886         .ndo_get_stats          = &igb_vmdq_get_stats,
1887         .ndo_set_rx_mode        = &igb_vmdq_set_rx_mode,
1888         .ndo_validate_addr      = eth_validate_addr,
1889         .ndo_set_mac_address    = &igb_vmdq_set_mac,
1890         .ndo_change_mtu         = &igb_vmdq_change_mtu,
1891         .ndo_tx_timeout         = &igb_vmdq_tx_timeout,
1892         .ndo_vlan_rx_register   = &igb_vmdq_vlan_rx_register,
1893         .ndo_vlan_rx_add_vid    = &igb_vmdq_vlan_rx_add_vid,
1894         .ndo_vlan_rx_kill_vid   = &igb_vmdq_vlan_rx_kill_vid,
1895 };
1896
1897 #endif /* CONFIG_IGB_VMDQ_NETDEV */
1898 #endif /* HAVE_NET_DEVICE_OPS */
1899 #ifdef CONFIG_IGB_VMDQ_NETDEV
1900 void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev)
1901 {
1902 #ifdef HAVE_NET_DEVICE_OPS
1903         vnetdev->netdev_ops = &igb_vmdq_ops;
1904 #else
1905         vnetdev->open = &igb_vmdq_open;
1906         vnetdev->stop = &igb_vmdq_close;
1907         vnetdev->hard_start_xmit = &igb_vmdq_xmit_frame;
1908         vnetdev->get_stats = &igb_vmdq_get_stats;
1909 #ifdef HAVE_SET_RX_MODE
1910         vnetdev->set_rx_mode = &igb_vmdq_set_rx_mode;
1911 #endif
1912         vnetdev->set_multicast_list = &igb_vmdq_set_rx_mode;
1913         vnetdev->set_mac_address = &igb_vmdq_set_mac;
1914         vnetdev->change_mtu = &igb_vmdq_change_mtu;
1915 #ifdef HAVE_TX_TIMEOUT
1916         vnetdev->tx_timeout = &igb_vmdq_tx_timeout;
1917 #endif
1918 #ifdef NETIF_F_HW_VLAN_TX
1919         vnetdev->vlan_rx_register = &igb_vmdq_vlan_rx_register;
1920         vnetdev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid;
1921         vnetdev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid;
1922 #endif
1923 #endif
1924         igb_vmdq_set_ethtool_ops(vnetdev);
1925         vnetdev->watchdog_timeo = 5 * HZ;
1926
1927 }
1928
1929 int igb_init_vmdq_netdevs(struct igb_adapter *adapter)
1930 {
1931         int pool, err = 0, base_queue;
1932         struct net_device *vnetdev;
1933         struct igb_vmdq_adapter *vmdq_adapter;
1934
1935         for (pool = 1; pool < adapter->vmdq_pools; pool++) {
1936                 int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues);
1937                 base_queue = pool * qpp;
1938                 vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter));
1939                 if (!vnetdev) {
1940                         err = -ENOMEM;
1941                         break;
1942                 }
1943                 vmdq_adapter = netdev_priv(vnetdev);
1944                 vmdq_adapter->vnetdev = vnetdev;
1945                 vmdq_adapter->real_adapter = adapter;
1946                 vmdq_adapter->rx_ring = adapter->rx_ring[base_queue];
1947                 vmdq_adapter->tx_ring = adapter->tx_ring[base_queue];
1948                 igb_assign_vmdq_netdev_ops(vnetdev);
1949                 snprintf(vnetdev->name, IFNAMSIZ, "%sv%d",
1950                          adapter->netdev->name, pool);
1951                 vnetdev->features = adapter->netdev->features;
1952 #ifdef HAVE_NETDEV_VLAN_FEATURES
1953                 vnetdev->vlan_features = adapter->netdev->vlan_features;
1954 #endif
1955                 adapter->vmdq_netdev[pool-1] = vnetdev;
1956                 err = register_netdev(vnetdev);
1957                 if (err)
1958                         break;
1959         }
1960         return err;
1961 }
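
/*
 * Illustrative layout (example numbers only): with rss_queues == 2 and
 * vmdq_pools == 4, the loop above creates three child netdevs named
 * "<parent>v1" .. "<parent>v3", bound to base queues 2, 4 and 6
 * respectively, while pool 0 remains served by the parent netdev.
 */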
1962
1963 int igb_remove_vmdq_netdevs(struct igb_adapter *adapter)
1964 {
1965         int pool, err = 0;
1966
1967         for (pool = 1; pool < adapter->vmdq_pools; pool++) {
1968                 unregister_netdev(adapter->vmdq_netdev[pool-1]);
1969                 free_netdev(adapter->vmdq_netdev[pool-1]);
1970                 adapter->vmdq_netdev[pool-1] = NULL;
1971         }
1972         return err;
1973 }
1974 #endif /* CONFIG_IGB_VMDQ_NETDEV */
1975
1976 /**
1977  * igb_probe - Device Initialization Routine
1978  * @pdev: PCI device information struct
1979  * @ent: entry in igb_pci_tbl
1980  *
1981  * Returns 0 on success, negative on failure
1982  *
1983  * igb_probe initializes an adapter identified by a pci_dev structure.
1984  * The OS initialization, configuring of the adapter private structure,
1985  * and a hardware reset occur.
1986  **/
1987 static int __devinit igb_probe(struct pci_dev *pdev,
1988                                const struct pci_device_id *ent)
1989 {
1990         struct net_device *netdev;
1991         struct igb_adapter *adapter;
1992         struct e1000_hw *hw;
1993         u16 eeprom_data = 0;
1994         u8 pba_str[E1000_PBANUM_LENGTH];
1995         s32 ret_val;
1996         static int global_quad_port_a; /* global quad port a indication */
1997         int i, err, pci_using_dac;
1998         static int cards_found;
1999
2000         err = pci_enable_device_mem(pdev);
2001         if (err)
2002                 return err;
2003
2004         pci_using_dac = 0;
2005         err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
2006         if (!err) {
2007                 err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
2008                 if (!err)
2009                         pci_using_dac = 1;
2010         } else {
2011                 err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
2012                 if (err) {
2013                         err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
2014                         if (err) {
2015                                 IGB_ERR("No usable DMA configuration, "
2016                                         "aborting\n");
2017                                 goto err_dma;
2018                         }
2019                 }
2020         }
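
        /* pci_using_dac is consulted again below to advertise NETIF_F_HIGHDMA */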
2021
2022 #ifndef HAVE_ASPM_QUIRKS
2023         /* 82575 requires that the pci-e link partner disable the L0s state */
2024         switch (pdev->device) {
2025         case E1000_DEV_ID_82575EB_COPPER:
2026         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2027         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2028                 pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
2029         default:
2030                 break;
2031         }
2032
2033 #endif /* HAVE_ASPM_QUIRKS */
2034         err = pci_request_selected_regions(pdev,
2035                                            pci_select_bars(pdev,
2036                                                            IORESOURCE_MEM),
2037                                            igb_driver_name);
2038         if (err)
2039                 goto err_pci_reg;
2040
2041         pci_enable_pcie_error_reporting(pdev);
2042
2043         pci_set_master(pdev);
2044
2045         err = -ENOMEM;
2046 #ifdef HAVE_TX_MQ
2047         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
2048                                    IGB_MAX_TX_QUEUES);
2049 #else
2050         netdev = alloc_etherdev(sizeof(struct igb_adapter));
2051 #endif /* HAVE_TX_MQ */
2052         if (!netdev)
2053                 goto err_alloc_etherdev;
2054
2055         SET_MODULE_OWNER(netdev);
2056         SET_NETDEV_DEV(netdev, &pdev->dev);
2057
2058         pci_set_drvdata(pdev, netdev);
2059         adapter = netdev_priv(netdev);
2060         adapter->netdev = netdev;
2061         adapter->pdev = pdev;
2062         hw = &adapter->hw;
2063         hw->back = adapter;
2064         adapter->port_num = hw->bus.func;
2065         adapter->msg_enable = (1 << debug) - 1;
2066
2067 #ifdef HAVE_PCI_ERS
2068         err = pci_save_state(pdev);
2069         if (err)
2070                 goto err_ioremap;
2071 #endif
2072         err = -EIO;
2073         hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
2074                               pci_resource_len(pdev, 0));
2075         if (!hw->hw_addr)
2076                 goto err_ioremap;
2077
2078 #ifdef HAVE_NET_DEVICE_OPS
2079         netdev->netdev_ops = &igb_netdev_ops;
2080 #else /* HAVE_NET_DEVICE_OPS */
2081         netdev->open = &igb_open;
2082         netdev->stop = &igb_close;
2083         netdev->get_stats = &igb_get_stats;
2084 #ifdef HAVE_SET_RX_MODE
2085         netdev->set_rx_mode = &igb_set_rx_mode;
2086 #endif
2087         netdev->set_multicast_list = &igb_set_rx_mode;
2088         netdev->set_mac_address = &igb_set_mac;
2089         netdev->change_mtu = &igb_change_mtu;
2090         netdev->do_ioctl = &igb_ioctl;
2091 #ifdef HAVE_TX_TIMEOUT
2092         netdev->tx_timeout = &igb_tx_timeout;
2093 #endif
2094         netdev->vlan_rx_register = igb_vlan_mode;
2095         netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
2096         netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
2097 #ifdef CONFIG_NET_POLL_CONTROLLER
2098         netdev->poll_controller = igb_netpoll;
2099 #endif
2100         netdev->hard_start_xmit = &igb_xmit_frame;
2101 #endif /* HAVE_NET_DEVICE_OPS */
2102         igb_set_ethtool_ops(netdev);
2103 #ifdef HAVE_TX_TIMEOUT
2104         netdev->watchdog_timeo = 5 * HZ;
2105 #endif
2106
2107         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
2108
2109         adapter->bd_number = cards_found;
2110
2111         /* setup the private structure */
2112         err = igb_sw_init(adapter);
2113         if (err)
2114                 goto err_sw_init;
2115
2116         e1000_get_bus_info(hw);
2117
2118         hw->phy.autoneg_wait_to_complete = FALSE;
2119         hw->mac.adaptive_ifs = FALSE;
2120
2121         /* Copper options */
2122         if (hw->phy.media_type == e1000_media_type_copper) {
2123 #ifdef ETH_TP_MDI_X
2124                 hw->phy.mdix = ETH_TP_MDI_INVALID;
2125 #else
2126                 hw->phy.mdix = AUTO_ALL_MODES;
2127 #endif /* ETH_TP_MDI_X */
2128                 hw->phy.disable_polarity_correction = FALSE;
2129                 hw->phy.ms_type = e1000_ms_hw_default;
2130         }
2131
2132         if (e1000_check_reset_block(hw))
2133                 dev_info(pci_dev_to_dev(pdev),
2134                         "PHY reset is blocked due to SOL/IDER session.\n");
2135
2136         /*
2137          * features is initialized to 0 at allocation; it might have bits
2138          * set by igb_sw_init, so we should use an OR instead of an
2139          * assignment.
2140          */
2141         netdev->features |= NETIF_F_SG |
2142                             NETIF_F_IP_CSUM |
2143 #ifdef NETIF_F_IPV6_CSUM
2144                             NETIF_F_IPV6_CSUM |
2145 #endif
2146 #ifdef NETIF_F_TSO
2147                             NETIF_F_TSO |
2148 #ifdef NETIF_F_TSO6
2149                             NETIF_F_TSO6 |
2150 #endif
2151 #endif /* NETIF_F_TSO */
2152 #ifdef NETIF_F_RXHASH
2153                             NETIF_F_RXHASH |
2154 #endif
2155 #ifdef HAVE_NDO_SET_FEATURES
2156                             NETIF_F_RXCSUM |
2157 #endif
2158                             NETIF_F_HW_VLAN_RX |
2159                             NETIF_F_HW_VLAN_TX;
2160
2161 #ifdef HAVE_NDO_SET_FEATURES
2162         /* copy netdev features into list of user selectable features */
2163         netdev->hw_features |= netdev->features;
2164 #ifndef IGB_NO_LRO
2165
2166         /* give us the option of enabling LRO later */
2167         netdev->hw_features |= NETIF_F_LRO;
2168 #endif
2169 #else
2170 #ifdef NETIF_F_GRO
2171
2172         /* this is only needed on kernels prior to 2.6.39 */
2173         netdev->features |= NETIF_F_GRO;
2174 #endif
2175 #endif
2176
2177         /* set this bit last since it cannot be part of hw_features */
2178         netdev->features |= NETIF_F_HW_VLAN_FILTER;
2179
2180 #ifdef HAVE_NETDEV_VLAN_FEATURES
2181         netdev->vlan_features |= NETIF_F_TSO |
2182                                  NETIF_F_TSO6 |
2183                                  NETIF_F_IP_CSUM |
2184                                  NETIF_F_IPV6_CSUM |
2185                                  NETIF_F_SG;
2186
2187 #endif
2188         if (pci_using_dac)
2189                 netdev->features |= NETIF_F_HIGHDMA;
2190
2191         if (hw->mac.type >= e1000_82576)
2192                 netdev->features |= NETIF_F_SCTP_CSUM;
2193
2194         adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
2195
2196         /* before reading the NVM, reset the controller to put the device in a
2197          * known good starting state */
2198         e1000_reset_hw(hw);
2199
2200         /* make sure the NVM is good */
2201         if (e1000_validate_nvm_checksum(hw) < 0) {
2202                 dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
2203                         " Valid\n");
2204                 err = -EIO;
2205                 goto err_eeprom;
2206         }
2207
2208         /* copy the MAC address out of the NVM */
2209         if (e1000_read_mac_addr(hw))
2210                 dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
2211         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2212 #ifdef ETHTOOL_GPERMADDR
2213         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2214
2215         if (!is_valid_ether_addr(netdev->perm_addr)) {
2216 #else
2217         if (!is_valid_ether_addr(netdev->dev_addr)) {
2218 #endif
2219                 dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
2220                 err = -EIO;
2221                 goto err_eeprom;
2222         }
2223
2224         memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
2225         adapter->mac_table[0].queue = adapter->vfs_allocated_count;
2226         adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
2227         igb_rar_set(adapter, 0);
2228
2229         /* get firmware version for ethtool -i */
2230         e1000_read_nvm(&adapter->hw, 5, 1, &adapter->fw_version);
2231         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
2232                     (unsigned long) adapter);
2233         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2234                 setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
2235                             (unsigned long) adapter);
2236         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
2237                     (unsigned long) adapter);
2238
2239         INIT_WORK(&adapter->reset_task, igb_reset_task);
2240         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2241         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2242                 INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
2243
2244         /* Initialize link properties that are user-changeable */
2245         adapter->fc_autoneg = true;
2246         hw->mac.autoneg = true;
2247         hw->phy.autoneg_advertised = 0x2f;
2248
2249         hw->fc.requested_mode = e1000_fc_default;
2250         hw->fc.current_mode = e1000_fc_default;
2251
2252         e1000_validate_mdi_setting(hw);
2253
2254         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2255          * enable the ACPI Magic Packet filter
2256          */
2257
2258         if (hw->bus.func == 0)
2259                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2260         else if (hw->mac.type >= e1000_82580)
2261                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2262                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2263                                  &eeprom_data);
2264         else if (hw->bus.func == 1)
2265                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2266
2267         if (eeprom_data & IGB_EEPROM_APME)
2268                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2269
2270         /* now that we have the eeprom settings, apply the special cases where
2271          * the eeprom may be wrong or the board simply won't support wake on
2272          * lan on a particular port */
2273         switch (pdev->device) {
2274         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2275                 adapter->eeprom_wol = 0;
2276                 break;
2277         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2278         case E1000_DEV_ID_82576_FIBER:
2279         case E1000_DEV_ID_82576_SERDES:
2280                 /* Wake events only supported on port A for dual fiber
2281                  * regardless of eeprom setting */
2282                 if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
2283                         adapter->eeprom_wol = 0;
2284                 break;
2285         case E1000_DEV_ID_82576_QUAD_COPPER:
2286         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2287                 /* if quad port adapter, disable WoL on all but port A */
2288                 if (global_quad_port_a != 0)
2289                         adapter->eeprom_wol = 0;
2290                 else
2291                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2292                 /* Reset for multiple quad port adapters */
2293                 if (++global_quad_port_a == 4)
2294                         global_quad_port_a = 0;
2295                 break;
2296         }
2297
2298         /* initialize the wol settings based on the eeprom settings */
2299         adapter->wol = adapter->eeprom_wol;
2300         device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);
2301
2302         /* reset the hardware with the new settings */
2303         igb_reset(adapter);
2304
2305         /* let the f/w know that the h/w is now under the control of the
2306          * driver. */
2307         igb_get_hw_control(adapter);
2308
2309         strncpy(netdev->name, "eth%d", IFNAMSIZ);
2310         err = register_netdev(netdev);
2311         if (err)
2312                 goto err_register;
2313
2314 #ifdef CONFIG_IGB_VMDQ_NETDEV
2315         err = igb_init_vmdq_netdevs(adapter);
2316         if (err)
2317                 goto err_register;
2318 #endif
2319         /* carrier off reporting is important to ethtool even BEFORE open */
2320         netif_carrier_off(netdev);
2321
2322 #ifdef IGB_DCA
2323         if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
2324                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2325                 dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
2326                 igb_setup_dca(adapter);
2327         }
2328
2329 #endif
2330 #ifdef HAVE_HW_TIME_STAMP
2331         /* do hw tstamp init after resetting */
2332         igb_init_hw_timer(adapter);
2333
2334 #endif
2335         dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
2336         /* print bus type/speed/width info */
2337         dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
2338                  netdev->name,
2339                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
2340                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
2341                                                             "unknown"),
2342                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4\n" :
2343                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2\n" :
2344                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1\n" :
2345                    "unknown"));
2346         dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
2347         for (i = 0; i < 6; i++)
2348                 printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
2349
2350         ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
2351         if (ret_val)
2352                 strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
2353         dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
2354                  pba_str);
2355
2356
2357         /* Initialize the thermal sensor on i350 devices. */
2358         if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
2359                 u16 ets_word;
2360
2361                 /*
2362                  * Read the NVM to determine if this i350 device supports an
2363                  * external thermal sensor.
2364                  */
2365                 e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
2366                 if (ets_word != 0x0000 && ets_word != 0xFFFF)
2367                         adapter->ets = true;
2368                 else
2369                         adapter->ets = false;
2370 #ifdef IGB_SYSFS
2371                 igb_sysfs_init(adapter);
2372 #else
2373 #ifdef IGB_PROCFS
2374                 igb_procfs_init(adapter);
2375 #endif /* IGB_PROCFS */
2376 #endif /* IGB_SYSFS */
2377         } else {
2378                 adapter->ets = false;
2379         }
2380
2381         switch (hw->mac.type) {
2382         case e1000_i350:
2383                 /* Enable EEE for internal copper PHY devices */
2384                 if (hw->phy.media_type == e1000_media_type_copper)
2385                         e1000_set_eee_i350(hw);
2386
2387                 /* send driver version info to firmware */
2388                 igb_init_fw(adapter);
2389                 break;
2390         default:
2391                 break;
2392         }
2393 #ifndef IGB_NO_LRO
2394         if (netdev->features & NETIF_F_LRO)
2395                 dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled\n");
2396         else
2397                 dev_info(pci_dev_to_dev(pdev), "LRO is disabled\n");
2398 #endif
2399         dev_info(pci_dev_to_dev(pdev),
2400                  "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2401                  adapter->msix_entries ? "MSI-X" :
2402                  (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2403                  adapter->num_rx_queues, adapter->num_tx_queues);
2404
2405         cards_found++;
2406
2407         pm_runtime_put_noidle(&pdev->dev);
2408         return 0;
2409
2410 err_register:
2411         igb_release_hw_control(adapter);
2412 err_eeprom:
2413         if (!e1000_check_reset_block(hw))
2414                 e1000_phy_hw_reset(hw);
2415
2416         if (hw->flash_address)
2417                 iounmap(hw->flash_address);
2418 err_sw_init:
2419         igb_clear_interrupt_scheme(adapter);
2420         igb_reset_sriov_capability(adapter);
2421         iounmap(hw->hw_addr);
2422 err_ioremap:
2423         free_netdev(netdev);
2424 err_alloc_etherdev:
2425         pci_release_selected_regions(pdev,
2426                                      pci_select_bars(pdev, IORESOURCE_MEM));
2427 err_pci_reg:
2428 err_dma:
2429         pci_disable_device(pdev);
2430         return err;
2431 }
2432
2433 /**
2434  * igb_remove - Device Removal Routine
2435  * @pdev: PCI device information struct
2436  *
2437  * igb_remove is called by the PCI subsystem to alert the driver
2438  * that it should release a PCI device.  This could be caused by a
2439  * Hot-Plug event, or because the driver is going to be removed from
2440  * memory.
2441  **/
2442 static void __devexit igb_remove(struct pci_dev *pdev)
2443 {
2444         struct net_device *netdev = pci_get_drvdata(pdev);
2445         struct igb_adapter *adapter = netdev_priv(netdev);
2446         struct e1000_hw *hw = &adapter->hw;
2447
2448         pm_runtime_get_noresume(&pdev->dev);
2449
2450         /* flush_scheduled_work() may reschedule our watchdog task, so
2451          * explicitly prevent the watchdog tasks from being rescheduled */
2452         set_bit(__IGB_DOWN, &adapter->state);
2453         del_timer_sync(&adapter->watchdog_timer);
2454         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2455                 del_timer_sync(&adapter->dma_err_timer);
2456         del_timer_sync(&adapter->phy_info_timer);
2457
2458         flush_scheduled_work();
2459
2460 #ifdef IGB_DCA
2461         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2462                 dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
2463                 dca_remove_requester(&pdev->dev);
2464                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2465                 E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
2466         }
2467 #endif
2468
2469         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2470          * would have already happened in close and is redundant. */
2471         igb_release_hw_control(adapter);
2472
2473         unregister_netdev(netdev);
2474 #ifdef CONFIG_IGB_VMDQ_NETDEV
2475         igb_remove_vmdq_netdevs(adapter);
2476 #endif
2477
2478         igb_clear_interrupt_scheme(adapter);
2479         igb_reset_sriov_capability(adapter);
2480
2481         iounmap(hw->hw_addr);
2482         if (hw->flash_address)
2483                 iounmap(hw->flash_address);
2484         pci_release_selected_regions(pdev,
2485                                      pci_select_bars(pdev, IORESOURCE_MEM));
2486
2487 #ifdef IGB_SYSFS
2488         igb_sysfs_exit(adapter);
2489 #else
2490 #ifdef IGB_PROCFS
2491         igb_procfs_exit(adapter);
2492 #endif /* IGB_PROCFS */
2493 #endif /* IGB_SYSFS */
2494
2495         kfree(adapter->mac_table);
2496         kfree(adapter->shadow_vfta);
2497         free_netdev(netdev);
2498
2499         pci_disable_pcie_error_reporting(pdev);
2500
2501         pci_disable_device(pdev);
2502 }
2503
2504 #ifdef HAVE_HW_TIME_STAMP
2505 /**
2506  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2507  * @adapter: board private structure to initialize
2508  *
2509  * igb_init_hw_timer initializes the function pointer and values for the hw
2510  * timer found in hardware.
2511  **/
2512 static void igb_init_hw_timer(struct igb_adapter *adapter)
2513 {
2514         struct e1000_hw *hw = &adapter->hw;
2515
2516         switch (hw->mac.type) {
2517         case e1000_i350:
2518         case e1000_82580:
2519                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2520                 adapter->cycles.read = igb_read_clock;
2521                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2522                 adapter->cycles.mult = 1;
2523                 /*
2524                  * The 82580 timesync increments the system timer by 8ns every 8ns
2525                  * and the value cannot be shifted.  Instead we need to shift
2526                  * the registers to generate a 64bit timer value.  As a result
2527                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2528                  * 24 in order to generate a larger value for synchronization.
2529                  */
2530                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2531                 /* disable system timer temporarily by setting bit 31 */
2532                 E1000_WRITE_REG(hw, E1000_TSAUXC, 0x80000000);
2533                 E1000_WRITE_FLUSH(hw);
2534
2535                 /* Set registers so that rollover occurs soon to test this. */
2536                 E1000_WRITE_REG(hw, E1000_SYSTIMR, 0x00000000);
2537                 E1000_WRITE_REG(hw, E1000_SYSTIML, 0x80000000);
2538                 E1000_WRITE_REG(hw, E1000_SYSTIMH, 0x000000FF);
2539                 E1000_WRITE_FLUSH(hw);
2540
2541                 /* enable system timer by clearing bit 31 */
2542                 E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
2543                 E1000_WRITE_FLUSH(hw);
2544
2545                 timecounter_init(&adapter->clock,
2546                                  &adapter->cycles,
2547                                  ktime_to_ns(ktime_get_real()));
2548                 /*
2549                  * Synchronize our NIC clock against system wall clock. NIC
2550                  * time stamp reading requires ~3us per sample and each sample
2551                  * was pretty stable even under load, so we only require 10
2552                  * samples for each offset comparison.
2553                  */
2554                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2555                 adapter->compare.source = &adapter->clock;
2556                 adapter->compare.target = ktime_get_real;
2557                 adapter->compare.num_samples = 10;
2558                 timecompare_update(&adapter->compare, 0);
2559                 break;
2560         case e1000_82576:
2561                 /*
2562                  * Initialize hardware timer: we keep it running just in case
2563                  * that some program needs it later on.
2564                  */
2565                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2566                 adapter->cycles.read = igb_read_clock;
2567                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2568                 adapter->cycles.mult = 1;
2569                 /*
2570                  * Scale the NIC clock cycle by a large factor so that
2571                  * relatively small clock corrections can be added or
2572                  * subtracted at each clock tick. The drawbacks of a large
2573                  * factor are a) that the clock register overflows more quickly
2574                  * (not such a big deal) and b) that the increment per tick has
2575                  * to fit into 24 bits.  As a result we need to use a shift of
2576                  * 19 so we can fit a value of 16 into the TIMINCA register.
2577                  */
2578                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2579                 E1000_WRITE_REG(hw, E1000_TIMINCA,
2580                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2581                                 (16 << IGB_82576_TSYNC_SHIFT));
2582
2583                 /* Set registers so that rollover occurs soon to test this. */
2584                 E1000_WRITE_REG(hw, E1000_SYSTIML, 0x00000000);
2585                 E1000_WRITE_REG(hw, E1000_SYSTIMH, 0xFF800000);
2586                 E1000_WRITE_FLUSH(hw);
2587
2588                 timecounter_init(&adapter->clock,
2589                                  &adapter->cycles,
2590                                  ktime_to_ns(ktime_get_real()));
2591                 /*
2592                  * Synchronize our NIC clock against system wall clock. NIC
2593                  * time stamp reading requires ~3us per sample and each sample
2594                  * was pretty stable even under load, so we only require 10
2595                  * samples for each offset comparison.
2596                  */
2597                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2598                 adapter->compare.source = &adapter->clock;
2599                 adapter->compare.target = ktime_get_real;
2600                 adapter->compare.num_samples = 10;
2601                 timecompare_update(&adapter->compare, 0);
2602                 break;
2603         case e1000_82575:
2604                 /* 82575 does not support timesync */
2605         default:
2606                 break;
2607         }
2608 }
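
/*
 * Consumption sketch (illustrative only): once the cyclecounter and
 * timecompare above are initialized, the Rx/Tx timestamping path can
 * turn a raw SYSTIM sample into wall-clock time roughly as
 *
 *   u64 ns = timecounter_cyc2time(&adapter->clock, systim);
 *   ktime_t sys = timecompare_transform(&adapter->compare, ns);
 *
 * which is the kind of value that ends up in skb_shared_hwtstamps for
 * applications using hardware timestamping.
 */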
2609
2610 #endif /* HAVE_HW_TIME_STAMP */
2611 /**
2612  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2613  * @adapter: board private structure to initialize
2614  *
2615  * igb_sw_init initializes the Adapter private data structure.
2616  * Fields are initialized based on PCI device information and
2617  * OS network device settings (MTU size).
2618  **/
2619 static int igb_sw_init(struct igb_adapter *adapter)
2620 {
2621         struct e1000_hw *hw = &adapter->hw;
2622         struct net_device *netdev = adapter->netdev;
2623         struct pci_dev *pdev = adapter->pdev;
2624
2625         /* PCI config space info */
2626
2627         hw->vendor_id = pdev->vendor;
2628         hw->device_id = pdev->device;
2629         hw->subsystem_vendor_id = pdev->subsystem_vendor;
2630         hw->subsystem_device_id = pdev->subsystem_device;
2631
2632         pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
2633
2634         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2635
2636         /* set default ring sizes */
2637         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2638         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2639
2640         /* set default work limits */
2641         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2642
2643         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2644                                               VLAN_HLEN;
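        /* e.g. the default 1500-byte MTU gives 1500 + 14 + 4 + 4 = 1522 bytes */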
2645
2646         /* Initialize the hardware-specific values */
2647         if (e1000_setup_init_funcs(hw, TRUE)) {
2648                 dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n");
2649                 return -EIO;
2650         }
2651
2652         adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) *
2653                                      hw->mac.rar_entry_count,
2654                                      GFP_ATOMIC);
2655
2656         /* Setup and initialize a copy of the hw vlan table array */
2657         adapter->shadow_vfta = (u32 *)kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES,
2658                                         GFP_ATOMIC);
2659 #ifdef NO_KNI
2660         /* These calls may decrease the number of queues */
2661         igb_set_sriov_capability(adapter);
2662
2663         if (igb_init_interrupt_scheme(adapter)) {
2664                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
2665                 return -ENOMEM;
2666         }
2667
2668         /* Explicitly disable IRQ since the NIC can be in any state. */
2669         igb_irq_disable(adapter);
2670
2671         set_bit(__IGB_DOWN, &adapter->state);
2672 #endif
2673         return 0;
2674 }
2675
2676 /**
2677  * igb_open - Called when a network interface is made active
2678  * @netdev: network interface device structure
2679  *
2680  * Returns 0 on success, negative value on failure
2681  *
2682  * The open entry point is called when a network interface is made
2683  * active by the system (IFF_UP).  At this point all resources needed
2684  * for transmit and receive operations are allocated, the interrupt
2685  * handler is registered with the OS, the watchdog timer is started,
2686  * and the stack is notified that the interface is ready.
2687  **/
2688 static int __igb_open(struct net_device *netdev, bool resuming)
2689 {
2690         struct igb_adapter *adapter = netdev_priv(netdev);
2691         struct e1000_hw *hw = &adapter->hw;
2692 #ifdef CONFIG_PM_RUNTIME
2693         struct pci_dev *pdev = adapter->pdev;
2694 #endif /* CONFIG_PM_RUNTIME */
2695         int err;
2696         int i;
2697
2698         /* disallow open during test */
2699         if (test_bit(__IGB_TESTING, &adapter->state)) {
2700                 WARN_ON(resuming);
2701                 return -EBUSY;
2702         }
2703
2704 #ifdef CONFIG_PM_RUNTIME
2705         if (!resuming)
2706                 pm_runtime_get_sync(&pdev->dev);
2707 #endif /* CONFIG_PM_RUNTIME */
2708
2709         netif_carrier_off(netdev);
2710
2711         /* allocate transmit descriptors */
2712         err = igb_setup_all_tx_resources(adapter);
2713         if (err)
2714                 goto err_setup_tx;
2715
2716         /* allocate receive descriptors */
2717         err = igb_setup_all_rx_resources(adapter);
2718         if (err)
2719                 goto err_setup_rx;
2720
2721         igb_power_up_link(adapter);
2722
2723         /* before we allocate an interrupt, we must be ready to handle it.
2724          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2725          * as soon as we request the IRQ, so we have to set up our
2726          * clean_rx handler before we do so.  */
2727         igb_configure(adapter);
2728
2729         err = igb_request_irq(adapter);
2730         if (err)
2731                 goto err_req_irq;
2732
2733         /* From here on the code is the same as igb_up() */
2734         clear_bit(__IGB_DOWN, &adapter->state);
2735
2736         for (i = 0; i < adapter->num_q_vectors; i++)
2737                 napi_enable(&(adapter->q_vector[i]->napi));
2738         igb_configure_lli(adapter);
2739
2740         /* Clear any pending interrupts. */
2741         E1000_READ_REG(hw, E1000_ICR);
2742
2743         igb_irq_enable(adapter);
2744
2745         /* notify VFs that reset has been completed */
2746         if (adapter->vfs_allocated_count) {
2747                 u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
2748                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2749                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
2750         }
2751
2752         netif_tx_start_all_queues(netdev);
2753
2754         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2755                 schedule_work(&adapter->dma_err_task);
2756
2757         /* start the watchdog. */
2758         hw->mac.get_link_status = 1;
2759         schedule_work(&adapter->watchdog_task);
2760
2761         return E1000_SUCCESS;
2762
2763 err_req_irq:
2764         igb_release_hw_control(adapter);
2765         igb_power_down_link(adapter);
2766         igb_free_all_rx_resources(adapter);
2767 err_setup_rx:
2768         igb_free_all_tx_resources(adapter);
2769 err_setup_tx:
2770         igb_reset(adapter);
2771
2772 #ifdef CONFIG_PM_RUNTIME
2773         if (!resuming)
2774                 pm_runtime_put(&pdev->dev);
2775 #endif /* CONFIG_PM_RUNTIME */
2776
2777         return err;
2778 }
2779
2780 static int igb_open(struct net_device *netdev)
2781 {
2782         return __igb_open(netdev, false);
2783 }
2784
2785 /**
2786  * igb_close - Disables a network interface
2787  * @netdev: network interface device structure
2788  *
2789  * Returns 0, this is not allowed to fail
2790  *
2791  * The close entry point is called when an interface is de-activated
2792  * by the OS.  The hardware is still under the driver's control, but
2793  * needs to be disabled.  A global MAC reset is issued to stop the
2794  * hardware, and all transmit and receive resources are freed.
2795  **/
2796 static int __igb_close(struct net_device *netdev, bool suspending)
2797 {
2798         struct igb_adapter *adapter = netdev_priv(netdev);
2799 #ifdef CONFIG_PM_RUNTIME
2800         struct pci_dev *pdev = adapter->pdev;
2801 #endif /* CONFIG_PM_RUNTIME */
2802
2803         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2804
2805 #ifdef CONFIG_PM_RUNTIME
2806         if (!suspending)
2807                 pm_runtime_get_sync(&pdev->dev);
2808 #endif /* CONFIG_PM_RUNTIME */
2809
2810         igb_down(adapter);
2811
2812         igb_release_hw_control(adapter);
2813
2814         igb_free_irq(adapter);
2815
2816         igb_free_all_tx_resources(adapter);
2817         igb_free_all_rx_resources(adapter);
2818
2819 #ifdef CONFIG_PM_RUNTIME
2820         if (!suspending)
2821                 pm_runtime_put_sync(&pdev->dev);
2822 #endif /* CONFIG_PM_RUNTIME */
2823
2824         return 0;
2825 }
2826
2827 static int igb_close(struct net_device *netdev)
2828 {
2829         return __igb_close(netdev, false);
2830 }
2831
2832 /**
2833  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2834  * @tx_ring: tx descriptor ring (for a specific queue) to set up
2835  *
2836  * Return 0 on success, negative on failure
2837  **/
2838 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2839 {
2840         struct device *dev = tx_ring->dev;
2841         int orig_node = dev_to_node(dev);
2842         int size;
2843
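             /* try to place both the buffer_info array and the descriptor
              * ring on the ring's preferred NUMA node first; if either
              * allocation fails, retry below without a node preference */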
2844         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2845         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2846         if (!tx_ring->tx_buffer_info)
2847                 tx_ring->tx_buffer_info = vzalloc(size);
2848         if (!tx_ring->tx_buffer_info)
2849                 goto err;
2850
2851         /* round up to nearest 4K */
2852         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2853         tx_ring->size = ALIGN(tx_ring->size, 4096);
2854
2855         set_dev_node(dev, tx_ring->numa_node);
2856         tx_ring->desc = dma_alloc_coherent(dev,
2857                                            tx_ring->size,
2858                                            &tx_ring->dma,
2859                                            GFP_KERNEL);
2860         set_dev_node(dev, orig_node);
2861         if (!tx_ring->desc)
2862                 tx_ring->desc = dma_alloc_coherent(dev,
2863                                                    tx_ring->size,
2864                                                    &tx_ring->dma,
2865                                                    GFP_KERNEL);
2866
2867         if (!tx_ring->desc)
2868                 goto err;
2869
2870         tx_ring->next_to_use = 0;
2871         tx_ring->next_to_clean = 0;
2872
2873         return 0;
2874
2875 err:
2876         vfree(tx_ring->tx_buffer_info);
             tx_ring->tx_buffer_info = NULL;
2877         dev_err(dev,
2878                 "Unable to allocate memory for the transmit descriptor ring\n");
2879         return -ENOMEM;
2880 }
2881
2882 /**
2883  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2884  *                                (Descriptors) for all queues
2885  * @adapter: board private structure
2886  *
2887  * Return 0 on success, negative on failure
2888  **/
2889 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2890 {
2891         struct pci_dev *pdev = adapter->pdev;
2892         int i, err = 0;
2893
2894         for (i = 0; i < adapter->num_tx_queues; i++) {
2895                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2896                 if (err) {
2897                         dev_err(pci_dev_to_dev(pdev),
2898                                 "Allocation for Tx Queue %u failed\n", i);
2899                         for (i--; i >= 0; i--)
2900                                 igb_free_tx_resources(adapter->tx_ring[i]);
2901                         break;
2902                 }
2903         }
2904
2905         return err;
2906 }
2907
2908 /**
2909  * igb_setup_tctl - configure the transmit control registers
2910  * @adapter: Board private structure
2911  **/
2912 void igb_setup_tctl(struct igb_adapter *adapter)
2913 {
2914         struct e1000_hw *hw = &adapter->hw;
2915         u32 tctl;
2916
2917         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2918         E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0);
2919
2920         /* Program the Transmit Control Register */
2921         tctl = E1000_READ_REG(hw, E1000_TCTL);
2922         tctl &= ~E1000_TCTL_CT;
2923         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2924                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2925
2926         e1000_config_collision_dist(hw);
2927
2928         /* Enable transmits */
2929         tctl |= E1000_TCTL_EN;
2930
2931         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2932 }
2933
2934 /**
2935  * igb_configure_tx_ring - Configure transmit ring after Reset
2936  * @adapter: board private structure
2937  * @ring: tx ring to configure
2938  *
2939  * Configure a transmit ring after a reset.
2940  **/
2941 void igb_configure_tx_ring(struct igb_adapter *adapter,
2942                            struct igb_ring *ring)
2943 {
2944         struct e1000_hw *hw = &adapter->hw;
2945         u32 txdctl = 0;
2946         u64 tdba = ring->dma;
2947         int reg_idx = ring->reg_idx;
2948
2949         /* disable the queue */
2950         E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0);
2951         E1000_WRITE_FLUSH(hw);
2952         mdelay(10);
2953
2954         E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx),
2955                         ring->count * sizeof(union e1000_adv_tx_desc));
2956         E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx),
2957                         tdba & 0x00000000ffffffffULL);
2958         E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32);
2959
2960         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2961         E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0);
2962         writel(0, ring->tail);
2963
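             /* program the prefetch, host and write-back thresholds, which
              * occupy bits 0, 8 and 16 of TXDCTL respectively */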
2964         txdctl |= IGB_TX_PTHRESH;
2965         txdctl |= IGB_TX_HTHRESH << 8;
2966         txdctl |= IGB_TX_WTHRESH << 16;
2967
2968         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2969         E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl);
2970 }
2971
2972 /**
2973  * igb_configure_tx - Configure transmit Unit after Reset
2974  * @adapter: board private structure
2975  *
2976  * Configure the Tx unit of the MAC after a reset.
2977  **/
2978 static void igb_configure_tx(struct igb_adapter *adapter)
2979 {
2980         int i;
2981
2982         for (i = 0; i < adapter->num_tx_queues; i++)
2983                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2984 }
2985
2986 /**
2987  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2988  * @rx_ring: rx descriptor ring (for a specific queue) to set up
2989  *
2990  * Returns 0 on success, negative on failure
2991  **/
2992 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2993 {
2994         struct device *dev = rx_ring->dev;
2995         int orig_node = dev_to_node(dev);
2996         int size, desc_len;
2997
2998         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2999         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
3000         if (!rx_ring->rx_buffer_info)
3001                 rx_ring->rx_buffer_info = vzalloc(size);
3002         if (!rx_ring->rx_buffer_info)
3003                 goto err;
3004
3005         desc_len = sizeof(union e1000_adv_rx_desc);
3006
3007         /* Round up to nearest 4K */
3008         rx_ring->size = rx_ring->count * desc_len;
3009         rx_ring->size = ALIGN(rx_ring->size, 4096);
3010
3011         set_dev_node(dev, rx_ring->numa_node);
3012         rx_ring->desc = dma_alloc_coherent(dev,
3013                                            rx_ring->size,
3014                                            &rx_ring->dma,
3015                                            GFP_KERNEL);
3016         set_dev_node(dev, orig_node);
3017         if (!rx_ring->desc)
3018                 rx_ring->desc = dma_alloc_coherent(dev,
3019                                                    rx_ring->size,
3020                                                    &rx_ring->dma,
3021                                                    GFP_KERNEL);
3022
3023         if (!rx_ring->desc)
3024                 goto err;
3025
3026         rx_ring->next_to_clean = 0;
3027         rx_ring->next_to_use = 0;
3028
3029         return 0;
3030
3031 err:
3032         vfree(rx_ring->rx_buffer_info);
3033         rx_ring->rx_buffer_info = NULL;
3034         dev_err(dev,
3035                 "Unable to allocate memory for the receive descriptor ring\n");
3036         return -ENOMEM;
3037 }
3038
3039 /**
3040  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
3041  *                                (Descriptors) for all queues
3042  * @adapter: board private structure
3043  *
3044  * Return 0 on success, negative on failure
3045  **/
3046 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
3047 {
3048         struct pci_dev *pdev = adapter->pdev;
3049         int i, err = 0;
3050
3051         for (i = 0; i < adapter->num_rx_queues; i++) {
3052                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
3053                 if (err) {
3054                         dev_err(pci_dev_to_dev(pdev),
3055                                 "Allocation for Rx Queue %u failed\n", i);
3056                         for (i--; i >= 0; i--)
3057                                 igb_free_rx_resources(adapter->rx_ring[i]);
3058                         break;
3059                 }
3060         }
3061
3062         return err;
3063 }
3064
3065 /**
3066  * igb_setup_mrqc - configure the multiple receive queue control registers
3067  * @adapter: Board private structure
3068  **/
3069 static void igb_setup_mrqc(struct igb_adapter *adapter)
3070 {
3071         struct e1000_hw *hw = &adapter->hw;
3072         u32 mrqc, rxcsum;
3073         u32 j, num_rx_queues, shift = 0, shift2 = 0;
3074         union e1000_reta {
3075                 u32 dword;
3076                 u8  bytes[4];
3077         } reta;
3078         static const u8 rsshash[40] = {
3079                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
3080                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
3081                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
3082                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
3083
3084         /* Fill out hash function seeds */
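             /* (each of the ten RSSRK registers holds four key bytes,
              *  packed least-significant byte first) */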
3085         for (j = 0; j < 10; j++) {
3086                 u32 rsskey = rsshash[(j * 4)];
3087                 rsskey |= rsshash[(j * 4) + 1] << 8;
3088                 rsskey |= rsshash[(j * 4) + 2] << 16;
3089                 rsskey |= rsshash[(j * 4) + 3] << 24;
3090                 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), j, rsskey);
3091         }
3092
3093         num_rx_queues = adapter->rss_queues;
3094
3095         if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
3096                 /* 82575 and 82576 support 2 RSS queues for VMDq */
3097                 switch (hw->mac.type) {
3098                 case e1000_i350:
3099                 case e1000_82580:
3100                         num_rx_queues = 1;
3101                         shift = 0;
3102                         break;
3103                 case e1000_82576:
3104                         shift = 3;
3105                         num_rx_queues = 2;
3106                         break;
3107                 case e1000_82575:
3108                         shift = 2;
3109                         shift2 = 6;
                             /* fall through */
3110                 default:
3111                         break;
3112                 }
3113         } else {
3114                 if (hw->mac.type == e1000_82575)
3115                         shift = 6;
3116         }
3117
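             /* populate the 128-entry redirection table; entries are packed
              * four per 32-bit RETA register, so a register is written on
              * every fourth iteration */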
3118         for (j = 0; j < (32 * 4); j++) {
3119                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
3120                 if (shift2)
3121                         reta.bytes[j & 3] |= num_rx_queues << shift2;
3122                 if ((j & 3) == 3)
3123                         E1000_WRITE_REG(hw, E1000_RETA(j >> 2), reta.dword);
3124         }
3125
3126         /*
3127          * Disable raw packet checksumming so that RSS hash is placed in
3128          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
3129          * offloads as they are enabled by default
3130          */
3131         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3132         rxcsum |= E1000_RXCSUM_PCSD;
3133
3134         if (adapter->hw.mac.type >= e1000_82576)
3135                 /* Enable Receive Checksum Offload for SCTP */
3136                 rxcsum |= E1000_RXCSUM_CRCOFL;
3137
3138         /* Don't need to set TUOFL or IPOFL, they default to 1 */
3139         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
3140
3141         /* If VMDq is enabled then we set the appropriate mode for that, else
3142          * we default to RSS so that an RSS hash is calculated per packet even
3143          * if we are only using one queue */
3144         if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
3145                 if (hw->mac.type > e1000_82575) {
3146                         /* Set the default pool for the PF's first queue */
3147                         u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL);
3148                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
3149                                    E1000_VT_CTL_DISABLE_DEF_POOL);
3150                         vtctl |= adapter->vfs_allocated_count <<
3151                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
3152                         E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl);
3153                 } else if (adapter->rss_queues > 1) {
3154                         /* set default queue for pool 1 to queue 2 */
3155                         E1000_WRITE_REG(hw, E1000_VT_CTL,
3156                                         adapter->rss_queues << 7);
3157                 }
3158                 if (adapter->rss_queues > 1)
3159                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3160                 else
3161                         mrqc = E1000_MRQC_ENABLE_VMDQ;
3162         } else {
3163                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3164         }
3165
3166         igb_vmm_control(adapter);
3167
3168         /*
3169          * Generate RSS hash based on TCP port numbers and/or
3170          * IPv4/v6 src and dst addresses since UDP cannot be
3171          * hashed reliably due to IP fragmentation
3172          */
3173         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
3174                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
3175                 E1000_MRQC_RSS_FIELD_IPV6 |
3176                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
3177                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
3178
3179         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3180 }
3181
3182 /**
3183  * igb_setup_rctl - configure the receive control registers
3184  * @adapter: Board private structure
3185  **/
3186 void igb_setup_rctl(struct igb_adapter *adapter)
3187 {
3188         struct e1000_hw *hw = &adapter->hw;
3189         u32 rctl;
3190
3191         rctl = E1000_READ_REG(hw, E1000_RCTL);
3192
3193         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3194         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3195
3196         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3197                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3198
3199         /*
3200          * enable stripping of CRC. It's unlikely this will break BMC
3201          * redirection as it did with e1000. Newer features require
3202          * that the HW strips the CRC.
3203          */
3204         rctl |= E1000_RCTL_SECRC;
3205
3206         /* disable store bad packets and clear size bits. */
3207         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3208
3209         /* enable LPE to prevent packets larger than max_frame_size */
3210         rctl |= E1000_RCTL_LPE;
3211
3212         /* disable queue 0 to prevent tail write w/o re-config */
3213         E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0);
3214
3215         /* Attention!!!  For SR-IOV PF driver operations you must enable
3216          * queue drop for all VF and PF queues to prevent head of line blocking
3217          * if an un-trusted VF does not provide descriptors to hardware.
3218          */
3219         if (adapter->vfs_allocated_count) {
3220                 /* set all queue drop enable bits */
3221                 E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES);
3222         }
3223
3224         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
3225 }
3226
3227 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3228                                    int vfn)
3229 {
3230         struct e1000_hw *hw = &adapter->hw;
3231         u32 vmolr;
3232
3233         /* if this is a VF rather than the PF, check whether that VF has
3234          * VLANs enabled and, if so, grow the size to allow for a VLAN tag */
3235         if (vfn < adapter->vfs_allocated_count &&
3236             adapter->vf_data[vfn].vlans_enabled)
3237                 size += VLAN_HLEN;
3238
3239 #ifdef CONFIG_IGB_VMDQ_NETDEV
3240         if (vfn >= adapter->vfs_allocated_count) {
3241                 int queue = vfn - adapter->vfs_allocated_count;
3242                 struct igb_vmdq_adapter *vadapter;
3243
3244                 vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]);
3245                 if (vadapter->vlgrp)
3246                         size += VLAN_HLEN;
3247         }
3248 #endif
3249         vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
3250         vmolr &= ~E1000_VMOLR_RLPML_MASK;
3251         vmolr |= size | E1000_VMOLR_LPE;
3252         E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
3253
3254         return 0;
3255 }
3256
3257 /**
3258  * igb_rlpml_set - set maximum receive packet size
3259  * @adapter: board private structure
3260  *
3261  * Configure maximum receivable packet size.
3262  **/
3263 static void igb_rlpml_set(struct igb_adapter *adapter)
3264 {
3265         u32 max_frame_size = adapter->max_frame_size;
3266         struct e1000_hw *hw = &adapter->hw;
3267         u16 pf_id = adapter->vfs_allocated_count;
3268
3269         if (adapter->vmdq_pools && hw->mac.type != e1000_82575) {
3270                 int i;
3271                 for (i = 0; i < adapter->vmdq_pools; i++)
3272                         igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i);
3273                 /*
3274                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3275                  * to our max jumbo frame size, in case we need to enable
3276                  * jumbo frames on one of the rings later.
3277                  * This will not pass over-length frames into the default
3278                  * queue because it's gated by the VMOLR.RLPML.
3279                  */
3280                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3281         }
3282         /* Set VF RLPML for the PF device. */
3283         if (adapter->vfs_allocated_count)
3284                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3285
3286         E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size);
3287 }
3288
3289 static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
3290                                         int vfn, bool enable)
3291 {
3292         struct e1000_hw *hw = &adapter->hw;
3293         u32 val;
3294         void __iomem *reg;
3295
3296         if (hw->mac.type < e1000_82576)
3297                 return;
3298
3299         if (hw->mac.type == e1000_i350)
3300                 reg = hw->hw_addr + E1000_DVMOLR(vfn);
3301         else
3302                 reg = hw->hw_addr + E1000_VMOLR(vfn);
3303
3304         val = readl(reg);
3305         if (enable)
3306                 val |= E1000_VMOLR_STRVLAN;
3307         else
3308                 val &= ~(E1000_VMOLR_STRVLAN);
3309         writel(val, reg);
3310 }
3311 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3312                                  int vfn, bool aupe)
3313 {
3314         struct e1000_hw *hw = &adapter->hw;
3315         u32 vmolr;
3316
3317         /*
3318          * This register exists only on 82576 and newer, so on older
3319          * hardware exit and do nothing
3320          */
3321         if (hw->mac.type < e1000_82576)
3322                 return;
3323
3324         vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
3325
3326         if (aupe)
3327                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3328         else
3329                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3330
3331         /* clear RSSE; it is only re-enabled below for the PF pool */
3332         vmolr &= ~E1000_VMOLR_RSSE;
3333
3334         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3335                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3336
3337         vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3338         vmolr |= E1000_VMOLR_LPE;          /* Accept long packets */
3339
3340         E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
3341 }
3342
3343 /**
3344  * igb_configure_rx_ring - Configure a receive ring after Reset
3345  * @adapter: board private structure
3346  * @ring: receive ring to be configured
3347  *
3348  * Configure the Rx unit of the MAC after a reset.
3349  **/
3350 void igb_configure_rx_ring(struct igb_adapter *adapter,
3351                            struct igb_ring *ring)
3352 {
3353         struct e1000_hw *hw = &adapter->hw;
3354         u64 rdba = ring->dma;
3355         int reg_idx = ring->reg_idx;
3356         u32 srrctl = 0, rxdctl = 0;
3357
3358         /* disable the queue */
3359         E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0);
3360
3361         /* Set DMA base address registers */
3362         E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx),
3363                         rdba & 0x00000000ffffffffULL);
3364         E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32);
3365         E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx),
3366                        ring->count * sizeof(union e1000_adv_rx_desc));
3367
3368         /* initialize head and tail */
3369         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3370         E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0);
3371         writel(0, ring->tail);
3372
3373         /* set descriptor configuration */
3374 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
3375         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3376 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3377         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3378 #else
3379         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3380 #endif
3381         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3382 #else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
3383         srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3384                  E1000_SRRCTL_BSIZEPKT_SHIFT;
3385         srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3386 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
3387 #ifdef IGB_PER_PKT_TIMESTAMP
3388         if (hw->mac.type >= e1000_82580)
3389                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3390 #endif
3391         /*
3392          * We should set the drop enable bit if:
3393          *  SR-IOV is enabled
3394          *   or
3395          *  Flow Control is disabled and number of RX queues > 1
3396          *
3397          *  This allows us to avoid head of line blocking for security
3398          *  and performance reasons.
3399          */
3400         if (adapter->vfs_allocated_count ||
3401             (adapter->num_rx_queues > 1 &&
3402              (hw->fc.requested_mode == e1000_fc_none ||
3403               hw->fc.requested_mode == e1000_fc_rx_pause)))
3404                 srrctl |= E1000_SRRCTL_DROP_EN;
3405
3406         E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl);
3407
3408         /* set filtering for VMDQ pools */
3409         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3410
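             /* as on the Tx side, the prefetch, host and write-back
              * thresholds occupy bits 0, 8 and 16 of RXDCTL */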
3411         rxdctl |= IGB_RX_PTHRESH;
3412         rxdctl |= IGB_RX_HTHRESH << 8;
3413         rxdctl |= IGB_RX_WTHRESH << 16;
3414
3415         /* enable receive descriptor fetching */
3416         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3417         E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl);
3418 }
3419
3420 /**
3421  * igb_configure_rx - Configure receive Unit after Reset
3422  * @adapter: board private structure
3423  *
3424  * Configure the Rx unit of the MAC after a reset.
3425  **/
3426 static void igb_configure_rx(struct igb_adapter *adapter)
3427 {
3428         int i;
3429
3430         /* set UTA to appropriate mode */
3431         igb_set_uta(adapter);
3432
3433         igb_full_sync_mac_table(adapter);
3434         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3435          * the Base and Length of the Rx Descriptor Ring */
3436         for (i = 0; i < adapter->num_rx_queues; i++)
3437                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3438 }
3439
3440 /**
3441  * igb_free_tx_resources - Free Tx Resources per Queue
3442  * @tx_ring: Tx descriptor ring for a specific queue
3443  *
3444  * Free all transmit software resources
3445  **/
3446 void igb_free_tx_resources(struct igb_ring *tx_ring)
3447 {
3448         igb_clean_tx_ring(tx_ring);
3449
3450         vfree(tx_ring->tx_buffer_info);
3451         tx_ring->tx_buffer_info = NULL;
3452
3453         /* if not set, then don't free */
3454         if (!tx_ring->desc)
3455                 return;
3456
3457         dma_free_coherent(tx_ring->dev, tx_ring->size,
3458                           tx_ring->desc, tx_ring->dma);
3459
3460         tx_ring->desc = NULL;
3461 }
3462
3463 /**
3464  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3465  * @adapter: board private structure
3466  *
3467  * Free all transmit software resources
3468  **/
3469 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3470 {
3471         int i;
3472
3473         for (i = 0; i < adapter->num_tx_queues; i++)
3474                 igb_free_tx_resources(adapter->tx_ring[i]);
3475 }
3476
3477 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3478                                     struct igb_tx_buffer *tx_buffer)
3479 {
3480         if (tx_buffer->skb) {
3481                 dev_kfree_skb_any(tx_buffer->skb);
3482                 if (dma_unmap_len(tx_buffer, len))
3483                         dma_unmap_single(ring->dev,
3484                                          dma_unmap_addr(tx_buffer, dma),
3485                                          dma_unmap_len(tx_buffer, len),
3486                                          DMA_TO_DEVICE);
3487         } else if (dma_unmap_len(tx_buffer, len)) {
3488                 dma_unmap_page(ring->dev,
3489                                dma_unmap_addr(tx_buffer, dma),
3490                                dma_unmap_len(tx_buffer, len),
3491                                DMA_TO_DEVICE);
3492         }
3493         tx_buffer->next_to_watch = NULL;
3494         tx_buffer->skb = NULL;
3495         dma_unmap_len_set(tx_buffer, len, 0);
3496         /* buffer_info must be completely set up in the transmit path */
3497 }
3498
3499 /**
3500  * igb_clean_tx_ring - Free Tx Buffers
3501  * @tx_ring: ring to be cleaned
3502  **/
3503 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3504 {
3505         struct igb_tx_buffer *buffer_info;
3506         unsigned long size;
3507         u16 i;
3508
3509         if (!tx_ring->tx_buffer_info)
3510                 return;
3511         /* Free all the Tx ring sk_buffs */
3512
3513         /* Free all the Tx ring sk_buffs */
3514                 buffer_info = &tx_ring->tx_buffer_info[i];
3515                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3516         }
3517
3518 #ifdef CONFIG_BQL
3519         netdev_tx_reset_queue(txring_txq(tx_ring));
3520 #endif /* CONFIG_BQL */
3521
3522         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3523         memset(tx_ring->tx_buffer_info, 0, size);
3524
3525         /* Zero out the descriptor ring */
3526         memset(tx_ring->desc, 0, tx_ring->size);
3527
3528         tx_ring->next_to_use = 0;
3529         tx_ring->next_to_clean = 0;
3530 }
3531
3532 /**
3533  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3534  * @adapter: board private structure
3535  **/
3536 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3537 {
3538         int i;
3539
3540         for (i = 0; i < adapter->num_tx_queues; i++)
3541                 igb_clean_tx_ring(adapter->tx_ring[i]);
3542 }
3543
3544 /**
3545  * igb_free_rx_resources - Free Rx Resources
3546  * @rx_ring: ring to clean the resources from
3547  *
3548  * Free all receive software resources
3549  **/
3550 void igb_free_rx_resources(struct igb_ring *rx_ring)
3551 {
3552         igb_clean_rx_ring(rx_ring);
3553
3554         vfree(rx_ring->rx_buffer_info);
3555         rx_ring->rx_buffer_info = NULL;
3556
3557         /* if not set, then don't free */
3558         if (!rx_ring->desc)
3559                 return;
3560
3561         dma_free_coherent(rx_ring->dev, rx_ring->size,
3562                           rx_ring->desc, rx_ring->dma);
3563
3564         rx_ring->desc = NULL;
3565 }
3566
3567 /**
3568  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3569  * @adapter: board private structure
3570  *
3571  * Free all receive software resources
3572  **/
3573 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3574 {
3575         int i;
3576
3577         for (i = 0; i < adapter->num_rx_queues; i++)
3578                 igb_free_rx_resources(adapter->rx_ring[i]);
3579 }
3580
3581 /**
3582  * igb_clean_rx_ring - Free Rx Buffers per Queue
3583  * @rx_ring: ring to free buffers from
3584  **/
3585 void igb_clean_rx_ring(struct igb_ring *rx_ring)
3586 {
3587         unsigned long size;
3588 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
3589         const int bufsz = rx_ring->rx_buffer_len;
3590 #else
3591         const int bufsz = IGB_RX_HDR_LEN;
3592 #endif
3593         u16 i;
3594
3595         if (!rx_ring->rx_buffer_info)
3596                 return;
3597
3598         /* Free all the Rx ring sk_buffs */
3599         for (i = 0; i < rx_ring->count; i++) {
3600                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3601                 if (buffer_info->dma) {
3602                         dma_unmap_single(rx_ring->dev,
3603                                          buffer_info->dma,
3604                                          bufsz,
3605                                          DMA_FROM_DEVICE);
3606                         buffer_info->dma = 0;
3607                 }
3608
3609                 if (buffer_info->skb) {
3610                         dev_kfree_skb(buffer_info->skb);
3611                         buffer_info->skb = NULL;
3612                 }
3613 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
3614                 if (buffer_info->page_dma) {
3615                         dma_unmap_page(rx_ring->dev,
3616                                        buffer_info->page_dma,
3617                                        PAGE_SIZE / 2,
3618                                        DMA_FROM_DEVICE);
3619                         buffer_info->page_dma = 0;
3620                 }
3621                 if (buffer_info->page) {
3622                         put_page(buffer_info->page);
3623                         buffer_info->page = NULL;
3624                         buffer_info->page_offset = 0;
3625                 }
3626 #endif
3627         }
3628
3629         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3630         memset(rx_ring->rx_buffer_info, 0, size);
3631
3632         /* Zero out the descriptor ring */
3633         memset(rx_ring->desc, 0, rx_ring->size);
3634
3635         rx_ring->next_to_clean = 0;
3636         rx_ring->next_to_use = 0;
3637 }
3638
3639 /**
3640  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3641  * @adapter: board private structure
3642  **/
3643 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3644 {
3645         int i;
3646
3647         for (i = 0; i < adapter->num_rx_queues; i++)
3648                 igb_clean_rx_ring(adapter->rx_ring[i]);
3649 }
3650
3651 /**
3652  * igb_set_mac - Change the Ethernet Address of the NIC
3653  * @netdev: network interface device structure
3654  * @p: pointer to an address structure
3655  *
3656  * Returns 0 on success, negative on failure
3657  **/
3658 static int igb_set_mac(struct net_device *netdev, void *p)
3659 {
3660         struct igb_adapter *adapter = netdev_priv(netdev);
3661         struct e1000_hw *hw = &adapter->hw;
3662         struct sockaddr *addr = p;
3663
3664         if (!is_valid_ether_addr(addr->sa_data))
3665                 return -EADDRNOTAVAIL;
3666
3667         igb_del_mac_filter(adapter, hw->mac.addr,
3668                            adapter->vfs_allocated_count);
3669         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3670         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3671
3672         /* set the correct pool for the new PF MAC address in entry 0 */
3673         return igb_add_mac_filter(adapter, hw->mac.addr,
3674                            adapter->vfs_allocated_count);
3675 }
3676
3677 /**
3678  * igb_write_mc_addr_list - write multicast addresses to MTA
3679  * @netdev: network interface device structure
3680  *
3681  * Writes multicast address list to the MTA hash table.
3682  * Returns: -ENOMEM on failure
3683  *                0 on no addresses written
3684  *                X on writing X addresses to MTA
3685  **/
3686 int igb_write_mc_addr_list(struct net_device *netdev)
3687 {
3688         struct igb_adapter *adapter = netdev_priv(netdev);
3689         struct e1000_hw *hw = &adapter->hw;
3690 #ifdef NETDEV_HW_ADDR_T_MULTICAST
3691         struct netdev_hw_addr *ha;
3692 #else
3693         struct dev_mc_list *ha;
3694 #endif
3695         u8  *mta_list;
3696         int i, count;
3697 #ifdef CONFIG_IGB_VMDQ_NETDEV
3698         int vm;
3699 #endif
3700         count = netdev_mc_count(netdev);
3701 #ifdef CONFIG_IGB_VMDQ_NETDEV
3702         for (vm = 1; vm < adapter->vmdq_pools; vm++) {
3703                 if (!adapter->vmdq_netdev[vm])
3704                         break;
3705                 if (!netif_running(adapter->vmdq_netdev[vm]))
3706                         continue;
3707                 count += netdev_mc_count(adapter->vmdq_netdev[vm]);
3708         }
3709 #endif
3710
3711         if (!count) {
3712                 e1000_update_mc_addr_list(hw, NULL, 0);
3713                 return 0;
3714         }
3715         mta_list = kzalloc(count * ETH_ALEN, GFP_ATOMIC);
3716         if (!mta_list)
3717                 return -ENOMEM;
3718
3719         /* The shared function expects a packed array of only addresses. */
3720         i = 0;
3721         netdev_for_each_mc_addr(ha, netdev)
3722 #ifdef NETDEV_HW_ADDR_T_MULTICAST
3723                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3724 #else
3725                 memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN);
3726 #endif
3727 #ifdef CONFIG_IGB_VMDQ_NETDEV
3728         for (vm = 1; vm < adapter->vmdq_pools; vm++) {
3729                 if (!adapter->vmdq_netdev[vm])
3730                         break;
3731                 if (!netif_running(adapter->vmdq_netdev[vm]) ||
3732                     !netdev_mc_count(adapter->vmdq_netdev[vm]))
3733                         continue;
3734                 netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm])
3735 #ifdef NETDEV_HW_ADDR_T_MULTICAST
3736                         memcpy(mta_list + (i++ * ETH_ALEN),
3737                                ha->addr, ETH_ALEN);
3738 #else
3739                         memcpy(mta_list + (i++ * ETH_ALEN),
3740                                ha->dmi_addr, ETH_ALEN);
3741 #endif
3742         }
3743 #endif
3744         e1000_update_mc_addr_list(hw, mta_list, i);
3745         kfree(mta_list);
3746
3747         return count;
3748 }
3749
3750 void igb_rar_set(struct igb_adapter *adapter, u32 index)
3751 {
3752         u32 rar_low, rar_high;
3753         struct e1000_hw *hw = &adapter->hw;
3754         u8 *addr = adapter->mac_table[index].addr;
3755         /* HW expects these in little endian so we reverse the byte order
3756          * from network order (big endian) to little endian
3757          */
3758         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
3759                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
3760         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
3761
3762         /* Indicate to hardware the Address is Valid. */
3763         if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE)
3764                 rar_high |= E1000_RAH_AV;
3765
3766         if (hw->mac.type == e1000_82575)
3767                 rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue;
3768         else
3769                 rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue;
3770
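             /* the address-valid bit lives in RAH, so write and flush RAL
              * first to ensure the complete address is in place before it
              * can be seen as valid */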
3771         E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
3772         E1000_WRITE_FLUSH(hw);
3773         E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
3774         E1000_WRITE_FLUSH(hw);
3775 }
3776
3777 void igb_full_sync_mac_table(struct igb_adapter *adapter)
3778 {
3779         struct e1000_hw *hw = &adapter->hw;
3780         int i;
3781         for (i = 0; i < hw->mac.rar_entry_count; i++)
3782                 igb_rar_set(adapter, i);
3784 }
3785
3786 void igb_sync_mac_table(struct igb_adapter *adapter)
3787 {
3788         struct e1000_hw *hw = &adapter->hw;
3789         int i;
3790         for (i = 0; i < hw->mac.rar_entry_count; i++) {
3791                 if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED)
3792                         igb_rar_set(adapter, i);
3793                 adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED);
3794         }
3795 }
3796
3797 int igb_available_rars(struct igb_adapter *adapter)
3798 {
3799         struct e1000_hw *hw = &adapter->hw;
3800         int i, count = 0;
3801
3802         for (i = 0; i < hw->mac.rar_entry_count; i++) {
3803                 if (adapter->mac_table[i].state == 0)
3804                         count++;
3805         }
3806         return count;
3807 }
3808
3809 #ifdef HAVE_SET_RX_MODE
3810 /**
3811  * igb_write_uc_addr_list - write unicast addresses to RAR table
3812  * @netdev: network interface device structure
3813  *
3814  * Writes unicast address list to the RAR table.
3815  * Returns: -ENOMEM on failure/insufficient address space
3816  *                0 on no addresses written
3817  *                X on writing X addresses to the RAR table
3818  **/
3819 static int igb_write_uc_addr_list(struct net_device *netdev)
3820 {
3821         struct igb_adapter *adapter = netdev_priv(netdev);
3822         unsigned int vfn = adapter->vfs_allocated_count;
3823         int count = 0;
3824
3825         /* return -ENOMEM to indicate insufficient space for the addresses */
3826         if (netdev_uc_count(netdev) > igb_available_rars(adapter))
3827                 return -ENOMEM;
3828         if (!netdev_uc_empty(netdev)) {
3829 #ifdef NETDEV_HW_ADDR_T_UNICAST
3830                 struct netdev_hw_addr *ha;
3831 #else
3832                 struct dev_mc_list *ha;
3833 #endif
3834                 netdev_for_each_uc_addr(ha, netdev) {
3835 #ifdef NETDEV_HW_ADDR_T_UNICAST
3836                         igb_del_mac_filter(adapter, ha->addr, vfn);
3837                         igb_add_mac_filter(adapter, ha->addr, vfn);
3838 #else
3839                         igb_del_mac_filter(adapter, ha->da_addr, vfn);
3840                         igb_add_mac_filter(adapter, ha->da_addr, vfn);
3841 #endif
3842                         count++;
3843                 }
3844         }
3845         return count;
3846 }
3847
3848 #endif /* HAVE_SET_RX_MODE */
3849 /**
3850  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3851  * @netdev: network interface device structure
3852  *
3853  * The set_rx_mode entry point is called whenever the unicast or multicast
3854  * address lists or the network interface flags are updated.  This routine is
3855  * responsible for configuring the hardware for proper unicast, multicast,
3856  * promiscuous mode, and all-multi behavior.
3857  **/
3858 static void igb_set_rx_mode(struct net_device *netdev)
3859 {
3860         struct igb_adapter *adapter = netdev_priv(netdev);
3861         struct e1000_hw *hw = &adapter->hw;
3862         unsigned int vfn = adapter->vfs_allocated_count;
3863         u32 rctl, vmolr = 0;
3864         int count;
3865
3866         /* Check for Promiscuous and All Multicast modes */
3867         rctl = E1000_READ_REG(hw, E1000_RCTL);
3868
3869         /* clear the affected bits */
3870         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3871
3872         if (netdev->flags & IFF_PROMISC) {
3873                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3874                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3875         } else {
3876                 if (netdev->flags & IFF_ALLMULTI) {
3877                         rctl |= E1000_RCTL_MPE;
3878                         vmolr |= E1000_VMOLR_MPME;
3879                 } else {
3880                         /*
3881                          * Write addresses to the MTA, if the attempt fails
3882                          * then we should just turn on promiscuous mode so
3883                          * that we can at least receive multicast traffic
3884                          */
3885                         count = igb_write_mc_addr_list(netdev);
3886                         if (count < 0) {
3887                                 rctl |= E1000_RCTL_MPE;
3888                                 vmolr |= E1000_VMOLR_MPME;
3889                         } else if (count) {
3890                                 vmolr |= E1000_VMOLR_ROMPE;
3891                         }
3892                 }
3893 #ifdef HAVE_SET_RX_MODE
3894                 /*
3895                  * Write addresses to available RAR registers, if there is not
3896                  * sufficient space to store all the addresses then enable
3897                  * unicast promiscuous mode
3898                  */
3899                 count = igb_write_uc_addr_list(netdev);
3900                 if (count < 0) {
3901                         rctl |= E1000_RCTL_UPE;
3902                         vmolr |= E1000_VMOLR_ROPE;
3903                 }
3904 #endif /* HAVE_SET_RX_MODE */
3905                 rctl |= E1000_RCTL_VFE;
3906         }
3907         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
3908
3909         /*
3910          * In order to support SR-IOV and eventually VMDq it is necessary to set
3911          * the VMOLR to enable the appropriate modes.  Without this workaround
3912          * we will have issues with VLAN tag stripping not being done for frames
3913          * that are only arriving because we are the default pool
3914          */
3915         if (hw->mac.type < e1000_82576)
3916                 return;
3917
3918         vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) &
3919                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3920         E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
3921         igb_restore_vf_multicasts(adapter);
3922 }
3923
3924 static void igb_check_wvbr(struct igb_adapter *adapter)
3925 {
3926         struct e1000_hw *hw = &adapter->hw;
3927         u32 wvbr = 0;
3928
3929         switch (hw->mac.type) {
3930         case e1000_82576:
3931         case e1000_i350:
3932                 if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR)))
3933                         return;
3934                 break;
3935         default:
3936                 break;
3937         }
3938
3939         adapter->wvbr |= wvbr;
3940 }
3941
3942 #define IGB_STAGGERED_QUEUE_OFFSET 8
3943
3944 static void igb_spoof_check(struct igb_adapter *adapter)
3945 {
3946         int j;
3947
3948         if (!adapter->wvbr)
3949                 return;
3950
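             /* WVBR carries two spoof-event bits per VF, spaced
              * IGB_STAGGERED_QUEUE_OFFSET apart (presumably one per event
              * type); check and clear both for each VF */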
3951         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3952                 if (adapter->wvbr & (1 << j) ||
3953                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3954                         DPRINTK(DRV, WARNING,
3955                                 "Spoof event(s) detected on VF %d\n", j);
3956                         adapter->wvbr &=
3957                                 ~((1 << j) |
3958                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3959                 }
3960         }
3961 }
3962
3963 /* Need to wait a few seconds after link up to get diagnostic information from
3964  * the phy */
3965 static void igb_update_phy_info(unsigned long data)
3966 {
3967         struct igb_adapter *adapter = (struct igb_adapter *) data;
3968         e1000_get_phy_info(&adapter->hw);
3969 }
3970
3971 /**
3972  * igb_has_link - check shared code for link and determine up/down
3973  * @adapter: pointer to driver private info
3974  **/
3975 bool igb_has_link(struct igb_adapter *adapter)
3976 {
3977         struct e1000_hw *hw = &adapter->hw;
3978         bool link_active = FALSE;
3979
3980         /* get_link_status is set on LSC (link status) interrupt or
3981          * rx sequence error interrupt.  get_link_status will stay
3982          * false until the e1000_check_for_link establishes link
3983          * for copper adapters ONLY
3984          */
3985         switch (hw->phy.media_type) {
3986         case e1000_media_type_copper:
3987                 if (!hw->mac.get_link_status)
3988                         return true;
                     /* fall through */
3989         case e1000_media_type_internal_serdes:
3990                 e1000_check_for_link(hw);
3991                 link_active = !hw->mac.get_link_status;
3992                 break;
3993         case e1000_media_type_unknown:
3994         default:
3995                 break;
3996         }
3997
3998         return link_active;
3999 }
4000
4001 /**
4002  * igb_watchdog - Timer Call-back
4003  * @data: pointer to adapter cast into an unsigned long
4004  **/
4005 static void igb_watchdog(unsigned long data)
4006 {
4007         struct igb_adapter *adapter = (struct igb_adapter *)data;
4008         /* Do the rest outside of interrupt context */
4009         schedule_work(&adapter->watchdog_task);
4010 }
4011
4012 static void igb_watchdog_task(struct work_struct *work)
4013 {
4014         struct igb_adapter *adapter = container_of(work,
4015                                                    struct igb_adapter,
4016                                                    watchdog_task);
4017         struct e1000_hw *hw = &adapter->hw;
4018         struct net_device *netdev = adapter->netdev;
4019         u32 link;
4020         int i;
4021         u32 thstat, ctrl_ext;
4022
4024         link = igb_has_link(adapter);
4025         if (link) {
4026                 /* Cancel scheduled suspend requests. */
4027                 pm_runtime_resume(netdev->dev.parent);
4028
4029                 if (!netif_carrier_ok(netdev)) {
4030                         u32 ctrl;
4031                         e1000_get_speed_and_duplex(hw,
4032                                                    &adapter->link_speed,
4033                                                    &adapter->link_duplex);
4034
4035                         ctrl = E1000_READ_REG(hw, E1000_CTRL);
4036                         /* Link status message must follow this format */
4037                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
4038                                  "Flow Control: %s\n",
4039                                netdev->name,
4040                                adapter->link_speed,
4041                                adapter->link_duplex == FULL_DUPLEX ?
4042                                  "Full Duplex" : "Half Duplex",
4043                                ((ctrl & E1000_CTRL_TFCE) &&
4044                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX":
4045                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
4046                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
4047                         /* adjust timeout factor according to speed/duplex */
4048                         adapter->tx_timeout_factor = 1;
4049                         switch (adapter->link_speed) {
4050                         case SPEED_10:
4051                                 adapter->tx_timeout_factor = 14;
4052                                 break;
4053                         case SPEED_100:
4054                                 /* maybe add some timeout factor ? */
4055                                 break;
4056                         }
4057
4058                         netif_carrier_on(netdev);
4059                         netif_tx_wake_all_queues(netdev);
4060
4061                         igb_ping_all_vfs(adapter);
4062 #ifdef IFLA_VF_MAX
4063                         igb_check_vf_rate_limit(adapter);
4064 #endif /* IFLA_VF_MAX */
4065
4066                         /* link state has changed, schedule phy info update */
4067                         if (!test_bit(__IGB_DOWN, &adapter->state))
4068                                 mod_timer(&adapter->phy_info_timer,
4069                                           round_jiffies(jiffies + 2 * HZ));
4070                 }
4071         } else {
4072                 if (netif_carrier_ok(netdev)) {
4073                         adapter->link_speed = 0;
4074                         adapter->link_duplex = 0;
4075                         /* check for thermal sensor event on i350 */
4076                         if (hw->mac.type == e1000_i350) {
4077                                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
4078                                 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
4079                                 if ((hw->phy.media_type ==
4080                                         e1000_media_type_copper) &&
4081                                         !(ctrl_ext &
4082                                         E1000_CTRL_EXT_LINK_MODE_SGMII)) {
4083                                         if (thstat & E1000_THSTAT_PWR_DOWN) {
4084                                                 printk(KERN_ERR "igb: %s The "
4085                                                 "network adapter was stopped "
4086                                                 "because it overheated.\n",
4087                                                 netdev->name);
4088                                         }
4089                                         if (thstat & E1000_THSTAT_LINK_THROTTLE) {
4090                                                 printk(KERN_INFO
4091                                                         "igb: %s The network "
4092                                                         "adapter link speed "
4093                                                         "was downshifted "
4094                                                         "because it "
4095                                                         "overheated.\n",
4096                                                         netdev->name);
4098                                         }
4099                                 }
4100                         }
4101
4102                         /* Link status message must follow this format */
4103                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
4104                                netdev->name);
4105                         netif_carrier_off(netdev);
4106                         netif_tx_stop_all_queues(netdev);
4107
4108                         igb_ping_all_vfs(adapter);
4109
4110                         /* link state has changed, schedule phy info update */
4111                         if (!test_bit(__IGB_DOWN, &adapter->state))
4112                                 mod_timer(&adapter->phy_info_timer,
4113                                           round_jiffies(jiffies + 2 * HZ));
4114
4115                         pm_schedule_suspend(netdev->dev.parent,
4116                                             MSEC_PER_SEC * 5);
4117                 }
4118         }
4119
4120         igb_update_stats(adapter);
4121
4122         for (i = 0; i < adapter->num_tx_queues; i++) {
4123                 struct igb_ring *tx_ring = adapter->tx_ring[i];
4124                 if (!netif_carrier_ok(netdev)) {
4125                         /* We've lost link, so the controller stops DMA,
4126                          * but we've got queued Tx work that's never going
4127                          * to get done, so reset controller to flush Tx.
4128                          * (Do the reset outside of interrupt context). */
4129                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
4130                                 adapter->tx_timeout_count++;
4131                                 schedule_work(&adapter->reset_task);
4132                                 /* return immediately since reset is imminent */
4133                                 return;
4134                         }
4135                 }
4136
4137                 /* Force detection of hung controller every watchdog period */
4138                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
4139         }
4140
4141         /* Cause software interrupt to ensure rx ring is cleaned */
4142         if (adapter->msix_entries) {
4143                 u32 eics = 0;
4144                 for (i = 0; i < adapter->num_q_vectors; i++)
4145                         eics |= adapter->q_vector[i]->eims_value;
4146                 E1000_WRITE_REG(hw, E1000_EICS, eics);
4147         } else {
4148                 E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0);
4149         }
4150
4151         igb_spoof_check(adapter);
4152
4153         /* Reset the timer */
4154         if (!test_bit(__IGB_DOWN, &adapter->state))
4155                 mod_timer(&adapter->watchdog_timer,
4156                           round_jiffies(jiffies + 2 * HZ));
4157 }
4158
4159 static void igb_dma_err_task(struct work_struct *work)
4160 {
4161         struct igb_adapter *adapter = container_of(work,
4162                                                    struct igb_adapter,
4163                                                    dma_err_task);
4164         int vf;
4165         struct e1000_hw *hw = &adapter->hw;
4166         struct net_device *netdev = adapter->netdev;
4167         u32 hgptc;
4168         u32 ciaa, ciad;
4169
4170         hgptc = E1000_READ_REG(hw, E1000_HGPTC);
4171         if (hgptc) /* If incrementing then no need for the check below */
4172                 goto dma_timer_reset;
4173         /*
4174          * Check to see if a bad DMA write target from an errant or
4175          * malicious VF has caused a PCIe error.  If so then we can
4176          * issue a VFLR to the offending VF(s) and then resume without
4177          * requesting a full slot reset.
4178          */
4179
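             /* CIAA/CIAD provide a window into each VF's PCI config space:
              * bit 31 of CIAA enables the access, bits 16 and up select the
              * VF and the low bits give the config-space offset, with CIAD
              * holding the data (layout as implied by the accesses below) */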
4180         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4181                 ciaa = (vf << 16) | 0x80000000;
4182                 /* 32 bit read so align, we really want status at offset 6 */
4183                 ciaa |= PCI_COMMAND;
4184                 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4185                 ciad = E1000_READ_REG(hw, E1000_CIAD);
4186                 ciaa &= 0x7FFFFFFF;
4187                 /* disable debug mode asap after reading data */
4188                 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4189                 /* Get the upper 16 bits which will be the PCI status reg */
4190                 ciad >>= 16;
4191                 if (ciad & (PCI_STATUS_REC_MASTER_ABORT |
4192                             PCI_STATUS_REC_TARGET_ABORT |
4193                             PCI_STATUS_SIG_SYSTEM_ERROR)) {
4194                         netdev_err(netdev, "VF %d suffered error\n", vf);
4195                         /* Issue VFLR */
4196                         ciaa = (vf << 16) | 0x80000000;
4197                         ciaa |= 0xA8;
4198                         E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4199                         ciad = 0x00008000;  /* VFLR */
4200                         E1000_WRITE_REG(hw, E1000_CIAD, ciad);
4201                         ciaa &= 0x7FFFFFFF;
4202                         E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4203                 }
4204         }
4205 dma_timer_reset:
4206         /* Reset the timer */
4207         if (!test_bit(__IGB_DOWN, &adapter->state))
4208                 mod_timer(&adapter->dma_err_timer,
4209                           round_jiffies(jiffies + HZ / 10));
4210 }
4211
4212 /**
4213  * igb_dma_err_timer - Timer Call-back
4214  * @data: pointer to adapter cast into an unsigned long
4215  **/
4216 static void igb_dma_err_timer(unsigned long data)
4217 {
4218         struct igb_adapter *adapter = (struct igb_adapter *)data;
4219         /* Do the rest outside of interrupt context */
4220         schedule_work(&adapter->dma_err_task);
4221 }
4222
4223 enum latency_range {
4224         lowest_latency = 0,
4225         low_latency = 1,
4226         bulk_latency = 2,
4227         latency_invalid = 255
4228 };
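/*
 * These ranges map onto the interrupt rates programmed in igb_set_itr():
 * lowest_latency selects IGB_70K_ITR (70,000 ints/s), low_latency selects
 * IGB_20K_ITR (20,000 ints/s) and bulk_latency selects IGB_4K_ITR
 * (4,000 ints/s).
 */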
4229
4230 /**
4231  * igb_update_ring_itr - update the dynamic ITR value based on packet size
4232  *
4233  *      Stores a new ITR value based strictly on packet size.  This
4234  *      algorithm is less sophisticated than that used in igb_update_itr,
4235  *      due to the difficulty of synchronizing statistics across multiple
4236  *      receive rings.  The divisors and thresholds used by this function
4237  *      were determined based on theoretical maximum wire speed and testing
4238  *      data, in order to minimize response time while increasing bulk
4239  *      throughput.
4240  *      This functionality is controlled by the InterruptThrottleRate module
4241  *      parameter (see igb_param.c).
4242  *      NOTE:  This function is called only when operating in a multiqueue
4243  *             receive environment.
4244  * @q_vector: pointer to q_vector
4245  **/
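/*
 * Worked example of the heuristic below: an average on-wire frame of
 * 1000 bytes becomes avg_wire_size = 1000 + 24 = 1024, which falls in the
 * mid-size band (300..1200) and yields new_val = 1024 / 3 = 341.  A 64-byte
 * average gives (64 + 24) / 2 = 44, which the conservative-mode check then
 * raises to IGB_20K_ITR if it falls below that floor.
 */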
4246 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
4247 {
4248         int new_val = q_vector->itr_val;
4249         int avg_wire_size = 0;
4250         struct igb_adapter *adapter = q_vector->adapter;
4251         unsigned int packets;
4252
4253         /* For non-gigabit speeds, just fix the interrupt rate at 4000
4254          * ints/sec - ITR timer value of 120 ticks.
4255          */
4256         if (adapter->link_speed != SPEED_1000) {
4257                 new_val = IGB_4K_ITR;
4258                 goto set_itr_val;
4259         }
4260
4261         packets = q_vector->rx.total_packets;
4262         if (packets)
4263                 avg_wire_size = q_vector->rx.total_bytes / packets;
4264
4265         packets = q_vector->tx.total_packets;
4266         if (packets)
4267                 avg_wire_size = max_t(u32, avg_wire_size,
4268                                       q_vector->tx.total_bytes / packets);
4269
4270         /* if avg_wire_size isn't set no work was done */
4271         if (!avg_wire_size)
4272         /* if avg_wire_size isn't set, no work was done */
4273
4274         /* Add 24 bytes to size to account for CRC, preamble, and gap */
4275         avg_wire_size += 24;
4276
4277         /* Don't starve jumbo frames */
4278         avg_wire_size = min(avg_wire_size, 3000);
4279
4280         /* Give a little boost to mid-size frames */
4281         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
4282                 new_val = avg_wire_size / 3;
4283         else
4284                 new_val = avg_wire_size / 2;
4285
4286         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4287         if (new_val < IGB_20K_ITR &&
4288             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4289              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4290                 new_val = IGB_20K_ITR;
4291
4292 set_itr_val:
4293         if (new_val != q_vector->itr_val) {
4294                 q_vector->itr_val = new_val;
4295                 q_vector->set_itr = 1;
4296         }
4297 clear_counts:
4298         q_vector->rx.total_bytes = 0;
4299         q_vector->rx.total_packets = 0;
4300         q_vector->tx.total_bytes = 0;
4301         q_vector->tx.total_packets = 0;
4302 }
4303
4304 /**
4305  * igb_update_itr - update the dynamic ITR value based on statistics
4306  *      Stores a new ITR value based on packets and byte
4307  *      counts during the last interrupt.  The advantage of per interrupt
4308  *      computation is faster updates and more accurate ITR for the current
4309  *      traffic pattern.  Constants in this function were computed
4310  *      based on theoretical maximum wire speed and thresholds were set based
4311  *      on testing data as well as attempting to minimize response time
4312  *      while increasing bulk throughput.
4313  *      This functionality is controlled by the InterruptThrottleRate module
4314  *      parameter (see igb_param.c).
4315  *      NOTE:  These calculations are only valid when operating in a single-
4316  *             queue environment.
4317  * @q_vector: pointer to q_vector
4318  * @ring_container: ring info to update the itr for
4319  **/
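/*
 * Worked example of the transitions below: a container sitting in
 * low_latency that saw 40 packets / 12000 bytes has bytes > 10000,
 * bytes/packets = 300 (neither the TSO nor the oversized-frame case) and
 * packets > 35, so it steps to lowest_latency.  The same container with
 * 2 packets / 400 bytes also lands in lowest_latency via the small-traffic
 * branch, while 5 packets / 50000 bytes (10000 bytes per packet) is treated
 * as TSO traffic and moves to bulk_latency.
 */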
4320 static void igb_update_itr(struct igb_q_vector *q_vector,
4321                            struct igb_ring_container *ring_container)
4322 {
4323         unsigned int packets = ring_container->total_packets;
4324         unsigned int bytes = ring_container->total_bytes;
4325         u8 itrval = ring_container->itr;
4326
4327         /* no packets, exit with status unchanged */
4328         if (packets == 0)
4329                 return;
4330
4331         switch (itrval) {
4332         case lowest_latency:
4333                 /* handle TSO and jumbo frames */
4334                 if (bytes/packets > 8000)
4335                         itrval = bulk_latency;
4336                 else if ((packets < 5) && (bytes > 512))
4337                         itrval = low_latency;
4338                 break;
4339         case low_latency:  /* 50 usec aka 20000 ints/s */
4340                 if (bytes > 10000) {
4341                         /* this if handles the TSO accounting */
4342                         if (bytes/packets > 8000) {
4343                                 itrval = bulk_latency;
4344                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
4345                                 itrval = bulk_latency;
4346                         } else if (packets > 35) {
4347                                 itrval = lowest_latency;
4348                         }
4349                 } else if (bytes/packets > 2000) {
4350                         itrval = bulk_latency;
4351                 } else if (packets <= 2 && bytes < 512) {
4352                         itrval = lowest_latency;
4353                 }
4354                 break;
4355         case bulk_latency: /* 250 usec aka 4000 ints/s */
4356                 if (bytes > 25000) {
4357                         if (packets > 35)
4358                                 itrval = low_latency;
4359                 } else if (bytes < 1500) {
4360                         itrval = low_latency;
4361                 }
4362                 break;
4363         }
4364
4365         /* clear work counters since we have the values we need */
4366         ring_container->total_bytes = 0;
4367         ring_container->total_packets = 0;
4368
4369         /* write updated itr to ring container */
4370         ring_container->itr = itrval;
4371 }
4372
4373 static void igb_set_itr(struct igb_q_vector *q_vector)
4374 {
4375         struct igb_adapter *adapter = q_vector->adapter;
4376         u32 new_itr = q_vector->itr_val;
4377         u8 current_itr = 0;
4378
4379         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4380         if (adapter->link_speed != SPEED_1000) {
4381                 current_itr = 0;
4382                 new_itr = IGB_4K_ITR;
4383                 goto set_itr_now;
4384         }
4385
4386         igb_update_itr(q_vector, &q_vector->tx);
4387         igb_update_itr(q_vector, &q_vector->rx);
4388
4389         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4390
4391         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4392         if (current_itr == lowest_latency &&
4393             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4394              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4395                 current_itr = low_latency;
4396
4397         switch (current_itr) {
4398         /* counts and packets in update_itr are dependent on these numbers */
4399         case lowest_latency:
4400                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4401                 break;
4402         case low_latency:
4403                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4404                 break;
4405         case bulk_latency:
4406                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4407                 break;
4408         default:
4409                 break;
4410         }
4411
4412 set_itr_now:
4413         if (new_itr != q_vector->itr_val) {
4414                 /* this attempts to bias the interrupt rate towards Bulk
4415                  * by adding intermediate steps when interrupt rate is
4416                  * increasing */
4417                 new_itr = new_itr > q_vector->itr_val ?
4418                              max((new_itr * q_vector->itr_val) /
4419                                  (new_itr + (q_vector->itr_val >> 2)),
4420                                  new_itr) :
4421                              new_itr;
4422                 /* Don't write the value here; it resets the adapter's
4423                  * internal timer, and causes us to delay far longer than
4424                  * we should between interrupts.  Instead, we write the ITR
4425                  * value at the beginning of the next interrupt so the timing
4426                  * ends up being correct.
4427                  */
4428                 q_vector->itr_val = new_itr;
4429                 q_vector->set_itr = 1;
4430         }
4431 }
4432
4433 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4434                      u32 type_tucmd, u32 mss_l4len_idx)
4435 {
4436         struct e1000_adv_tx_context_desc *context_desc;
4437         u16 i = tx_ring->next_to_use;
4438
4439         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4440
4441         i++;
4442         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4443
4444         /* set bits to identify this as an advanced context descriptor */
4445         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4446
4447         /* For 82575, context index must be unique per ring. */
4448         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4449                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4450
4451         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4452         context_desc->seqnum_seed       = 0;
4453         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4454         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4455 }
4456
4457 static int igb_tso(struct igb_ring *tx_ring,
4458                    struct igb_tx_buffer *first,
4459                    u8 *hdr_len)
4460 {
4461 #ifdef NETIF_F_TSO
4462         struct sk_buff *skb = first->skb;
4463         u32 vlan_macip_lens, type_tucmd;
4464         u32 mss_l4len_idx, l4len;
4465
4466         if (!skb_is_gso(skb))
4467 #endif /* NETIF_F_TSO */
4468                 return 0;
4469 #ifdef NETIF_F_TSO
4470
4471         if (skb_header_cloned(skb)) {
4472                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4473                 if (err)
4474                         return err;
4475         }
4476
4477         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4478         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4479
4480         if (first->protocol == __constant_htons(ETH_P_IP)) {
4481                 struct iphdr *iph = ip_hdr(skb);
4482                 iph->tot_len = 0;
4483                 iph->check = 0;
4484                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4485                                                          iph->daddr, 0,
4486                                                          IPPROTO_TCP,
4487                                                          0);
4488                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4489                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4490                                    IGB_TX_FLAGS_CSUM |
4491                                    IGB_TX_FLAGS_IPV4;
4492 #ifdef NETIF_F_TSO6
4493         } else if (skb_is_gso_v6(skb)) {
4494                 ipv6_hdr(skb)->payload_len = 0;
4495                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4496                                                        &ipv6_hdr(skb)->daddr,
4497                                                        0, IPPROTO_TCP, 0);
4498                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4499                                    IGB_TX_FLAGS_CSUM;
4500 #endif
4501         }
4502
4503         /* compute header lengths */
4504         l4len = tcp_hdrlen(skb);
4505         *hdr_len = skb_transport_offset(skb) + l4len;
4506
4507         /* update gso size and bytecount with header size */
4508         first->gso_segs = skb_shinfo(skb)->gso_segs;
4509         first->bytecount += (first->gso_segs - 1) * *hdr_len;
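        /*
         * Example: a 9000-byte TCP payload with a 1448-byte MSS goes out as
         * 7 segments, and each of the 6 additional segments repeats *hdr_len
         * bytes of Ethernet/IP/TCP headers on the wire, so bytecount grows
         * by 6 * *hdr_len on top of skb->len.
         */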
4510
4511         /* MSS L4LEN IDX */
4512         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4513         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4514
4515         /* VLAN MACLEN IPLEN */
4516         vlan_macip_lens = skb_network_header_len(skb);
4517         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4518         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4519
4520         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4521
4522         return 1;
4523 #endif  /* NETIF_F_TSO */
4524 }
4525
4526 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4527 {
4528         struct sk_buff *skb = first->skb;
4529         u32 vlan_macip_lens = 0;
4530         u32 mss_l4len_idx = 0;
4531         u32 type_tucmd = 0;
4532
4533         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4534                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4535                         return;
4536         } else {
4537                 u8 l4_hdr = 0;
4538                 switch (first->protocol) {
4539                 case __constant_htons(ETH_P_IP):
4540                         vlan_macip_lens |= skb_network_header_len(skb);
4541                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4542                         l4_hdr = ip_hdr(skb)->protocol;
4543                         break;
4544 #ifdef NETIF_F_IPV6_CSUM
4545                 case __constant_htons(ETH_P_IPV6):
4546                         vlan_macip_lens |= skb_network_header_len(skb);
4547                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4548                         break;
4549 #endif
4550                 default:
4551                         if (unlikely(net_ratelimit())) {
4552                                 dev_warn(tx_ring->dev,
4553                                  "partial checksum but proto=%x!\n",
4554                                  first->protocol);
4555                         }
4556                         break;
4557                 }
4558
4559                 switch (l4_hdr) {
4560                 case IPPROTO_TCP:
4561                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4562                         mss_l4len_idx = tcp_hdrlen(skb) <<
4563                                         E1000_ADVTXD_L4LEN_SHIFT;
4564                         break;
4565 #ifdef HAVE_SCTP
4566                 case IPPROTO_SCTP:
4567                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4568                         mss_l4len_idx = sizeof(struct sctphdr) <<
4569                                         E1000_ADVTXD_L4LEN_SHIFT;
4570                         break;
4571 #endif
4572                 case IPPROTO_UDP:
4573                         mss_l4len_idx = sizeof(struct udphdr) <<
4574                                         E1000_ADVTXD_L4LEN_SHIFT;
4575                         break;
4576                 default:
4577                         if (unlikely(net_ratelimit())) {
4578                                 dev_warn(tx_ring->dev,
4579                                  "partial checksum but l4 proto=%x!\n",
4580                                  l4_hdr);
4581                         }
4582                         break;
4583                 }
4584
4585                 /* update TX checksum flag */
4586                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4587         }
4588
4589         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4590         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4591
4592         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4593 }
4594
4595 static __le32 igb_tx_cmd_type(u32 tx_flags)
4596 {
4597         /* set type for advanced descriptor with frame checksum insertion */
4598         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4599                                       E1000_ADVTXD_DCMD_IFCS |
4600                                       E1000_ADVTXD_DCMD_DEXT);
4601
4602         /* set HW vlan bit if vlan is present */
4603         if (tx_flags & IGB_TX_FLAGS_VLAN)
4604                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4605
4606         /* set timestamp bit if present */
4607         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4608                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4609
4610         /* set segmentation bits for TSO */
4611         if (tx_flags & IGB_TX_FLAGS_TSO)
4612                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4613
4614         return cmd_type;
4615 }
4616
4617 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4618                                  union e1000_adv_tx_desc *tx_desc,
4619                                  u32 tx_flags, unsigned int paylen)
4620 {
4621         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4622
4623         /* 82575 requires a unique index per ring if any offload is enabled */
4624         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4625             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4626                 olinfo_status |= tx_ring->reg_idx << 4;
4627
4628         /* insert L4 checksum */
4629         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4630                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4631
4632                 /* insert IPv4 checksum */
4633                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4634                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4635         }
4636
4637         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4638 }
4639
4640 /*
4641  * The largest size we can write to the descriptor is 65535.  In order to
4642  * maintain a power of two alignment we have to limit ourselves to 32K.
4643  */
4644 #define IGB_MAX_TXD_PWR 15
4645 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
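/*
 * Example of the split this forces in igb_tx_map(): a 45056-byte buffer is
 * emitted as one 32768-byte descriptor followed by one 12288-byte
 * descriptor, since each data descriptor is capped at IGB_MAX_DATA_PER_TXD
 * to stay power-of-two aligned below the 65535-byte hardware limit.
 */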
4646
4647 static void igb_tx_map(struct igb_ring *tx_ring,
4648                        struct igb_tx_buffer *first,
4649                        const u8 hdr_len)
4650 {
4651         struct sk_buff *skb = first->skb;
4652         struct igb_tx_buffer *tx_buffer;
4653         union e1000_adv_tx_desc *tx_desc;
4654         dma_addr_t dma;
4655 #ifdef MAX_SKB_FRAGS
4656         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4657         unsigned int data_len = skb->data_len;
4658 #endif
4659         unsigned int size = skb_headlen(skb);
4660         unsigned int paylen = skb->len - hdr_len;
4661         __le32 cmd_type;
4662         u32 tx_flags = first->tx_flags;
4663         u16 i = tx_ring->next_to_use;
4664
4665         tx_desc = IGB_TX_DESC(tx_ring, i);
4666
4667         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4668         cmd_type = igb_tx_cmd_type(tx_flags);
4669
4670         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4671         if (dma_mapping_error(tx_ring->dev, dma))
4672                 goto dma_error;
4673
4674         /* record length, and DMA address */
4675         dma_unmap_len_set(first, len, size);
4676         dma_unmap_addr_set(first, dma, dma);
4677         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4678
4679 #ifdef MAX_SKB_FRAGS
4680         for (;;) {
4681 #endif
4682                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4683                         tx_desc->read.cmd_type_len =
4684                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4685
4686                         i++;
4687                         tx_desc++;
4688                         if (i == tx_ring->count) {
4689                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4690                                 i = 0;
4691                         }
4692
4693                         dma += IGB_MAX_DATA_PER_TXD;
4694                         size -= IGB_MAX_DATA_PER_TXD;
4695
4696                         tx_desc->read.olinfo_status = 0;
4697                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4698                 }
4699
4700 #ifdef MAX_SKB_FRAGS
4701                 if (likely(!data_len))
4702                         break;
4703
4704                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4705
4706                 i++;
4707                 tx_desc++;
4708                 if (i == tx_ring->count) {
4709                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4710                         i = 0;
4711                 }
4712
4713                 size = skb_frag_size(frag);
4714                 data_len -= size;
4715
4716                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
4717                                 DMA_TO_DEVICE);
4718                 if (dma_mapping_error(tx_ring->dev, dma))
4719                         goto dma_error;
4720
4721                 tx_buffer = &tx_ring->tx_buffer_info[i];
4722                 dma_unmap_len_set(tx_buffer, len, size);
4723                 dma_unmap_addr_set(tx_buffer, dma, dma);
4724
4725                 tx_desc->read.olinfo_status = 0;
4726                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4727
4728                 frag++;
4729         }
4730
4731 #endif /* MAX_SKB_FRAGS */
4732 #ifdef CONFIG_BQL
4733         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4734 #endif /* CONFIG_BQL */
4735
4736         /* write last descriptor with RS and EOP bits */
4737         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4738         tx_desc->read.cmd_type_len = cmd_type;
4739
4740         /* set the timestamp */
4741         first->time_stamp = jiffies;
4742
4743         /*
4744          * Force memory writes to complete before letting h/w know there
4745          * are new descriptors to fetch.  (Only applicable for weak-ordered
4746          * memory model archs, such as IA-64).
4747          *
4748          * We also need this memory barrier to make certain all of the
4749          * status bits have been updated before next_to_watch is written.
4750          */
4751         wmb();
4752
4753         /* set next_to_watch value indicating a packet is present */
4754         first->next_to_watch = tx_desc;
4755
4756         i++;
4757         if (i == tx_ring->count)
4758                 i = 0;
4759
4760         tx_ring->next_to_use = i;
4761
4762         writel(i, tx_ring->tail);
4763
4764         /* we need this if more than one processor can write to our tail
4765          * at a time; it synchronizes IO on IA64/Altix systems */
4766         mmiowb();
4767
4768         return;
4769
4770 dma_error:
4771         dev_err(tx_ring->dev, "TX DMA map failed\n");
4772
4773         /* clear dma mappings for failed tx_buffer_info map */
4774         for (;;) {
4775                 tx_buffer = &tx_ring->tx_buffer_info[i];
4776                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
4777                 if (tx_buffer == first)
4778                         break;
4779                 if (i == 0)
4780                         i = tx_ring->count;
4781                 i--;
4782         }
4783
4784         tx_ring->next_to_use = i;
4785 }
4786
4787 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4788 {
4789         struct net_device *netdev = netdev_ring(tx_ring);
4790
4791         if (netif_is_multiqueue(netdev))
4792                 netif_stop_subqueue(netdev, ring_queue_index(tx_ring));
4793         else
4794                 netif_stop_queue(netdev);
4795
4796         /* Herbert's original patch had:
4797          *  smp_mb__after_netif_stop_queue();
4798          * but since that doesn't exist yet, just open code it. */
4799         smp_mb();
4800
4801         /* We need to check again in case another CPU has just
4802          * made room available. */
4803         if (igb_desc_unused(tx_ring) < size)
4804                 return -EBUSY;
4805
4806         /* A reprieve! */
4807         if (netif_is_multiqueue(netdev))
4808                 netif_wake_subqueue(netdev, ring_queue_index(tx_ring));
4809         else
4810                 netif_wake_queue(netdev);
4811
4812         tx_ring->tx_stats.restart_queue++;
4813
4814         return 0;
4815 }
4816
4817 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4818 {
4819         if (igb_desc_unused(tx_ring) >= size)
4820                 return 0;
4821         return __igb_maybe_stop_tx(tx_ring, size);
4822 }
4823
4824 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4825                                 struct igb_ring *tx_ring)
4826 {
4827         struct igb_tx_buffer *first;
4828         int tso;
4829         u32 tx_flags = 0;
4830         __be16 protocol = vlan_get_protocol(skb);
4831         u8 hdr_len = 0;
4832
4833         /* need: 1 descriptor per page,
4834          *       + 2 desc gap to keep tail from touching head,
4835          *       + 1 desc for skb->data,
4836          *       + 1 desc for context descriptor,
4837          * otherwise try next time */
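        /* For example, an skb with 3 page fragments needs at most
         * 3 (frags) + 1 (skb->data) + 1 (context) + 2 (gap) = 7 free
         * descriptors, hence the nr_frags + 4 budget checked below. */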
4838         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4839                 /* this is a hard error */
4840                 return NETDEV_TX_BUSY;
4841         }
4842
4843         /* record the location of the first descriptor for this packet */
4844         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4845         first->skb = skb;
4846         first->bytecount = skb->len;
4847         first->gso_segs = 1;
4848
4849 #ifdef HAVE_HW_TIME_STAMP
4850 #ifdef SKB_SHARED_TX_IS_UNION
4851         if (unlikely(skb_shinfo(skb)->tx_flags.flags & SKBTX_HW_TSTAMP)) {
4852                 skb_shinfo(skb)->tx_flags.flags |= SKBTX_IN_PROGRESS;
4853                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4854         }
4855 #else
4856         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4857                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4858                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4859         }
4860 #endif
4861
4862 #endif
4863         if (vlan_tx_tag_present(skb)) {
4864                 tx_flags |= IGB_TX_FLAGS_VLAN;
4865                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4866         }
4867
4868         /* record initial flags and protocol */
4869         first->tx_flags = tx_flags;
4870         first->protocol = protocol;
4871
4872         tso = igb_tso(tx_ring, first, &hdr_len);
4873         if (tso < 0)
4874                 goto out_drop;
4875         else if (!tso)
4876                 igb_tx_csum(tx_ring, first);
4877
4878         igb_tx_map(tx_ring, first, hdr_len);
4879
4880 #ifndef HAVE_TRANS_START_IN_QUEUE
4881         netdev_ring(tx_ring)->trans_start = jiffies;
4882
4883 #endif
4884         /* Make sure there is space in the ring for the next send. */
4885         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4886
4887         return NETDEV_TX_OK;
4888
4889 out_drop:
4890         igb_unmap_and_free_tx_resource(tx_ring, first);
4891
4892         return NETDEV_TX_OK;
4893 }
4894
4895 #ifdef HAVE_TX_MQ
4896 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4897                                                     struct sk_buff *skb)
4898 {
4899         unsigned int r_idx = skb->queue_mapping;
4900
4901         if (r_idx >= adapter->num_tx_queues)
4902                 r_idx = r_idx % adapter->num_tx_queues;
4903
4904         return adapter->tx_ring[r_idx];
4905 }
4906 #else
4907 #define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0]
4908 #endif
4909
4910 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4911                                   struct net_device *netdev)
4912 {
4913         struct igb_adapter *adapter = netdev_priv(netdev);
4914
4915         if (test_bit(__IGB_DOWN, &adapter->state)) {
4916                 dev_kfree_skb_any(skb);
4917                 return NETDEV_TX_OK;
4918         }
4919
4920         if (skb->len <= 0) {
4921                 dev_kfree_skb_any(skb);
4922                 return NETDEV_TX_OK;
4923         }
4924
4925         /*
4926          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4927          * in order to meet this minimum size requirement.
4928          */
4929         if (skb->len < 17) {
4930                 if (skb_padto(skb, 17))
4931                         return NETDEV_TX_OK;
4932                 skb->len = 17;
4933         }
4934
4935         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4936 }
4937
4938 /**
4939  * igb_tx_timeout - Respond to a Tx Hang
4940  * @netdev: network interface device structure
4941  **/
4942 static void igb_tx_timeout(struct net_device *netdev)
4943 {
4944         struct igb_adapter *adapter = netdev_priv(netdev);
4945         struct e1000_hw *hw = &adapter->hw;
4946
4947         /* Do the reset outside of interrupt context */
4948         adapter->tx_timeout_count++;
4949
4950         if (hw->mac.type >= e1000_82580)
4951                 hw->dev_spec._82575.global_device_reset = true;
4952
4953         schedule_work(&adapter->reset_task);
4954         E1000_WRITE_REG(hw, E1000_EICS,
4955                         (adapter->eims_enable_mask & ~adapter->eims_other));
4956 }
4957
4958 static void igb_reset_task(struct work_struct *work)
4959 {
4960         struct igb_adapter *adapter;
4961         adapter = container_of(work, struct igb_adapter, reset_task);
4962
4963         igb_reinit_locked(adapter);
4964 }
4965
4966 /**
4967  * igb_get_stats - Get System Network Statistics
4968  * @netdev: network interface device structure
4969  *
4970  * Returns the address of the device statistics structure.
4971  * The statistics are updated here and also from the timer callback.
4972  **/
4973 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4974 {
4975         struct igb_adapter *adapter = netdev_priv(netdev);
4976
4977         if (!test_bit(__IGB_RESETTING, &adapter->state))
4978                 igb_update_stats(adapter);
4979
4980 #ifdef HAVE_NETDEV_STATS_IN_NETDEV
4981         /* only return the current stats */
4982         return &netdev->stats;
4983 #else
4984         /* only return the current stats */
4985         return &adapter->net_stats;
4986 #endif /* HAVE_NETDEV_STATS_IN_NETDEV */
4987 }
4988
4989 /**
4990  * igb_change_mtu - Change the Maximum Transfer Unit
4991  * @netdev: network interface device structure
4992  * @new_mtu: new value for maximum frame size
4993  *
4994  * Returns 0 on success, negative on failure
4995  **/
4996 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4997 {
4998         struct igb_adapter *adapter = netdev_priv(netdev);
4999         struct pci_dev *pdev = adapter->pdev;
5000         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
5001 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5002         u32 rx_buffer_len, i;
5003 #endif
5004
5005         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
5006                 dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n");
5007                 return -EINVAL;
5008         }
5009
5010 #define MAX_STD_JUMBO_FRAME_SIZE 9238
5011         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
5012                 dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n");
5013                 return -EINVAL;
5014         }
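        /*
         * For reference: a standard 1500-byte MTU gives a max_frame of
         * 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522
         * bytes, and MAX_STD_JUMBO_FRAME_SIZE (9238) corresponds to the
         * 9216-byte MTU quoted in the error message above.
         */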
5015
5016         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
5017                 usleep_range(1000, 2000);
5018
5019         /* igb_down has a dependency on max_frame_size */
5020         adapter->max_frame_size = max_frame;
5021
5022 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5023 #ifdef IGB_PER_PKT_TIMESTAMP
5024         if (adapter->hw.mac.type >= e1000_82580)
5025                 max_frame += IGB_TS_HDR_LEN;
5026
5027 #endif
5028         /*
5029          * RLPML prevents us from receiving a frame larger than max_frame so
5030          * it is safe to just set the rx_buffer_len to max_frame without the
5031          * risk of an skb overrun panic.
5032          */
5033         if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
5034                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
5035         else
5036                 rx_buffer_len = max_frame;
5037
5038 #endif
5039         if (netif_running(netdev))
5040                 igb_down(adapter);
5041
5042         dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n",
5043                 netdev->mtu, new_mtu);
5044         netdev->mtu = new_mtu;
5045
5046 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5047         for (i = 0; i < adapter->num_rx_queues; i++)
5048                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
5049
5050 #endif
5051         if (netif_running(netdev))
5052                 igb_up(adapter);
5053         else
5054                 igb_reset(adapter);
5055
5056         clear_bit(__IGB_RESETTING, &adapter->state);
5057
5058         return 0;
5059 }
5060
5061 /**
5062  * igb_update_stats - Update the board statistics counters
5063  * @adapter: board private structure
5064  **/
5065
5066 void igb_update_stats(struct igb_adapter *adapter)
5067 {
5068 #ifdef HAVE_NETDEV_STATS_IN_NETDEV
5069         struct net_device_stats *net_stats = &adapter->netdev->stats;
5070 #else
5071         struct net_device_stats *net_stats = &adapter->net_stats;
5072 #endif /* HAVE_NETDEV_STATS_IN_NETDEV */
5073         struct e1000_hw *hw = &adapter->hw;
5074 #ifdef HAVE_PCI_ERS
5075         struct pci_dev *pdev = adapter->pdev;
5076 #endif
5077         u32 reg, mpc;
5078         u16 phy_tmp;
5079         int i;
5080         u64 bytes, packets;
5081 #ifndef IGB_NO_LRO
5082         u32 flushed = 0, coal = 0, recycled = 0;
5083         struct igb_q_vector *q_vector;
5084 #endif
5085
5086 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
5087
5088         /*
5089          * Prevent stats update while adapter is being reset, or if the pci
5090          * connection is down.
5091          */
5092         if (adapter->link_speed == 0)
5093                 return;
5094 #ifdef HAVE_PCI_ERS
5095         if (pci_channel_offline(pdev))
5096                 return;
5097
5098 #endif
5099 #ifndef IGB_NO_LRO
5100         for (i = 0; i < adapter->num_q_vectors; i++) {
5101                 q_vector = adapter->q_vector[i];
5102                 if (!q_vector || !q_vector->lrolist)
5103                         continue;
5104                 flushed += q_vector->lrolist->stats.flushed;
5105                 coal += q_vector->lrolist->stats.coal;
5106                 recycled += q_vector->lrolist->stats.recycled;
5107         }
5108         adapter->lro_stats.flushed = flushed;
5109         adapter->lro_stats.coal = coal;
5110         adapter->lro_stats.recycled = recycled;
5111
5112 #endif
5113         bytes = 0;
5114         packets = 0;
5115         for (i = 0; i < adapter->num_rx_queues; i++) {
5116                 u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF;
5117                 struct igb_ring *ring = adapter->rx_ring[i];
5118                 ring->rx_stats.drops += rqdpc_tmp;
5119                 net_stats->rx_fifo_errors += rqdpc_tmp;
5120 #ifdef CONFIG_IGB_VMDQ_NETDEV
5121                 if (!ring->vmdq_netdev) {
5122                         bytes += ring->rx_stats.bytes;
5123                         packets += ring->rx_stats.packets;
5124                 }
5125 #else
5126                 bytes += ring->rx_stats.bytes;
5127                 packets += ring->rx_stats.packets;
5128 #endif
5129         }
5130
5131         net_stats->rx_bytes = bytes;
5132         net_stats->rx_packets = packets;
5133
5134         bytes = 0;
5135         packets = 0;
5136         for (i = 0; i < adapter->num_tx_queues; i++) {
5137                 struct igb_ring *ring = adapter->tx_ring[i];
5138 #ifdef CONFIG_IGB_VMDQ_NETDEV
5139                 if (!ring->vmdq_netdev) {
5140                         bytes += ring->tx_stats.bytes;
5141                         packets += ring->tx_stats.packets;
5142                 }
5143 #else
5144                 bytes += ring->tx_stats.bytes;
5145                 packets += ring->tx_stats.packets;
5146 #endif
5147         }
5148         net_stats->tx_bytes = bytes;
5149         net_stats->tx_packets = packets;
5150
5151         /* read stats registers */
5152         adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5153         adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC);
5154         adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL);
5155         E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */
5156         adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC);
5157         adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC);
5158         adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC);
5159
5160         adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64);
5161         adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127);
5162         adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255);
5163         adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511);
5164         adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5165         adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5166         adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
5167         adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC);
5168
5169         mpc = E1000_READ_REG(hw, E1000_MPC);
5170         adapter->stats.mpc += mpc;
5171         net_stats->rx_fifo_errors += mpc;
5172         adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC);
5173         adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL);
5174         adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC);
5175         adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL);
5176         adapter->stats.dc += E1000_READ_REG(hw, E1000_DC);
5177         adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC);
5178         adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5179         adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5180         adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
5181         adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5182         adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5183         adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC);
5184         adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL);
5185         E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */
5186         adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC);
5187         adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC);
5188         adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC);
5189         adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC);
5190         adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH);
5191         adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH);
5192         adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR);
5193
5194         adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5195         adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5196         adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5197         adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5198         adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5199         adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5200
5201         adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC);
5202         adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC);
5203
5204         adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT);
5205         adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC);
5206
5207         adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5208         /* read internal PHY specific stats */
5209         reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
5210         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
5211                 adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5212                 adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5213         }
5214
5215         adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5216         adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5217
5218         adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC);
5219         adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5220         adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5221         adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5222         adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5223         adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5224         adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5225         adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5226         adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5227
5228         /* Fill out the OS statistics structure */
5229         net_stats->multicast = adapter->stats.mprc;
5230         net_stats->collisions = adapter->stats.colc;
5231
5232         /* Rx Errors */
5233
5234         /* RLEC on some newer hardware can be incorrect so build
5235          * our own version based on RUC and ROC */
5236         net_stats->rx_errors = adapter->stats.rxerrc +
5237                 adapter->stats.crcerrs + adapter->stats.algnerrc +
5238                 adapter->stats.ruc + adapter->stats.roc +
5239                 adapter->stats.cexterr;
5240         net_stats->rx_length_errors = adapter->stats.ruc +
5241                                       adapter->stats.roc;
5242         net_stats->rx_crc_errors = adapter->stats.crcerrs;
5243         net_stats->rx_frame_errors = adapter->stats.algnerrc;
5244         net_stats->rx_missed_errors = adapter->stats.mpc;
5245
5246         /* Tx Errors */
5247         net_stats->tx_errors = adapter->stats.ecol +
5248                                adapter->stats.latecol;
5249         net_stats->tx_aborted_errors = adapter->stats.ecol;
5250         net_stats->tx_window_errors = adapter->stats.latecol;
5251         net_stats->tx_carrier_errors = adapter->stats.tncrs;
5252
5253         /* Tx Dropped needs to be maintained elsewhere */
5254
5255         /* Phy Stats */
5256         if (hw->phy.media_type == e1000_media_type_copper) {
5257                 if ((adapter->link_speed == SPEED_1000) &&
5258                    (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
5259                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
5260                         adapter->phy_stats.idle_errors += phy_tmp;
5261                 }
5262         }
5263
5264         /* Management Stats */
5265         adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5266         adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5267         if (hw->mac.type > e1000_82580) {
5268                 adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC);
5269                 adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC);
5270                 adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC);
5271                 adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC);
5272         }
5273 }
5274
5275 static irqreturn_t igb_msix_other(int irq, void *data)
5276 {
5277         struct igb_adapter *adapter = data;
5278         struct e1000_hw *hw = &adapter->hw;
5279         u32 icr = E1000_READ_REG(hw, E1000_ICR);
5280         /* reading ICR causes bit 31 of EICR to be cleared */
5281
5282         if (icr & E1000_ICR_DRSTA)
5283                 schedule_work(&adapter->reset_task);
5284
5285         if (icr & E1000_ICR_DOUTSYNC) {
5286                 /* HW is reporting DMA is out of sync */
5287                 adapter->stats.doosync++;
5288                 /* The DMA Out of Sync is also an indication of a spoof event
5289                  * in IOV mode. Check the Wrong VM Behavior register to
5290                  * see if it is really a spoof event. */
5291                 igb_check_wvbr(adapter);
5292         }
5293
5294         /* Check for a mailbox event */
5295         if (icr & E1000_ICR_VMMB)
5296                 igb_msg_task(adapter);
5297
5298         if (icr & E1000_ICR_LSC) {
5299                 hw->mac.get_link_status = 1;
5300                 /* guard against interrupt when we're going down */
5301                 if (!test_bit(__IGB_DOWN, &adapter->state))
5302                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5303         }
5304
5305         /* Check for MDD event */
5306         if (icr & E1000_ICR_MDDET)
5307                 igb_process_mdd_event(adapter);
5308
5309         E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other);
5310
5311         return IRQ_HANDLED;
5312 }
5313
5314 static void igb_write_itr(struct igb_q_vector *q_vector)
5315 {
5316         struct igb_adapter *adapter = q_vector->adapter;
5317         u32 itr_val = q_vector->itr_val & 0x7FFC;
5318
5319         if (!q_vector->set_itr)
5320                 return;
5321
5322         if (!itr_val)
5323                 itr_val = 0x4;
5324
5325         if (adapter->hw.mac.type == e1000_82575)
5326                 itr_val |= itr_val << 16;
5327         else
5328                 itr_val |= E1000_EITR_CNT_IGNR;
5329
5330         writel(itr_val, q_vector->itr_register);
5331         q_vector->set_itr = 0;
5332 }
5333
5334 static irqreturn_t igb_msix_ring(int irq, void *data)
5335 {
5336         struct igb_q_vector *q_vector = data;
5337
5338         /* Write the ITR value calculated from the previous interrupt. */
5339         igb_write_itr(q_vector);
5340
5341         napi_schedule(&q_vector->napi);
5342
5343         return IRQ_HANDLED;
5344 }
5345
5346 #ifdef IGB_DCA
5347 static void igb_update_dca(struct igb_q_vector *q_vector)
5348 {
5349         struct igb_adapter *adapter = q_vector->adapter;
5350         struct e1000_hw *hw = &adapter->hw;
5351         int cpu = get_cpu();
5352
5353         if (q_vector->cpu == cpu)
5354                 goto out_no_update;
5355
5356         if (q_vector->tx.ring) {
5357                 int q = q_vector->tx.ring->reg_idx;
5358                 u32 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(q));
5359                 if (hw->mac.type == e1000_82575) {
5360                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
5361                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
5362                 } else {
5363                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
5364                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
5365                                       E1000_DCA_TXCTRL_CPUID_SHIFT_82576;
5366                 }
5367                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
5368                 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(q), dca_txctrl);
5369         }
5370         if (q_vector->rx.ring) {
5371                 int q = q_vector->rx.ring->reg_idx;
5372                 u32 dca_rxctrl = E1000_READ_REG(hw, E1000_DCA_RXCTRL(q));
5373                 if (hw->mac.type == e1000_82575) {
5374                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
5375                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
5376                 } else {
5377                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
5378                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
5379                                       E1000_DCA_RXCTRL_CPUID_SHIFT_82576;
5380                 }
5381                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
5382                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
5383                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
5384                 E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(q), dca_rxctrl);
5385         }
5386         q_vector->cpu = cpu;
5387 out_no_update:
5388         put_cpu();
5389 }
5390
5391 static void igb_setup_dca(struct igb_adapter *adapter)
5392 {
5393         struct e1000_hw *hw = &adapter->hw;
5394         int i;
5395
5396         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
5397                 return;
5398
5399         /* Always use CB2 mode, difference is masked in the CB driver. */
5400         E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
5401
5402         for (i = 0; i < adapter->num_q_vectors; i++) {
5403                 adapter->q_vector[i]->cpu = -1;
5404                 igb_update_dca(adapter->q_vector[i]);
5405         }
5406 }
5407
5408 static int __igb_notify_dca(struct device *dev, void *data)
5409 {
5410         struct net_device *netdev = dev_get_drvdata(dev);
5411         struct igb_adapter *adapter = netdev_priv(netdev);
5412         struct pci_dev *pdev = adapter->pdev;
5413         struct e1000_hw *hw = &adapter->hw;
5414         unsigned long event = *(unsigned long *)data;
5415
5416         switch (event) {
5417         case DCA_PROVIDER_ADD:
5418                 /* if already enabled, don't do it again */
5419                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5420                         break;
5421                 if (dca_add_requester(dev) == E1000_SUCCESS) {
5422                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
5423                         dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
5424                         igb_setup_dca(adapter);
5425                         break;
5426                 }
5427                 /* Fall Through since DCA is disabled. */
5428         case DCA_PROVIDER_REMOVE:
5429                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5430                         /* without this, a class_device is left
5431                          * hanging around in the sysfs model */
5432                         dca_remove_requester(dev);
5433                         dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
5434                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5435                         E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
5436                 }
5437                 break;
5438         }
5439
5440         return E1000_SUCCESS;
5441 }
5442
5443 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5444                           void *p)
5445 {
5446         int ret_val;
5447
5448         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5449                                          __igb_notify_dca);
5450
5451         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5452 }
5453 #endif /* IGB_DCA */
5454
5455 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5456 {
5457         unsigned char mac_addr[ETH_ALEN];
5458 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5459         struct pci_dev *pdev = adapter->pdev;
5460         struct e1000_hw *hw = &adapter->hw;
5461         struct pci_dev *pvfdev;
5462         unsigned int device_id;
5463         u16 thisvf_devfn;
5464 #endif
5465
5466         random_ether_addr(mac_addr);
5467         igb_set_vf_mac(adapter, vf, mac_addr);
5468
5469 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5470         switch (adapter->hw.mac.type) {
5471         case e1000_82576:
5472                 device_id = IGB_82576_VF_DEV_ID;
5473                 /* VF Stride for 82576 is 2 */
5474                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5475                         (pdev->devfn & 1);
5476                 break;
5477         case e1000_i350:
5478                 device_id = IGB_I350_VF_DEV_ID;
5479                 /* VF Stride for I350 is 4 */
5480                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5481                                 (pdev->devfn & 3);
5482                 break;
5483         default:
5484                 device_id = 0;
5485                 thisvf_devfn = 0;
5486                 break;
5487         }
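        /*
         * Example of the arithmetic above: with the PF at devfn 0, VF 3 on
         * an 82576 maps to devfn 0x80 + (3 << 1) = 0x86, so the loop below
         * looks for a device with the VF device ID at device 16, function 6.
         */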
5488
5489         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5490         while (pvfdev) {
5491                 if (pvfdev->devfn == thisvf_devfn)
5492                         break;
5493                 pvfdev = pci_get_device(hw->vendor_id,
5494                                         device_id, pvfdev);
5495         }
5496
5497         if (pvfdev)
5498                 adapter->vf_data[vf].vfdev = pvfdev;
5499         else
5500                 dev_err(&pdev->dev,
5501                         "Couldn't find pci dev ptr for VF %4.4x\n",
5502                         thisvf_devfn);
5503         return pvfdev != NULL;
5504 #else
5505         return true;
5506 #endif
5507 }
5508
5509 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5510 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5511 {
5512         struct e1000_hw *hw = &adapter->hw;
5513         struct pci_dev *pdev = adapter->pdev;
5514         struct pci_dev *pvfdev;
5515         u16 vf_devfn = 0;
5516         u16 vf_stride;
5517         unsigned int device_id;
5518         int vfs_found = 0;
5519
5520         switch (adapter->hw.mac.type) {
5521         case e1000_82576:
5522                 device_id = IGB_82576_VF_DEV_ID;
5523                 /* VF Stride for 82576 is 2 */
5524                 vf_stride = 2;
5525                 break;
5526         case e1000_i350:
5527                 device_id = IGB_I350_VF_DEV_ID;
5528                 /* VF Stride for I350 is 4 */
5529                 vf_stride = 4;
5530                 break;
5531         default:
5532                 device_id = 0;
5533                 vf_stride = 0;
5534                 break;
5535         }
5536
5537         vf_devfn = pdev->devfn + 0x80;
5538         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5539         while (pvfdev) {
5540                 if (pvfdev->devfn == vf_devfn)
5541                         vfs_found++;
5542                 vf_devfn += vf_stride;
5543                 pvfdev = pci_get_device(hw->vendor_id,
5544                                         device_id, pvfdev);
5545         }
5546
5547         return vfs_found;
5548 }
5549 #endif
5550
5551 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5552 {
5553 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5554         int i;
5555         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5556                 if (adapter->vf_data[i].vfdev) {
5557                         if (adapter->vf_data[i].vfdev->dev_flags &
5558                             PCI_DEV_FLAGS_ASSIGNED)
5559                                 return true;
5560                 }
5561         }
5562 #endif
5563         return false;
5564 }
5565
5566 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5567 {
5568         struct e1000_hw *hw = &adapter->hw;
5569         u32 ping;
5570         int i;
5571
5572         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5573                 ping = E1000_PF_CONTROL_MSG;
5574                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5575                         ping |= E1000_VT_MSGTYPE_CTS;
5576                 e1000_write_mbx(hw, &ping, 1, i);
5577         }
5578 }
5579
5580 /**
5581  *  igb_mta_set - Set multicast filter table address
5582  *  @adapter: pointer to the adapter structure
5583  *  @hash_value: determines the MTA register and bit to set
5584  *
5585  *  The multicast table address is a register array of 32-bit registers.
5586  *  The hash_value is used to determine what register the bit is in, the
5587  *  current value is read, the new bit is OR'd in and the new value is
5588  *  written back into the register.
5589  **/
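/*
 * Example with the layout described above: a hash_value of 0x2A3 selects
 * MTA register (0x2A3 >> 5) & (mta_reg_count - 1) = 21 (assuming the usual
 * 128-register table) and bit 0x2A3 & 0x1F = 3 within that register.
 */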
5590 void igb_mta_set(struct igb_adapter *adapter, u32 hash_value)
5591 {
5592         struct e1000_hw *hw = &adapter->hw;
5593         u32 hash_bit, hash_reg, mta;
5594
5595         /*
5596          * The MTA is a register array of 32-bit registers. It is
5597          * treated like an array of (32*mta_reg_count) bits.  We want to
5598          * set bit BitArray[hash_value]. So we figure out what register
5599          * the bit is in, read it, OR in the new bit, then write
5600          * back the new value.  The (hw->mac.mta_reg_count - 1) serves as a
5601          * mask to bits 31:5 of the hash value which gives us the
5602          * register we're modifying.  The hash bit within that register
5603          * is determined by the lower 5 bits of the hash value.
5604          */
5605         hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
5606         hash_bit = hash_value & 0x1F;
5607
5608         mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg);
5609
5610         mta |= (1 << hash_bit);
5611
5612         E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta);
5613         E1000_WRITE_FLUSH(hw);
5614 }
5615
5616 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5617 {
5618
5619         struct e1000_hw *hw = &adapter->hw;
5620         u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf));
5621         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5622
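              /* drop any previously granted promiscuous state before applying
               * the flags requested in this mailbox message */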
5623         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5624                             IGB_VF_FLAG_MULTI_PROMISC);
5625         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5626
5627 #ifdef IGB_ENABLE_VF_PROMISC
5628         if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) {
5629                 vmolr |= E1000_VMOLR_ROPE;
5630                 vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC;
5631                 *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST;
5632         }
5633 #endif
5634         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5635                 vmolr |= E1000_VMOLR_MPME;
5636                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5637                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5638         } else {
5639                 /*
5640                  * if we have hashes and we are clearing a multicast promisc
5641                  * flag we need to write the hashes to the MTA as this step
5642                  * was previously skipped
5643                  */
5644                 if (vf_data->num_vf_mc_hashes > 30) {
5645                         vmolr |= E1000_VMOLR_MPME;
5646                 } else if (vf_data->num_vf_mc_hashes) {
5647                         int j;
5648                         vmolr |= E1000_VMOLR_ROMPE;
5649                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5650                                 igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
5651                 }
5652         }
5653
5654         E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr);
5655
5656         /* there are flags left unprocessed, likely not supported */
5657         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5658                 return -EINVAL;
5659
5660         return 0;
5661
5662 }
5663
5664 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5665                                   u32 *msgbuf, u32 vf)
5666 {
5667         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5668         u16 *hash_list = (u16 *)&msgbuf[1];
5669         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5670         int i;
5671
5672         /* salt away the number of multicast addresses assigned
5673          * to this VF for later use to restore when the PF multicast
5674          * list changes
5675          */
5676         vf_data->num_vf_mc_hashes = n;
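              /*
               * Note: the full requested count (which may exceed 30) is stored
               * so the promisc/restore paths can fall back to multicast
               * promiscuous mode when the per-VF hash list overflows.
               */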
5677
5678         /* only up to 30 hash values supported */
5679         if (n > 30)
5680                 n = 30;
5681
5682         /* store the hashes for later use */
5683         for (i = 0; i < n; i++)
5684                 vf_data->vf_mc_hashes[i] = hash_list[i];
5685
5686         /* Flush and reset the mta with the new values */
5687         igb_set_rx_mode(adapter->netdev);
5688
5689         return 0;
5690 }
5691
5692 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5693 {
5694         struct e1000_hw *hw = &adapter->hw;
5695         struct vf_data_storage *vf_data;
5696         int i, j;
5697
5698         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5699                 u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
5700                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5701
5702                 vf_data = &adapter->vf_data[i];
5703
5704                 if ((vf_data->num_vf_mc_hashes > 30) ||
5705                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5706                         vmolr |= E1000_VMOLR_MPME;
5707                 } else if (vf_data->num_vf_mc_hashes) {
5708                         vmolr |= E1000_VMOLR_ROMPE;
5709                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5710                                 igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
5711                 }
5712                 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
5713         }
5714 }
5715
5716 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5717 {
5718         struct e1000_hw *hw = &adapter->hw;
5719         u32 pool_mask, reg, vid;
5720         u16 vlan_default;
5721         int i;
5722
5723         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5724
5725         /* Find the vlan filter for this id */
5726         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5727                 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5728
5729                 /* remove the vf from the pool */
5730                 reg &= ~pool_mask;
5731
5732                 /* if pool is empty then remove entry from vfta */
5733                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5734                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5735                         vid = reg & E1000_VLVF_VLANID_MASK;
5736                         reg = 0;
5737                         igb_vfta_set(adapter, vid, FALSE);
5738                 }
5739
5740                 E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5741         }
5742
5743         adapter->vf_data[vf].vlans_enabled = 0;
5744
5745         vlan_default = adapter->vf_data[vf].default_vf_vlan_id;
5746         if (vlan_default)
5747                 igb_vlvf_set(adapter, vlan_default, true, vf);
5748 }
5749
5750 s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5751 {
5752         struct e1000_hw *hw = &adapter->hw;
5753         u32 reg, i;
5754
5755         /* The vlvf table only exists on 82576 hardware and newer */
5756         if (hw->mac.type < e1000_82576)
5757                 return -1;
5758
5759         /* we only need to do this if VMDq is enabled */
5760         if (!adapter->vmdq_pools)
5761                 return -1;
5762
5763         /* Find the vlan filter for this id */
5764         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5765                 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5766                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5767                     vid == (reg & E1000_VLVF_VLANID_MASK))
5768                         break;
5769         }
5770
5771         if (add) {
5772                 if (i == E1000_VLVF_ARRAY_SIZE) {
5773                         /* Did not find a matching VLAN ID entry that was
5774                          * enabled.  Search for a free filter entry, i.e.
5775                          * one without the enable bit set
5776                          */
5777                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5778                                 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5779                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5780                                         break;
5781                         }
5782                 }
5783                 if (i < E1000_VLVF_ARRAY_SIZE) {
5784                         /* Found an enabled/available entry */
5785                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5786
5787                         /* if !enabled we need to set this up in vfta */
5788                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5789                                 /* add VID to filter table */
5790                                 igb_vfta_set(adapter, vid, TRUE);
5791                                 reg |= E1000_VLVF_VLANID_ENABLE;
5792                         }
5793                         reg &= ~E1000_VLVF_VLANID_MASK;
5794                         reg |= vid;
5795                         E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5796
5797                         /* do not modify RLPML for PF devices */
5798                         if (vf >= adapter->vfs_allocated_count)
5799                                 return E1000_SUCCESS;
5800
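                              /* first VLAN enabled on this VF: grow the per-VF
                               * max receive packet size (VMOLR.RLPML) by 4 bytes
                               * to make room for the VLAN tag */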
5801                         if (!adapter->vf_data[vf].vlans_enabled) {
5802                                 u32 size;
5803                                 reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
5804                                 size = reg & E1000_VMOLR_RLPML_MASK;
5805                                 size += 4;
5806                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5807                                 reg |= size;
5808                                 E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
5809                         }
5810
5811                         adapter->vf_data[vf].vlans_enabled++;
5812                 }
5813         } else {
5814                 if (i < E1000_VLVF_ARRAY_SIZE) {
5815                         /* remove vf from the pool */
5816                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5817                         /* if pool is empty then remove entry from vfta */
5818                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5819                                 reg = 0;
5820                                 igb_vfta_set(adapter, vid, FALSE);
5821                         }
5822                         E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5823
5824                         /* do not modify RLPML for PF devices */
5825                         if (vf >= adapter->vfs_allocated_count)
5826                                 return E1000_SUCCESS;
5827
5828                         adapter->vf_data[vf].vlans_enabled--;
5829                         if (!adapter->vf_data[vf].vlans_enabled) {
5830                                 u32 size;
5831                                 reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
5832                                 size = reg & E1000_VMOLR_RLPML_MASK;
5833                                 size -= 4;
5834                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5835                                 reg |= size;
5836                                 E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
5837                         }
5838                 }
5839         }
5840         return E1000_SUCCESS;
5841 }
5842
5843 #ifdef IFLA_VF_MAX
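     /*
      * igb_set_vmvir - program the VM VLAN insert register for a VF; a non-zero
      * vid becomes the default VLAN tag for that VF, zero disables the default
      * tag.
      */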
5844 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5845 {
5846         struct e1000_hw *hw = &adapter->hw;
5847
5848         if (vid)
5849                 E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5850         else
5851                 E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0);
5852 }
5853
5854 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5855                                int vf, u16 vlan, u8 qos)
5856 {
5857         int err = 0;
5858         struct igb_adapter *adapter = netdev_priv(netdev);
5859
5860         /* VLAN IDs accepted range 0-4094 */
5861         if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
5862                 return -EINVAL;
5863         if (vlan || qos) {
5864                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5865                 if (err)
5866                         goto out;
5867                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5868                 igb_set_vmolr(adapter, vf, !vlan);
5869                 adapter->vf_data[vf].pf_vlan = vlan;
5870                 adapter->vf_data[vf].pf_qos = qos;
5871                 igb_set_vf_vlan_strip(adapter, vf, true); 
5872                 dev_info(&adapter->pdev->dev,
5873                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5874                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5875                         dev_warn(&adapter->pdev->dev,
5876                                  "The VF VLAN has been set,"
5877                                  " but the PF device is not up.\n");
5878                         dev_warn(&adapter->pdev->dev,
5879                                  "Bring the PF device up before"
5880                                  " attempting to use the VF device.\n");
5881                 }
5882         } else {
5883                 if (adapter->vf_data[vf].pf_vlan)
5884                         dev_info(&adapter->pdev->dev,
5885                                  "Clearing VLAN on VF %d\n", vf);
5886                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5887                                    false, vf);
5888                 igb_set_vmvir(adapter, vlan, vf);
5889                 igb_set_vmolr(adapter, vf, true);
5890                 igb_set_vf_vlan_strip(adapter, vf, false); 
5891                 adapter->vf_data[vf].pf_vlan = 0;
5892                 adapter->vf_data[vf].pf_qos = 0;
5893        }
5894 out:
5895        return err;
5896 }
5897 #endif
5898
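     /*
      * igb_set_vf_vlan - handle an E1000_VF_SET_VLAN mailbox request; the
      * add/remove flag travels in msgbuf[0] and the VLAN id in msgbuf[1].
      * VLAN tag stripping is enabled for a non-zero vid and the VLVF pool is
      * updated via igb_vlvf_set.
      */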
5899 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5900 {
5901         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5902         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5903
5904         if (vid)
5905                 igb_set_vf_vlan_strip(adapter, vf, true);
5906         else
5907                 igb_set_vf_vlan_strip(adapter, vf, false);
5908
5909         return igb_vlvf_set(adapter, vid, add, vf);
5910 }
5911
5912 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5913 {
5914         struct e1000_hw *hw = &adapter->hw;
5915
5916         /* clear flags except flag that the PF has set the MAC */
5917         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5918         adapter->vf_data[vf].last_nack = jiffies;
5919
5920         /* reset offloads to defaults */
5921         igb_set_vmolr(adapter, vf, true);
5922
5923         /* reset vlans for device */
5924         igb_clear_vf_vfta(adapter, vf);
5925 #ifdef IFLA_VF_MAX
5926         if (adapter->vf_data[vf].pf_vlan)
5927                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5928                                     adapter->vf_data[vf].pf_vlan,
5929                                     adapter->vf_data[vf].pf_qos);
5930         else
5931                 igb_clear_vf_vfta(adapter, vf);
5932 #endif
5933
5934         /* reset multicast table array for vf */
5935         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5936
5937         /* Flush and reset the mta with the new values */
5938         igb_set_rx_mode(adapter->netdev);
5939
5940         /* 
5941          * Reset the VF's TDWBAL and TDWBAH registers, which are not
5942          * cleared by a VFLR
5943          */
5944         E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0);
5945         E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0);
5946         if (hw->mac.type == e1000_82576) {
5947                 E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0);
5948                 E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0);
5949         }
5950 }
5951
5952 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5953 {
5954         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5955
5956         /* generate a new mac address as we were hotplug removed/added */
5957         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5958                 random_ether_addr(vf_mac);
5959
5960         /* process remaining reset events */
5961         igb_vf_reset(adapter, vf);
5962 }
5963
5964 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5965 {
5966         struct e1000_hw *hw = &adapter->hw;
5967         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5968         u32 reg, msgbuf[3];
5969         u8 *addr = (u8 *)(&msgbuf[1]);
5970
5971         /* process all the same items cleared in a function level reset */
5972         igb_vf_reset(adapter, vf);
5973
5974         /* set vf mac address */
5975         igb_del_mac_filter(adapter, vf_mac, vf);
5976         igb_add_mac_filter(adapter, vf_mac, vf);
5977
5978         /* enable transmit and receive for vf */
5979         reg = E1000_READ_REG(hw, E1000_VFTE);
5980         E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf));
5981         reg = E1000_READ_REG(hw, E1000_VFRE);
5982         E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf));
5983
5984         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5985
5986         /* reply to reset with ack and vf mac address */
5987         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5988         memcpy(addr, vf_mac, 6);
5989         e1000_write_mbx(hw, msgbuf, 3, vf);
5990 }
5991
5992 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5993 {
5994         /*
5995          * The VF MAC Address is stored in a packed array of bytes
5996          * starting at the second 32 bit word of the msg array
5997          */
5998         unsigned char *addr = (unsigned char *)&msg[1];
5999         int err = -1;
6000
6001         if (is_valid_ether_addr(addr))
6002                 err = igb_set_vf_mac(adapter, vf, addr);
6003
6004         return err;
6005 }
6006
6007 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
6008 {
6009         struct e1000_hw *hw = &adapter->hw;
6010         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6011         u32 msg = E1000_VT_MSGTYPE_NACK;
6012
6013         /* if device isn't clear to send it shouldn't be reading either */
6014         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
6015             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6016                 e1000_write_mbx(hw, &msg, 1, vf);
6017                 vf_data->last_nack = jiffies;
6018         }
6019 }
6020
6021 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
6022 {
6023         struct pci_dev *pdev = adapter->pdev;
6024         u32 msgbuf[E1000_VFMAILBOX_SIZE];
6025         struct e1000_hw *hw = &adapter->hw;
6026         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6027         s32 retval;
6028
6029         retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
6030
6031         if (retval) {
6032                 dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n");
6033                 return;
6034         }
6035
6036         /* this is a message we already processed, do nothing */
6037         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
6038                 return;
6039
6040         /*
6041          * until the vf completes a reset it should not be
6042          * allowed to start any configuration.
6043          */
6044
6045         if (msgbuf[0] == E1000_VF_RESET) {
6046                 igb_vf_reset_msg(adapter, vf);
6047                 return;
6048         }
6049
6050         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
6051                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
6052                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6053                         e1000_write_mbx(hw, msgbuf, 1, vf);
6054                         vf_data->last_nack = jiffies;
6055                 }
6056                 return;
6057         }
6058
6059         switch ((msgbuf[0] & 0xFFFF)) {
6060         case E1000_VF_SET_MAC_ADDR:
6061                 retval = -EINVAL;
6062 #ifndef IGB_DISABLE_VF_MAC_SET
6063                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
6064                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
6065                 else
6066                         DPRINTK(DRV, INFO,
6067                                 "VF %d attempted to override administratively "
6068                                 "set MAC address\nReload the VF driver to "
6069                                 "resume operations\n", vf);
6070 #endif
6071                 break;
6072         case E1000_VF_SET_PROMISC:
6073                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
6074                 break;
6075         case E1000_VF_SET_MULTICAST:
6076                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
6077                 break;
6078         case E1000_VF_SET_LPE:
6079                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
6080                 break;
6081         case E1000_VF_SET_VLAN:
6082                 retval = -1;
6083 #ifdef IFLA_VF_MAX
6084                 if (vf_data->pf_vlan)
6085                         DPRINTK(DRV, INFO,
6086                                 "VF %d attempted to override administratively "
6087                                 "set VLAN tag\nReload the VF driver to "
6088                                 "resume operations\n", vf);
6089                 else
6090 #endif
6091                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
6092                 break;
6093         default:
6094                 dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]);
6095                 retval = -E1000_ERR_MBX;
6096                 break;
6097         }
6098
6099         /* notify the VF of the results of what it sent us */
6100         if (retval)
6101                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
6102         else
6103                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
6104
6105         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
6106
6107         e1000_write_mbx(hw, msgbuf, 1, vf);
6108 }
6109
6110 static void igb_msg_task(struct igb_adapter *adapter)
6111 {
6112         struct e1000_hw *hw = &adapter->hw;
6113         u32 vf;
6114
6115         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
6116                 /* process any reset requests */
6117                 if (!e1000_check_for_rst(hw, vf))
6118                         igb_vf_reset_event(adapter, vf);
6119
6120                 /* process any messages pending */
6121                 if (!e1000_check_for_msg(hw, vf))
6122                         igb_rcv_msg_from_vf(adapter, vf);
6123
6124                 /* process any acks */
6125                 if (!e1000_check_for_ack(hw, vf))
6126                         igb_rcv_ack_from_vf(adapter, vf);
6127         }
6128 }
6129
6130 /**
6131  *  igb_set_uta - Set unicast filter table address
6132  *  @adapter: board private structure
6133  *
6134  *  The unicast table address is a register array of 32-bit registers.
6135  *  The table is meant to be used in a way similar to how the MTA is used;
6136  *  however, due to certain limitations in the hardware it is necessary to
6137  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
6138  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
6139  **/
6140 static void igb_set_uta(struct igb_adapter *adapter)
6141 {
6142         struct e1000_hw *hw = &adapter->hw;
6143         int i;
6144
6145         /* The UTA table only exists on 82576 hardware and newer */
6146         if (hw->mac.type < e1000_82576)
6147                 return;
6148
6149         /* we only need to do this if VMDq is enabled */
6150         if (!adapter->vmdq_pools)
6151                 return;
6152
6153         for (i = 0; i < hw->mac.uta_reg_count; i++)
6154                 E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0);
6155 }
6156
6157 /**
6158  * igb_intr_msi - Interrupt Handler
6159  * @irq: interrupt number
6160  * @data: pointer to a network interface device structure
6161  **/
6162 static irqreturn_t igb_intr_msi(int irq, void *data)
6163 {
6164         struct igb_adapter *adapter = data;
6165         struct igb_q_vector *q_vector = adapter->q_vector[0];
6166         struct e1000_hw *hw = &adapter->hw;
6167         /* read ICR disables interrupts using IAM */
6168         u32 icr = E1000_READ_REG(hw, E1000_ICR);
6169
6170         igb_write_itr(q_vector);
6171
6172         if (icr & E1000_ICR_DRSTA)
6173                 schedule_work(&adapter->reset_task);
6174
6175         if (icr & E1000_ICR_DOUTSYNC) {
6176                 /* HW is reporting DMA is out of sync */
6177                 adapter->stats.doosync++;
6178         }
6179
6180         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6181                 hw->mac.get_link_status = 1;
6182                 if (!test_bit(__IGB_DOWN, &adapter->state))
6183                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
6184         }
6185
6186         napi_schedule(&q_vector->napi);
6187
6188         return IRQ_HANDLED;
6189 }
6190
6191 /**
6192  * igb_intr - Legacy Interrupt Handler
6193  * @irq: interrupt number
6194  * @data: pointer to a network interface device structure
6195  **/
6196 static irqreturn_t igb_intr(int irq, void *data)
6197 {
6198         struct igb_adapter *adapter = data;
6199         struct igb_q_vector *q_vector = adapter->q_vector[0];
6200         struct e1000_hw *hw = &adapter->hw;
6201         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
6202          * need for the IMC write */
6203         u32 icr = E1000_READ_REG(hw, E1000_ICR);
6204
6205         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
6206          * not set, then the adapter didn't send an interrupt */
6207         if (!(icr & E1000_ICR_INT_ASSERTED))
6208                 return IRQ_NONE;
6209
6210         igb_write_itr(q_vector);
6211
6212         if (icr & E1000_ICR_DRSTA)
6213                 schedule_work(&adapter->reset_task);
6214
6215         if (icr & E1000_ICR_DOUTSYNC) {
6216                 /* HW is reporting DMA is out of sync */
6217                 adapter->stats.doosync++;
6218         }
6219
6220         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6221                 hw->mac.get_link_status = 1;
6222                 /* guard against interrupt when we're going down */
6223                 if (!test_bit(__IGB_DOWN, &adapter->state))
6224                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
6225         }
6226
6227         napi_schedule(&q_vector->napi);
6228
6229         return IRQ_HANDLED;
6230 }
6231
6232 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
6233 {
6234         struct igb_adapter *adapter = q_vector->adapter;
6235         struct e1000_hw *hw = &adapter->hw;
6236
6237         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
6238             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
6239                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
6240                         igb_set_itr(q_vector);
6241                 else
6242                         igb_update_ring_itr(q_vector);
6243         }
6244
6245         if (!test_bit(__IGB_DOWN, &adapter->state)) {
6246                 if (adapter->msix_entries)
6247                         E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value);
6248                 else
6249                         igb_irq_enable(adapter);
6250         }
6251 }
6252
6253 /**
6254  * igb_poll - NAPI Rx polling callback
6255  * @napi: napi polling structure
6256  * @budget: count of how many packets we should handle
6257  **/
6258 static int igb_poll(struct napi_struct *napi, int budget)
6259 {
6260         struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi);
6261         bool clean_complete = true;
6262
6263 #ifdef IGB_DCA
6264         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
6265                 igb_update_dca(q_vector);
6266 #endif
6267         if (q_vector->tx.ring)
6268                 clean_complete = igb_clean_tx_irq(q_vector);
6269
6270         if (q_vector->rx.ring)
6271                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
6272
6273 #ifndef HAVE_NETDEV_NAPI_LIST
6274         /* if netdev is disabled we need to stop polling */
6275         if (!netif_running(q_vector->adapter->netdev))
6276                 clean_complete = true;
6277
6278 #endif
6279         /* If all work not completed, return budget and keep polling */
6280         if (!clean_complete)
6281                 return budget;
6282
6283         /* If not enough Rx work done, exit the polling mode */
6284         napi_complete(napi);
6285         igb_ring_irq_enable(q_vector);
6286
6287         return 0;
6288 }
6289
6290 #ifdef HAVE_HW_TIME_STAMP
6291 /**
6292  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
6293  * @adapter: board private structure
6294  * @shhwtstamps: timestamp structure to update
6295  * @regval: unsigned 64bit system time value.
6296  *
6297  * We need to convert the system time value stored in the RX/TXSTMP registers
6298  * into a hwtstamp which can be used by the upper level timestamping functions
6299  */
6300 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
6301                                    struct skb_shared_hwtstamps *shhwtstamps,
6302                                    u64 regval)
6303 {
6304         u64 ns;
6305
6306         /*
6307          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
6308          * 24 to match clock shift we setup earlier.
6309          */
6310         if (adapter->hw.mac.type >= e1000_82580)
6311                 regval <<= IGB_82580_TSYNC_SHIFT;
6312
6313         ns = timecounter_cyc2time(&adapter->clock, regval);
6314
6315         /*
6316          * force a timecompare_update here (even if less than a second
6317          * has passed) in order to prevent the case when ptpd or other
6318          * software jumps the clock offset. Otherwise there is a small
6319          * window when the timestamp would be based on previous skew
6320          * and invalid results would be pushed to the network stack.
6321          */
6322         timecompare_update(&adapter->compare, 0);
6323         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
6324         shhwtstamps->hwtstamp = ns_to_ktime(ns);
6325         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
6326 }
6327
6328 /**
6329  * igb_tx_hwtstamp - utility function which checks for TX time stamp
6330  * @q_vector: pointer to q_vector containing needed info
6331  * @buffer: pointer to igb_tx_buffer structure
6332  *
6333  * If we were asked to do hardware stamping and such a time stamp is
6334  * available, then it must have been for this skb here because we allow
6335  * only one such packet into the queue.
6336  */
6337 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
6338                             struct igb_tx_buffer *buffer_info)
6339 {
6340         struct igb_adapter *adapter = q_vector->adapter;
6341         struct e1000_hw *hw = &adapter->hw;
6342         struct skb_shared_hwtstamps shhwtstamps;
6343         u64 regval;
6344
6345         /* if skb does not support hw timestamp or TX stamp not valid exit */
6346         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
6347             !(E1000_READ_REG(hw, E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
6348                 return;
6349
6350         regval = E1000_READ_REG(hw, E1000_TXSTMPL);
6351         regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
6352
6353         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
6354         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
6355 }
6356
6357 #endif
6358 /**
6359  * igb_clean_tx_irq - Reclaim resources after transmit completes
6360  * @q_vector: pointer to q_vector containing needed info
6361  * returns TRUE if ring is completely cleaned
6362  **/
6363 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
6364 {
6365         struct igb_adapter *adapter = q_vector->adapter;
6366         struct igb_ring *tx_ring = q_vector->tx.ring;
6367         struct igb_tx_buffer *tx_buffer;
6368         union e1000_adv_tx_desc *tx_desc, *eop_desc;
6369         unsigned int total_bytes = 0, total_packets = 0;
6370         unsigned int budget = q_vector->tx.work_limit;
6371         unsigned int i = tx_ring->next_to_clean;
6372
6373         if (test_bit(__IGB_DOWN, &adapter->state))
6374                 return true;
6375
6376         tx_buffer = &tx_ring->tx_buffer_info[i];
6377         tx_desc = IGB_TX_DESC(tx_ring, i);
6378         i -= tx_ring->count;
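              /* i is kept biased by -tx_ring->count so that ring wrap-around in
               * the loop below shows up as a simple !i test */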
6379
6380         for (; budget; budget--) {
6381                 eop_desc = tx_buffer->next_to_watch;
6382
6383                 /* prevent any other reads prior to eop_desc */
6384                 rmb();
6385
6386                 /* if next_to_watch is not set then there is no work pending */
6387                 if (!eop_desc)
6388                         break;
6389
6390                 /* if DD is not set pending work has not been completed */
6391                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
6392                         break;
6393
6394                 /* clear next_to_watch to prevent false hangs */
6395                 tx_buffer->next_to_watch = NULL;
6396
6397                 /* update the statistics for this packet */
6398                 total_bytes += tx_buffer->bytecount;
6399                 total_packets += tx_buffer->gso_segs;
6400
6401 #ifdef HAVE_HW_TIME_STAMP
6402                 /* retrieve hardware timestamp */
6403                 igb_tx_hwtstamp(q_vector, tx_buffer);
6404
6405 #endif
6406                 /* free the skb */
6407                 dev_kfree_skb_any(tx_buffer->skb);
6408
6409                 /* unmap skb header data */
6410                 dma_unmap_single(tx_ring->dev,
6411                                  dma_unmap_addr(tx_buffer, dma),
6412                                  dma_unmap_len(tx_buffer, len),
6413                                  DMA_TO_DEVICE);
6414
6415                 /* clear tx_buffer data */
6416                 tx_buffer->skb = NULL;
6417                 dma_unmap_len_set(tx_buffer, len, 0);
6418
6419                 /* clear last DMA location and unmap remaining buffers */
6420                 while (tx_desc != eop_desc) {
6421                         tx_buffer++;
6422                         tx_desc++;
6423                         i++;
6424                         if (unlikely(!i)) {
6425                                 i -= tx_ring->count;
6426                                 tx_buffer = tx_ring->tx_buffer_info;
6427                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
6428                         }
6429
6430                         /* unmap any remaining paged data */
6431                         if (dma_unmap_len(tx_buffer, len)) {
6432                                 dma_unmap_page(tx_ring->dev,
6433                                                dma_unmap_addr(tx_buffer, dma),
6434                                                dma_unmap_len(tx_buffer, len),
6435                                                DMA_TO_DEVICE);
6436                                 dma_unmap_len_set(tx_buffer, len, 0);
6437                         }
6438                 }
6439
6440                 /* move us one more past the eop_desc for start of next pkt */
6441                 tx_buffer++;
6442                 tx_desc++;
6443                 i++;
6444                 if (unlikely(!i)) {
6445                         i -= tx_ring->count;
6446                         tx_buffer = tx_ring->tx_buffer_info;
6447                         tx_desc = IGB_TX_DESC(tx_ring, 0);
6448                 }
6449         }
6450
6451 #ifdef CONFIG_BQL
6452         netdev_tx_completed_queue(txring_txq(tx_ring),
6453                                   total_packets, total_bytes);
6454 #endif /* CONFIG_BQL */
6455
6456         i += tx_ring->count;
6457         tx_ring->next_to_clean = i;
6458         tx_ring->tx_stats.bytes += total_bytes;
6459         tx_ring->tx_stats.packets += total_packets;
6460         q_vector->tx.total_bytes += total_bytes;
6461         q_vector->tx.total_packets += total_packets;
6462
6463         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
6464                 struct e1000_hw *hw = &adapter->hw;
6465
6466                 eop_desc = tx_buffer->next_to_watch;
6467
6468                 /* Detect a transmit hang in hardware, this serializes the
6469                  * check with the clearing of time_stamp and movement of i */
6470                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
6471                 if (eop_desc &&
6472                     time_after(jiffies, tx_buffer->time_stamp +
6473                                (adapter->tx_timeout_factor * HZ))
6474                     && !(E1000_READ_REG(hw, E1000_STATUS) &
6475                          E1000_STATUS_TXOFF)) {
6476
6477                         /* detected Tx unit hang */
6478                         dev_err(tx_ring->dev,
6479                                 "Detected Tx Unit Hang\n"
6480                                 "  Tx Queue             <%d>\n"
6481                                 "  TDH                  <%x>\n"
6482                                 "  TDT                  <%x>\n"
6483                                 "  next_to_use          <%x>\n"
6484                                 "  next_to_clean        <%x>\n"
6485                                 "buffer_info[next_to_clean]\n"
6486                                 "  time_stamp           <%lx>\n"
6487                                 "  next_to_watch        <%p>\n"
6488                                 "  jiffies              <%lx>\n"
6489                                 "  desc.status          <%x>\n",
6490                                 tx_ring->queue_index,
6491                                 E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)),
6492                                 readl(tx_ring->tail),
6493                                 tx_ring->next_to_use,
6494                                 tx_ring->next_to_clean,
6495                                 tx_buffer->time_stamp,
6496                                 eop_desc,
6497                                 jiffies,
6498                                 eop_desc->wb.status);
6499                         if (netif_is_multiqueue(netdev_ring(tx_ring)))
6500                                 netif_stop_subqueue(netdev_ring(tx_ring),
6501                                                     ring_queue_index(tx_ring));
6502                         else
6503                                 netif_stop_queue(netdev_ring(tx_ring));
6504
6505                         /* we are about to reset, no point in enabling stuff */
6506                         return true;
6507                 }
6508         }
6509
6510         if (unlikely(total_packets &&
6511                      netif_carrier_ok(netdev_ring(tx_ring)) &&
6512                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
6513                 /* Make sure that anybody stopping the queue after this
6514                  * sees the new next_to_clean.
6515                  */
6516                 smp_mb();
6517                 if (netif_is_multiqueue(netdev_ring(tx_ring))) {
6518                         if (__netif_subqueue_stopped(netdev_ring(tx_ring),
6519                                                      ring_queue_index(tx_ring)) &&
6520                             !(test_bit(__IGB_DOWN, &adapter->state))) {
6521                                 netif_wake_subqueue(netdev_ring(tx_ring),
6522                                                     ring_queue_index(tx_ring));
6523                                 tx_ring->tx_stats.restart_queue++;
6524                         }
6525                 } else {
6526                         if (netif_queue_stopped(netdev_ring(tx_ring)) &&
6527                             !(test_bit(__IGB_DOWN, &adapter->state))) {
6528                                 netif_wake_queue(netdev_ring(tx_ring));
6529                                 tx_ring->tx_stats.restart_queue++;
6530                         }
6531                 }
6532         }
6533
6534         return !!budget;
6535 }
6536
6537 #ifdef HAVE_VLAN_RX_REGISTER
6538 /**
6539  * igb_receive_skb - helper function to handle rx indications
6540  * @q_vector: structure containing interrupt and ring information
6541  * @skb: packet to send up
6542  **/
6543 static void igb_receive_skb(struct igb_q_vector *q_vector,
6544                             struct sk_buff *skb)
6545 {
6546         struct vlan_group **vlgrp = netdev_priv(skb->dev);
6547
6548         if (IGB_CB(skb)->vid) {
6549                 if (*vlgrp) {
6550                         vlan_gro_receive(&q_vector->napi, *vlgrp,
6551                                          IGB_CB(skb)->vid, skb);
6552                 } else {
6553                         dev_kfree_skb_any(skb);
6554                 }
6555         } else {
6556                 napi_gro_receive(&q_vector->napi, skb);
6557         }
6558 }
6559
6560 #endif /* HAVE_VLAN_RX_REGISTER */
6561 static inline void igb_rx_checksum(struct igb_ring *ring,
6562                                    union e1000_adv_rx_desc *rx_desc,
6563                                    struct sk_buff *skb)
6564 {
6565         skb_checksum_none_assert(skb);
6566
6567         /* Ignore Checksum bit is set */
6568         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
6569                 return;
6570
6571         /* Rx checksum disabled via ethtool */
6572 #ifdef HAVE_NDO_SET_FEATURES
6573         if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM))
6574 #else
6575         if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags))
6576 #endif
6577                 return;
6578
6579         /* TCP/UDP checksum error bit is set */
6580         if (igb_test_staterr(rx_desc,
6581                              E1000_RXDEXT_STATERR_TCPE |
6582                              E1000_RXDEXT_STATERR_IPE)) {
6583                 /*
6584                  * work around errata with sctp packets where the TCPE aka
6585                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
6586                  * packets, (aka let the stack check the crc32c)
6587                  */
6588                 if (!((skb->len == 60) &&
6589                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags)))
6590                         ring->rx_stats.csum_err++;
6591
6592                 /* let the stack verify checksum errors */
6593                 return;
6594         }
6595         /* It must be a TCP or UDP packet with a valid checksum */
6596         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
6597                                       E1000_RXD_STAT_UDPCS))
6598                 skb->ip_summed = CHECKSUM_UNNECESSARY;
6599 }
6600
6601 #ifdef NETIF_F_RXHASH
6602 static inline void igb_rx_hash(struct igb_ring *ring,
6603                                union e1000_adv_rx_desc *rx_desc,
6604                                struct sk_buff *skb)
6605 {
6606         if (netdev_ring(ring)->features & NETIF_F_RXHASH)
6607                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6608 }
6609
6610 #endif
6611 #ifdef HAVE_HW_TIME_STAMP
6612 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6613                             union e1000_adv_rx_desc *rx_desc,
6614                             struct sk_buff *skb)
6615 {
6616         struct igb_adapter *adapter = q_vector->adapter;
6617         struct e1000_hw *hw = &adapter->hw;
6618         u64 regval;
6619
6620         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6621                                        E1000_RXDADV_STAT_TS))
6622                 return;
6623
6624         /*
6625          * If this bit is set, then the RX registers contain the time stamp. No
6626          * other packet will be time stamped until we read these registers, so
6627          * read the registers to make them available again. Because only one
6628          * packet can be time stamped at a time, we know that the register
6629          * values must belong to this one here and therefore we don't need to
6630          * compare any of the additional attributes stored for it.
6631          *
6632          * If nothing went wrong, then it should have a skb_shared_tx that we
6633          * can turn into a skb_shared_hwtstamps.
6634          */
6635         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6636                 u32 *stamp = (u32 *)skb->data;
6637                 regval = le32_to_cpu(*(stamp + 2));
6638                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6639                 skb_pull(skb, IGB_TS_HDR_LEN);
6640         } else {
6641                 if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6642                         return;
6643
6644                 regval = E1000_READ_REG(hw, E1000_RXSTMPL);
6645                 regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
6646         }
6647
6648         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6649 }
6650 #endif
6651 static void igb_rx_vlan(struct igb_ring *ring,
6652                         union e1000_adv_rx_desc *rx_desc,
6653                         struct sk_buff *skb)
6654 {
6655         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6656                 u16 vid = 0;
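                      /* loopbacked (VM-to-VM) packets on rings flagged with
                       * RX_LB_VLAN_BSWAP carry the VLAN tag in network byte
                       * order; everything else is already host order */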
6657                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6658                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6659                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6660                 else
6661                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6662 #ifdef HAVE_VLAN_RX_REGISTER
6663                 IGB_CB(skb)->vid = vid;
6664         } else {
6665                 IGB_CB(skb)->vid = 0;
6666 #else
6667                 __vlan_hwaccel_put_tag(skb, vid);
6668 #endif
6669         }
6670 }
6671
6672 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
6673 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6674 {
6675         /* HW will not DMA in data larger than the given buffer, even if it
6676          * parses the (NFS, of course) header to be larger.  In that case, it
6677          * fills the header buffer and spills the rest into the page.
6678          */
6679         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) &
6680                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6681         if (hlen > IGB_RX_HDR_LEN)
6682                 hlen = IGB_RX_HDR_LEN;
6683         return hlen;
6684 }
6685
6686 #endif
6687 #ifndef IGB_NO_LRO
6688 /**
6689  * igb_merge_active_tail - merge active tail into lro skb
6690  * @tail: pointer to active tail in frag_list
6691  *
6692  * This function merges the length and data of an active tail into the
6693  * skb containing the frag_list.  It resets the tail's pointer to the head,
6694  * but it leaves the head's pointer to the tail intact.
6695  **/
6696 static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail)
6697 {
6698         struct sk_buff *head = IGB_CB(tail)->head;
6699
6700         if (!head)
6701                 return tail;
6702
6703         head->len += tail->len;
6704         head->data_len += tail->len;
6705         head->truesize += tail->len;
6706
6707         IGB_CB(tail)->head = NULL;
6708
6709         return head;
6710 }
6711
6712 /**
6713  * igb_add_active_tail - adds an active tail into the skb frag_list
6714  * @head: pointer to the start of the skb
6715  * @tail: pointer to active tail to add to frag_list
6716  *
6717  * This function adds an active tail to the end of the frag list.  This tail
6718  * will still be receiving data so we cannot yet add its stats to the main
6719  * skb.  That is done via igb_merge_active_tail.
6720  **/
6721 static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail)
6722 {
6723         struct sk_buff *old_tail = IGB_CB(head)->tail;
6724
6725         if (old_tail) {
6726                 igb_merge_active_tail(old_tail);
6727                 old_tail->next = tail;
6728         } else {
6729                 skb_shinfo(head)->frag_list = tail;
6730         }
6731
6732         IGB_CB(tail)->head = head;
6733         IGB_CB(head)->tail = tail;
6734
6735         IGB_CB(head)->append_cnt++;
6736 }
6737
6738 /**
6739  * igb_close_active_frag_list - cleanup pointers on a frag_list skb
6740  * @head: pointer to head of an active frag list
6741  *
6742  * This function will clear the frag_tail_tracker pointer on an active
6743  * frag_list and returns true if the pointer was actually set
6744  **/
6745 static inline bool igb_close_active_frag_list(struct sk_buff *head)
6746 {
6747         struct sk_buff *tail = IGB_CB(head)->tail;
6748
6749         if (!tail)
6750                 return false;
6751
6752         igb_merge_active_tail(tail);
6753
6754         IGB_CB(head)->tail = NULL;
6755
6756         return true;
6757 }
6758
6759 /**
6760  * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled
6761  * @adapter: board private structure
6762  * @rx_desc: pointer to the rx descriptor
6763  * @skb: pointer to the skb to be merged
6764  *
6765  **/
6766 static inline bool igb_can_lro(struct igb_ring *rx_ring,
6767                                union e1000_adv_rx_desc *rx_desc,
6768                                struct sk_buff *skb)
6769 {
6770         struct iphdr *iph = (struct iphdr *)skb->data;
6771         __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
6772
6773         /* verify LRO is enabled */
6774         if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO))
6775                 return false;
6776
6777         /* verify hardware indicates this is IPv4/TCP */
6778         if (!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) ||
6779             !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)))
6780                 return false;
6781
6782         /* verify the header is large enough for us to read IP/TCP fields */
6783         if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr)))
6784                 return false;
6785
6786         /* verify there are no VLANs on packet */
6787         if (skb->protocol != __constant_htons(ETH_P_IP))
6788                 return false;
6789
6790         /* ensure we are version 4 with no options */
6791         if (*(u8 *)iph != 0x45)
6792                 return false;
6793
6794         /* .. and the packet is not fragmented */
6795         if (iph->frag_off & htons(IP_MF | IP_OFFSET))
6796                 return false;
6797
6798         /* .. and that next header is TCP */
6799         if (iph->protocol != IPPROTO_TCP)
6800                 return false;
6801
6802         return true;
6803 }
6804
6805 static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb)
6806 {
6807         return (struct igb_lrohdr *)skb->data;
6808 }
6809
6810 /**
6811  * igb_lro_flush - Indicate packets to upper layer.
6812  *
6813  * Update IP and TCP header part of head skb if more than one
6814  * skb is chained and indicate packets to upper layer.
6815  **/
6816 static void igb_lro_flush(struct igb_q_vector *q_vector,
6817                           struct sk_buff *skb)
6818 {
6819         struct igb_lro_list *lrolist = q_vector->lrolist;
6820
6821         __skb_unlink(skb, &lrolist->active);
6822
6823         if (IGB_CB(skb)->append_cnt) {
6824                 struct igb_lrohdr *lroh = igb_lro_hdr(skb);
6825
6826                 /* close any active lro contexts */
6827                 igb_close_active_frag_list(skb);
6828
6829                 /* incorporate ip header and re-calculate checksum */
6830                 lroh->iph.tot_len = ntohs(skb->len);
6831                 lroh->iph.check = 0;
6832
6833                 /* header length is 5 since we know no options exist */
6834                 lroh->iph.check = ip_fast_csum((u8 *)lroh, 5);
6835
6836                 /* clear TCP checksum to indicate we are an LRO frame */
6837                 lroh->th.check = 0;
6838
6839                 /* incorporate latest timestamp into the tcp header */
6840                 if (IGB_CB(skb)->tsecr) {
6841                         lroh->ts[2] = IGB_CB(skb)->tsecr;
6842                         lroh->ts[1] = htonl(IGB_CB(skb)->tsval);
6843                 }
6844 #ifdef NETIF_F_TSO
6845
6846                 skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss;
6847 #endif
6848         }
6849
6850 #ifdef HAVE_VLAN_RX_REGISTER
6851         igb_receive_skb(q_vector, skb);
6852 #else
6853         napi_gro_receive(&q_vector->napi, skb);
6854 #endif
6855         lrolist->stats.flushed++;
6856 }
6857
6858 static void igb_lro_flush_all(struct igb_q_vector *q_vector)
6859 {
6860         struct igb_lro_list *lrolist = q_vector->lrolist;
6861         struct sk_buff *skb, *tmp;
6862
6863         skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp)
6864                 igb_lro_flush(q_vector, skb);
6865 }
6866
6867 /**
6868  * igb_lro_header_ok - check that a packet header is eligible for LRO
6869  **/
6870 static void igb_lro_header_ok(struct sk_buff *skb)
6871 {
6872         struct igb_lrohdr *lroh = igb_lro_hdr(skb);
6873         u16 opt_bytes, data_len;
6874
6875         IGB_CB(skb)->tail = NULL;
6876         IGB_CB(skb)->tsecr = 0;
6877         IGB_CB(skb)->append_cnt = 0;
6878         IGB_CB(skb)->mss = 0;
6879
6880         /* ensure that the checksum is valid */
6881         if (skb->ip_summed != CHECKSUM_UNNECESSARY)
6882                 return;
6883
6884         /* If we see CE codepoint in IP header, packet is not mergeable */
6885         if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph)))
6886                 return;
6887
6888         /* ensure no bits set besides ack or psh */
6889         if (lroh->th.fin || lroh->th.syn || lroh->th.rst ||
6890             lroh->th.urg || lroh->th.ece || lroh->th.cwr ||
6891             !lroh->th.ack)
6892                 return;
6893
6894         /* store the total packet length */
6895         data_len = ntohs(lroh->iph.tot_len);
6896
6897         /* remove any padding from the end of the skb */
6898         __pskb_trim(skb, data_len);
6899
6900         /* remove header length from data length */
6901         data_len -= sizeof(struct igb_lrohdr);
6902
6903         /*
6904          * check for timestamps. Since timestamps are the only option we handle,
6905          * we only have to handle the simple case of aligned timestamps
6906          */
6907         opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr);
6908         if (opt_bytes != 0) {
6909                 if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) ||
6910                     !pskb_may_pull(skb, sizeof(struct igb_lrohdr) +
6911                                         TCPOLEN_TSTAMP_ALIGNED) ||
6912                     (lroh->ts[0] != htonl((TCPOPT_NOP << 24) |
6913                                              (TCPOPT_NOP << 16) |
6914                                              (TCPOPT_TIMESTAMP << 8) |
6915                                               TCPOLEN_TIMESTAMP)) ||
6916                     (lroh->ts[2] == 0)) {
6917                         return;
6918                 }
6919
6920                 IGB_CB(skb)->tsval = ntohl(lroh->ts[1]);
6921                 IGB_CB(skb)->tsecr = lroh->ts[2];
6922
6923                 data_len -= TCPOLEN_TSTAMP_ALIGNED;
6924         }
6925
6926         /* record data_len as mss for the packet */
6927         IGB_CB(skb)->mss = data_len;
6928         IGB_CB(skb)->next_seq = ntohl(lroh->th.seq);
6929 }
6930
6931 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
6932 static bool igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb)
6933 {
6934         struct sk_buff *tail;
6935         struct skb_shared_info *tail_info;
6936         struct skb_shared_info *new_skb_info;
6937         u16 data_len;
6938
6939         /* header must be empty to pull frags into current skb */
6940         if (skb_headlen(new_skb))
6941                 return false;
6942
6943         if (IGB_CB(lro_skb)->tail)
6944                 tail = IGB_CB(lro_skb)->tail;
6945         else
6946                 tail = lro_skb;
6947
6948         tail_info = skb_shinfo(tail);
6949         new_skb_info = skb_shinfo(new_skb);
6950
6951         /* make sure we have room in frags list */
6952         if (new_skb_info->nr_frags >= (MAX_SKB_FRAGS - tail_info->nr_frags))
6953                 return false;
6954
6955         /* bump append count */
6956         IGB_CB(lro_skb)->append_cnt++;
6957
6958         /* copy frags into the last skb */
6959         memcpy(tail_info->frags + tail_info->nr_frags,
6960                new_skb_info->frags,
6961                new_skb_info->nr_frags * sizeof(skb_frag_t));
6962
6963         /* copy size data over */
6964         tail_info->nr_frags += new_skb_info->nr_frags;
6965         data_len = IGB_CB(new_skb)->mss;
6966         tail->len += data_len;
6967         tail->data_len += data_len;
6968         tail->truesize += data_len;
6969
6970         /* wipe record of data from new_skb */
6971         new_skb_info->nr_frags = 0;
6972         new_skb->len = new_skb->data_len = 0;
6973         new_skb->truesize -= data_len;
6974         new_skb->data = new_skb->head + NET_SKB_PAD + NET_IP_ALIGN;
6975         skb_reset_tail_pointer(new_skb);
6976         new_skb->protocol = 0;
6977         new_skb->ip_summed = CHECKSUM_NONE;
6978 #ifdef HAVE_VLAN_RX_REGISTER
6979         IGB_CB(new_skb)->vid = 0;
6980 #else
6981         new_skb->vlan_tci = 0;
6982 #endif
6983
6984         return true;
6985 }
6986
6987 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
6988 /**
6989  * igb_lro_queue - if able, queue skb into lro chain
6990  * @q_vector: structure containing interrupt and ring information
6991  * @new_skb: pointer to current skb being checked
6992  *
6993  * Checks whether the given skb is eligible for LRO and, if so, chains it
6994  * to the existing lro_skb for the same flow. If no LRO entry exists for
6995  * the flow, a new one is created.
6996  **/
6997 static struct sk_buff *igb_lro_queue(struct igb_q_vector *q_vector,
6998                                        struct sk_buff *new_skb)
6999 {
7000         struct sk_buff *lro_skb;
7001         struct igb_lro_list *lrolist = q_vector->lrolist;
7002         struct igb_lrohdr *lroh = igb_lro_hdr(new_skb);
7003         __be32 saddr = lroh->iph.saddr;
7004         __be32 daddr = lroh->iph.daddr;
7005         __be32 tcp_ports = *(__be32 *)&lroh->th;
7006         u16 data_len;
7007 #ifdef HAVE_VLAN_RX_REGISTER
7008         u16 vid = IGB_CB(new_skb)->vid;
7009 #else
7010         u16 vid = new_skb->vlan_tci;
7011 #endif
7012
7013         igb_lro_header_ok(new_skb);
7014
7015         /*
7016          * we have a packet that might be eligible for LRO,
7017          * so see if it matches anything we might expect
7018          */
7019         skb_queue_walk(&lrolist->active, lro_skb) {
7020                 if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports ||
7021                     igb_lro_hdr(lro_skb)->iph.saddr != saddr ||
7022                     igb_lro_hdr(lro_skb)->iph.daddr != daddr)
7023                         continue;
7024
7025 #ifdef HAVE_VLAN_RX_REGISTER
7026                 if (IGB_CB(lro_skb)->vid != vid)
7027 #else
7028                 if (lro_skb->vlan_tci != vid)
7029 #endif
7030                         continue;
7031
7032                 /* out of order packet */
7033                 if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) {
7034                         igb_lro_flush(q_vector, lro_skb);
7035                         IGB_CB(new_skb)->mss = 0;
7036                         break;
7037                 }
7038
7039                 /* the TCP timestamp option is present on only one of the packets */
7040                 if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) {
7041                         igb_lro_flush(q_vector, lro_skb);
7042                         break;
7043                 }
7044
7045                 /* make sure timestamp values are increasing */
7046                 if (IGB_CB(lro_skb)->tsecr &&
7047                     IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) {
7048                         igb_lro_flush(q_vector, lro_skb);
7049                         IGB_CB(new_skb)->mss = 0;
7050                         break;
7051                 }
7052
7053                 data_len = IGB_CB(new_skb)->mss;
7054
7055                 /*
7056                  * malformed header, no tcp data, resultant packet would
7057                  * be too large, or new skb is larger than our current mss.
7058                  */
7059                 if (data_len == 0 ||
7060                     data_len > IGB_CB(lro_skb)->mss ||
7061                     data_len > IGB_CB(lro_skb)->free) {
7062                         igb_lro_flush(q_vector, lro_skb);
7063                         break;
7064                 }
7065
7066                 /* ack sequence numbers or window size has changed */
7067                 if (igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq ||
7068                     igb_lro_hdr(lro_skb)->th.window != lroh->th.window) {
7069                         igb_lro_flush(q_vector, lro_skb);
7070                         break;
7071                 }
7072
7073                 /* remove the IP and TCP headers */
7074                 skb_pull(new_skb, new_skb->len - data_len);
7075
7076                 /* update timestamp and timestamp echo response */
7077                 IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval;
7078                 IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr;
7079
7080                 /* update sequence and free space */
7081                 IGB_CB(lro_skb)->next_seq += data_len;
7082                 IGB_CB(lro_skb)->free -= data_len;
7083
7084 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7085                 /* if header is empty pull pages into current skb */
7086                 if (igb_merge_frags(lro_skb, new_skb)) {
7087                         lrolist->stats.recycled++;
7088                 } else {
7089 #endif
7090                         /* chain this new skb in frag_list */
7091                         igb_add_active_tail(lro_skb, new_skb);
7092                         new_skb = NULL;
7093 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7094                 }
7095 #endif
7096
7097                 if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh) {
7098                         igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh;
7099                         igb_lro_flush(q_vector, lro_skb);
7100                 }
7101
7102                 lrolist->stats.coal++;
7103                 return new_skb;
7104         }
7105
7106         if (IGB_CB(new_skb)->mss && !lroh->th.psh) {
7107                 /* if we are at capacity flush the tail */
7108                 if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) {
7109                         lro_skb = skb_peek_tail(&lrolist->active);
7110                         if (lro_skb)
7111                                 igb_lro_flush(q_vector, lro_skb);
7112                 }
7113
7114                 /* update sequence and free space */
7115                 IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss;
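                /* cap on coalesced bytes: 65521 appears to be the 65535-byte
                 * IP length limit minus the 14-byte Ethernet header (an
                 * assumption - the constant is undocumented in this driver) */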
7116                 IGB_CB(new_skb)->free = 65521 - new_skb->len;
7117
7118                 /* .. and insert at the front of the active list */
7119                 __skb_queue_head(&lrolist->active, new_skb);
7120
7121                 lrolist->stats.coal++;
7122                 return NULL;
7123         }
7124
7125         /* packet not handled by any of the above, pass it to the stack */
7126 #ifdef HAVE_VLAN_RX_REGISTER
7127         igb_receive_skb(q_vector, new_skb);
7128 #else
7129         napi_gro_receive(&q_vector->napi, new_skb);
7130 #endif
7131         return NULL;
7132 }
7133
7134 #endif /* IGB_NO_LRO */
7135 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
7136 {
7137         struct igb_ring *rx_ring = q_vector->rx.ring;
7138         union e1000_adv_rx_desc *rx_desc;
7139 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7140         const int current_node = numa_node_id();
7141 #endif
7142         unsigned int total_bytes = 0, total_packets = 0;
7143         u16 cleaned_count = igb_desc_unused(rx_ring);
7144         u16 i = rx_ring->next_to_clean;
7145
7146         rx_desc = IGB_RX_DESC(rx_ring, i);
7147
7148         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
7149                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
7150                 struct sk_buff *skb = buffer_info->skb;
7151                 union e1000_adv_rx_desc *next_rxd;
7152
7153                 buffer_info->skb = NULL;
7154                 prefetch(skb->data);
7155
7156                 i++;
7157                 if (i == rx_ring->count)
7158                         i = 0;
7159
7160                 next_rxd = IGB_RX_DESC(rx_ring, i);
7161                 prefetch(next_rxd);
7162
7163                 /*
7164                  * This memory barrier is needed to keep us from reading
7165                  * any other fields out of the rx_desc until we know the
7166                  * RXD_STAT_DD bit is set
7167                  */
7168                 rmb();
7169
7170 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7171                 __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
7172                 dma_unmap_single(rx_ring->dev, buffer_info->dma,
7173                                  rx_ring->rx_buffer_len,
7174                                  DMA_FROM_DEVICE);
7175                 buffer_info->dma = 0;
7176
7177 #else
7178                 if (!skb_is_nonlinear(skb)) {
7179                         __skb_put(skb, igb_get_hlen(rx_desc));
7180                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
7181                                          IGB_RX_HDR_LEN,
7182                                          DMA_FROM_DEVICE);
7183                         buffer_info->dma = 0;
7184                 }
7185
7186                 if (rx_desc->wb.upper.length) {
7187                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
7188
7189                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
7190                                            buffer_info->page,
7191                                            buffer_info->page_offset,
7192                                            length);
7193
7194                         skb->len += length;
7195                         skb->data_len += length;
7196                         skb->truesize += length;
7197
7198                         if ((page_count(buffer_info->page) != 1) ||
7199                             (page_to_nid(buffer_info->page) != current_node))
7200                                 buffer_info->page = NULL;
7201                         else
7202                                 get_page(buffer_info->page);
7203
7204                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
7205                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
7206                         buffer_info->page_dma = 0;
7207                 }
7208
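                /* not end-of-packet: the frame spans several descriptors, so
                 * park the in-progress skb with the next buffer and keep going */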
7209                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
7210                         struct igb_rx_buffer *next_buffer;
7211                         next_buffer = &rx_ring->rx_buffer_info[i];
7212                         buffer_info->skb = next_buffer->skb;
7213                         buffer_info->dma = next_buffer->dma;
7214                         next_buffer->skb = skb;
7215                         next_buffer->dma = 0;
7216                         goto next_desc;
7217                 }
7218
7219 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7220                 if (igb_test_staterr(rx_desc,
7221                                      E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
7222                         dev_kfree_skb_any(skb);
7223                         goto next_desc;
7224                 }
7225
7226 #ifdef HAVE_HW_TIME_STAMP
7227                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
7228 #endif
7229 #ifdef NETIF_F_RXHASH
7230                 igb_rx_hash(rx_ring, rx_desc, skb);
7231 #endif
7232                 igb_rx_checksum(rx_ring, rx_desc, skb);
7233                 igb_rx_vlan(rx_ring, rx_desc, skb);
7234
7235                 total_bytes += skb->len;
7236                 total_packets++;
7237
7238                 skb->protocol = eth_type_trans(skb, netdev_ring(rx_ring));
7239
7240 #ifndef IGB_NO_LRO
7241                 if (igb_can_lro(rx_ring, rx_desc, skb))
7242                         buffer_info->skb = igb_lro_queue(q_vector, skb);
7243                 else
7244 #endif
7245 #ifdef HAVE_VLAN_RX_REGISTER
7246                         igb_receive_skb(q_vector, skb);
7247 #else
7248                         napi_gro_receive(&q_vector->napi, skb);
7249 #endif
7250
7251 #ifndef NETIF_F_GRO
7252                 netdev_ring(rx_ring)->last_rx = jiffies;
7253
7254 #endif
7255                 budget--;
7256 next_desc:
7257                 cleaned_count++;
7258
7259                 if (!budget)
7260                         break;
7261
7262                 /* return some buffers to hardware, one at a time is too slow */
7263                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
7264                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
7265                         cleaned_count = 0;
7266                 }
7267
7268                 /* use prefetched values */
7269                 rx_desc = next_rxd;
7270         }
7271
7272         rx_ring->next_to_clean = i;
7273         rx_ring->rx_stats.packets += total_packets;
7274         rx_ring->rx_stats.bytes += total_bytes;
7275         q_vector->rx.total_packets += total_packets;
7276         q_vector->rx.total_bytes += total_bytes;
7277
7278         if (cleaned_count)
7279                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
7280
7281 #ifndef IGB_NO_LRO
7282         if (netdev_ring(rx_ring)->features & NETIF_F_LRO)
7283                 igb_lro_flush_all(q_vector);
7284
7285 #endif /* IGB_NO_LRO */
7286         return !!budget;
7287 }
7288
7289 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
7290                                  struct igb_rx_buffer *bi)
7291 {
7292         struct sk_buff *skb = bi->skb;
7293         dma_addr_t dma = bi->dma;
7294
7295         if (dma)
7296                 return true;
7297
7298         if (likely(!skb)) {
7299 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7300                 skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
7301                                                 rx_ring->rx_buffer_len);
7302 #else
7303                 skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
7304                                                 IGB_RX_HDR_LEN);
7305 #endif
7306                 bi->skb = skb;
7307                 if (!skb) {
7308                         rx_ring->rx_stats.alloc_failed++;
7309                         return false;
7310                 }
7311
7312                 /* initialize skb for ring */
7313                 skb_record_rx_queue(skb, ring_queue_index(rx_ring));
7314         }
7315
7316 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7317         dma = dma_map_single(rx_ring->dev, skb->data,
7318                              rx_ring->rx_buffer_len, DMA_FROM_DEVICE);
7319 #else
7320         dma = dma_map_single(rx_ring->dev, skb->data,
7321                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
7322 #endif
7323
7324         if (dma_mapping_error(rx_ring->dev, dma)) {
7325                 rx_ring->rx_stats.alloc_failed++;
7326                 return false;
7327         }
7328
7329         bi->dma = dma;
7330         return true;
7331 }
7332
7333 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7334 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
7335                                   struct igb_rx_buffer *bi)
7336 {
7337         struct page *page = bi->page;
7338         dma_addr_t page_dma = bi->page_dma;
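        /* each page is carved into two half-page RX buffers; XOR-ing the
         * stored offset selects the other half for this refill */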
7339         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
7340
7341         if (page_dma)
7342                 return true;
7343
7344         if (!page) {
7345                 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
7346                 bi->page = page;
7347                 if (unlikely(!page)) {
7348                         rx_ring->rx_stats.alloc_failed++;
7349                         return false;
7350                 }
7351         }
7352
7353         page_dma = dma_map_page(rx_ring->dev, page,
7354                                 page_offset, PAGE_SIZE / 2,
7355                                 DMA_FROM_DEVICE);
7356
7357         if (dma_mapping_error(rx_ring->dev, page_dma)) {
7358                 rx_ring->rx_stats.alloc_failed++;
7359                 return false;
7360         }
7361
7362         bi->page_dma = page_dma;
7363         bi->page_offset = page_offset;
7364         return true;
7365 }
7366
7367 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7368 /**
7369  * igb_alloc_rx_buffers - Replace used receive buffers; packet split
7370  * @rx_ring: rx descriptor ring to refill; @cleaned_count: number of buffers to replace
7371  **/
7372 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
7373 {
7374         union e1000_adv_rx_desc *rx_desc;
7375         struct igb_rx_buffer *bi;
7376         u16 i = rx_ring->next_to_use;
7377
7378         rx_desc = IGB_RX_DESC(rx_ring, i);
7379         bi = &rx_ring->rx_buffer_info[i];
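        /* bias the index by -count so the wrap point shows up as i == 0;
         * the true index is restored with "i += rx_ring->count" below */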
7380         i -= rx_ring->count;
7381
7382         while (cleaned_count--) {
7383                 if (!igb_alloc_mapped_skb(rx_ring, bi))
7384                         break;
7385
7386                 /* Refresh the desc even if buffer_addrs didn't change
7387                  * because each write-back erases this info. */
7388 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7389                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
7390 #else
7391                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
7392
7393                 if (!igb_alloc_mapped_page(rx_ring, bi))
7394                         break;
7395
7396                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
7397
7398 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7399                 rx_desc++;
7400                 bi++;
7401                 i++;
7402                 if (unlikely(!i)) {
7403                         rx_desc = IGB_RX_DESC(rx_ring, 0);
7404                         bi = rx_ring->rx_buffer_info;
7405                         i -= rx_ring->count;
7406                 }
7407
7408                 /* clear the hdr_addr for the next_to_use descriptor */
7409                 rx_desc->read.hdr_addr = 0;
7410         }
7411
7412         i += rx_ring->count;
7413
7414         if (rx_ring->next_to_use != i) {
7415                 rx_ring->next_to_use = i;
7416
7417                 /* Force memory writes to complete before letting h/w
7418                  * know there are new descriptors to fetch.  (Only
7419                  * applicable for weak-ordered memory model archs,
7420                  * such as IA-64). */
7421                 wmb();
7422                 writel(i, rx_ring->tail);
7423         }
7424 }
7425
7426 #ifdef SIOCGMIIPHY
7427 /**
7428  * igb_mii_ioctl - handle MII register ioctls (SIOCGMIIPHY/SIOCGMIIREG)
7429  * @netdev: network interface device structure
7430  * @ifr: pointer to the ifreq carrying the MII data
7431  * @cmd: ioctl command
7432  **/
7433 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7434 {
7435         struct igb_adapter *adapter = netdev_priv(netdev);
7436         struct mii_ioctl_data *data = if_mii(ifr);
7437
7438         if (adapter->hw.phy.media_type != e1000_media_type_copper)
7439                 return -EOPNOTSUPP;
7440
7441         switch (cmd) {
7442         case SIOCGMIIPHY:
7443                 data->phy_id = adapter->hw.phy.addr;
7444                 break;
7445         case SIOCGMIIREG:
7446                 if (!capable(CAP_NET_ADMIN))
7447                         return -EPERM;
7448                 if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
7449                                    &data->val_out))
7450                         return -EIO;
7451                 break;
7452         case SIOCSMIIREG:
7453         default:
7454                 return -EOPNOTSUPP;
7455         }
7456         return E1000_SUCCESS;
7457 }
7458
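/*
 * Minimal userspace sketch (not part of the driver) of how the MII ioctls
 * above are typically exercised; the interface name and the open socket fd
 * are assumptions:
 *
 *     struct ifreq ifr = {0};
 *     struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *     strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *     ioctl(fd, SIOCGMIIPHY, &ifr);    // PHY address returned in mii->phy_id
 *     mii->reg_num = MII_BMSR;         // basic mode status register
 *     ioctl(fd, SIOCGMIIREG, &ifr);    // register value lands in mii->val_out
 */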
7459 #endif
7460 #ifdef HAVE_HW_TIME_STAMP
7461 /**
7462  * igb_hwtstamp_ioctl - control hardware time stamping
7463  * @netdev: network interface device structure
7464  * @ifr: pointer to the ifreq carrying the hwtstamp_config
7465  * @cmd: ioctl command
7466  *
7467  * Outgoing time stamping can be enabled and disabled. Play nice and
7468  * disable it when requested, although it shouldn't cause any overhead
7469  * when no packet needs it. At most one packet in the queue may be
7470  * marked for time stamping, otherwise it would be impossible to tell
7471  * for sure to which packet the hardware time stamp belongs.
7472  *
7473  * Incoming time stamping has to be configured via the hardware
7474  * filters. Not all combinations are supported, in particular event
7475  * type has to be specified. Matching the kind of event packet is
7476  * not supported, with the exception of "all V2 events regardless of
7477  * level 2 or 4".
7478  *
7479  **/
7480 static int igb_hwtstamp_ioctl(struct net_device *netdev,
7481                               struct ifreq *ifr, int cmd)
7482 {
7483         struct igb_adapter *adapter = netdev_priv(netdev);
7484         struct e1000_hw *hw = &adapter->hw;
7485         struct hwtstamp_config config;
7486         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
7487         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
7488         u32 tsync_rx_cfg = 0;
7489         bool is_l4 = false;
7490         bool is_l2 = false;
7491         u32 regval;
7492
7493         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
7494                 return -EFAULT;
7495
7496         /* reserved for future extensions */
7497         if (config.flags)
7498                 return -EINVAL;
7499
7500         switch (config.tx_type) {
7501         case HWTSTAMP_TX_OFF:
7502                 tsync_tx_ctl = 0;
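                /* fall through - TX timestamping is simply left disabled */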
7503         case HWTSTAMP_TX_ON:
7504                 break;
7505         default:
7506                 return -ERANGE;
7507         }
7508
7509         switch (config.rx_filter) {
7510         case HWTSTAMP_FILTER_NONE:
7511                 tsync_rx_ctl = 0;
7512                 break;
7513         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
7514         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
7515         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
7516         case HWTSTAMP_FILTER_ALL:
7517                 /*
7518                  * register TSYNCRXCFG must be set, therefore it is not
7519                  * possible to time stamp both Sync and Delay_Req messages
7520                  * => fall back to time stamping all packets
7521                  */
7522                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
7523                 config.rx_filter = HWTSTAMP_FILTER_ALL;
7524                 break;
7525         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
7526                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
7527                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
7528                 is_l4 = true;
7529                 break;
7530         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
7531                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
7532                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
7533                 is_l4 = true;
7534                 break;
7535         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
7536         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
7537                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
7538                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
7539                 is_l2 = true;
7540                 is_l4 = true;
7541                 config.rx_filter = HWTSTAMP_FILTER_SOME;
7542                 break;
7543         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
7544         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
7545                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
7546                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
7547                 is_l2 = true;
7548                 is_l4 = true;
7549                 config.rx_filter = HWTSTAMP_FILTER_SOME;
7550                 break;
7551         case HWTSTAMP_FILTER_PTP_V2_EVENT:
7552         case HWTSTAMP_FILTER_PTP_V2_SYNC:
7553         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
7554                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
7555                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
7556                 is_l2 = true;
7557                 is_l4 = true;
7558                 break;
7559         default:
7560                 return -ERANGE;
7561         }
7562
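        /* the 82575 has no hardware timestamping support, so only a
         * request that disables everything can succeed */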
7563         if (hw->mac.type == e1000_82575) {
7564                 if (tsync_rx_ctl || tsync_tx_ctl)
7565                         return -EINVAL;
7566                 return 0;
7567         }
7568
7569 #ifdef IGB_PER_PKT_TIMESTAMP
7570         /*
7571          * Per-packet timestamping only works if all packets are
7572          * timestamped, so enable timestamping in all packets as
7573          * long as one rx filter was configured.
7574          */
7575         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
7576                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
7577                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
7578         }
7579 #endif
7580
7581         /* enable/disable TX */
7582         regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
7583         regval &= ~E1000_TSYNCTXCTL_ENABLED;
7584         regval |= tsync_tx_ctl;
7585         E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
7586
7587         /* enable/disable RX */
7588         regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
7589         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
7590         regval |= tsync_rx_ctl;
7591         E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
7592
7593         /* define which PTP packets are time stamped */
7594         E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
7595
7596         /* define ethertype filter for timestamped packets */
7597         if (is_l2)
7598                 E1000_WRITE_REG(hw, E1000_ETQF(3),
7599                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
7600                                  E1000_ETQF_1588 | /* enable timestamping */
7601                                  ETH_P_1588));     /* 1588 eth protocol type */
7602         else
7603                 E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
7604
7605 #define PTP_PORT 319
7606         /* L4 Queue Filter[3]: filter by destination port and protocol */
7607         if (is_l4) {
7608                 u32 ftqf = (IPPROTO_UDP /* UDP */
7609                         | E1000_FTQF_VF_BP /* VF not compared */
7610                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
7611                         | E1000_FTQF_MASK); /* mask all inputs */
7612                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
7613
7614                 E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_PORT));
7615                 E1000_WRITE_REG(hw, E1000_IMIREXT(3),
7616                                 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
7617                 if (hw->mac.type == e1000_82576) {
7618                         /* enable source port check */
7619                         E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_PORT));
7620                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
7621                 }
7622                 E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
7623         } else {
7624                 E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
7625         }
7626         E1000_WRITE_FLUSH(hw);
7627
7628         adapter->hwtstamp_config = config;
7629
7630         /* clear TX/RX time stamp registers, just to be sure */
7631         regval = E1000_READ_REG(hw, E1000_TXSTMPH);
7632         regval = E1000_READ_REG(hw, E1000_RXSTMPH);
7633
7634         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
7635                 -EFAULT : 0;
7636 }
7637
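/*
 * Illustrative userspace sketch of driving the handler above; the interface
 * name and the open socket fd are assumptions, and <linux/net_tstamp.h>
 * provides struct hwtstamp_config:
 *
 *     struct hwtstamp_config cfg = {0};
 *     struct ifreq ifr = {0};
 *     cfg.tx_type = HWTSTAMP_TX_ON;
 *     cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *     strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *     ifr.ifr_data = (void *)&cfg;
 *     ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *     // on return cfg.rx_filter reflects what the hardware actually does
 */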
7638 #endif
7639 /**
7640  * igb_ioctl - dispatch device-specific ioctls
7641  * @netdev: network interface device structure
7642  * @ifr: pointer to the ioctl request data
7643  * @cmd: ioctl command
7644  **/
7645 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7646 {
7647         switch (cmd) {
7648 #ifdef SIOCGMIIPHY
7649         case SIOCGMIIPHY:
7650         case SIOCGMIIREG:
7651         case SIOCSMIIREG:
7652                 return igb_mii_ioctl(netdev, ifr, cmd);
7653 #endif
7654 #ifdef HAVE_HW_TIME_STAMP
7655         case SIOCSHWTSTAMP:
7656                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
7657 #endif
7658 #ifdef ETHTOOL_OPS_COMPAT
7659         case SIOCETHTOOL:
7660                 return ethtool_ioctl(ifr);
7661 #endif
7662         default:
7663                 return -EOPNOTSUPP;
7664         }
7665 }
7666
7667 s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7668 {
7669         struct igb_adapter *adapter = hw->back;
7670         u16 cap_offset;
7671
7672         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
7673         if (!cap_offset)
7674                 return -E1000_ERR_CONFIG;
7675
7676         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
7677
7678         return E1000_SUCCESS;
7679 }
7680
7681 s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7682 {
7683         struct igb_adapter *adapter = hw->back;
7684         u16 cap_offset;
7685
7686         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
7687         if (!cap_offset)
7688                 return -E1000_ERR_CONFIG;
7689
7690         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
7691
7692         return E1000_SUCCESS;
7693 }
7694
7695 #ifdef HAVE_VLAN_RX_REGISTER
7696 static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp)
7697 #else
7698 void igb_vlan_mode(struct net_device *netdev, u32 features)
7699 #endif
7700 {
7701         struct igb_adapter *adapter = netdev_priv(netdev);
7702         struct e1000_hw *hw = &adapter->hw;
7703         u32 ctrl, rctl;
7704         int i;
7705 #ifdef HAVE_VLAN_RX_REGISTER
7706         bool enable = !!vlgrp;
7707
7708         igb_irq_disable(adapter);
7709
7710         adapter->vlgrp = vlgrp;
7711
7712         if (!test_bit(__IGB_DOWN, &adapter->state))
7713                 igb_irq_enable(adapter);
7714 #else
7715         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
7716 #endif
7717
7718         if (enable) {
7719                 /* enable VLAN tag insert/strip */
7720                 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7721                 ctrl |= E1000_CTRL_VME;
7722                 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7723
7724                 /* Disable CFI check */
7725                 rctl = E1000_READ_REG(hw, E1000_RCTL);
7726                 rctl &= ~E1000_RCTL_CFIEN;
7727                 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
7728         } else {
7729                 /* disable VLAN tag insert/strip */
7730                 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7731                 ctrl &= ~E1000_CTRL_VME;
7732                 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7733         }
7734
7735 #ifndef CONFIG_IGB_VMDQ_NETDEV
7736         for (i = 0; i < adapter->vmdq_pools; i++) {
7737                 igb_set_vf_vlan_strip(adapter,
7738                                       adapter->vfs_allocated_count + i,
7739                                       enable);
7740         }
7741
7742 #else
7743         igb_set_vf_vlan_strip(adapter,
7744                               adapter->vfs_allocated_count,
7745                               enable);
7746
7747         for (i = 1; i < adapter->vmdq_pools; i++) {
7748 #ifdef HAVE_VLAN_RX_REGISTER
7749                 struct igb_vmdq_adapter *vadapter;
7750                 vadapter = netdev_priv(adapter->vmdq_netdev[i-1]);
7751                 enable = !!vadapter->vlgrp;
7752 #else
7753                 struct net_device *vnetdev;
7754                 vnetdev = adapter->vmdq_netdev[i-1];
7755                 enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX);
7756 #endif
7757                 igb_set_vf_vlan_strip(adapter,
7758                                       adapter->vfs_allocated_count + i,
7759                                       enable);
7760         }
7761
7762 #endif
7763         igb_rlpml_set(adapter);
7764 }
7765
7766 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7767 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
7768 #else
7769 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
7770 #endif
7771 {
7772         struct igb_adapter *adapter = netdev_priv(netdev);
7773         int pf_id = adapter->vfs_allocated_count;
7774
7775         /* attempt to add filter to vlvf array */
7776         igb_vlvf_set(adapter, vid, TRUE, pf_id);
7777
7778         /* add the filter since PF can receive vlans w/o entry in vlvf */
7779         igb_vfta_set(adapter, vid, TRUE);
7780 #ifndef HAVE_NETDEV_VLAN_FEATURES
7781
7782         /* Copy feature flags from netdev to the vlan netdev for this vid.
7783          * This allows things like TSO to bubble down to our vlan device.
7784          * There is no need to update netdev for vlan 0 (DCB), since it
7785          * wouldn't have a v_netdev.
7786          */
7787         if (adapter->vlgrp) {
7788                 struct vlan_group *vlgrp = adapter->vlgrp;
7789                 struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid);
7790                 if (v_netdev) {
7791                         v_netdev->features |= netdev->features;
7792                         vlan_group_set_device(vlgrp, vid, v_netdev);
7793                 }
7794         }
7795 #endif
7796 #ifndef HAVE_VLAN_RX_REGISTER
7797
7798         set_bit(vid, adapter->active_vlans);
7799 #endif
7800 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7801         return 0;
7802 #endif
7803 }
7804
7805 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7806 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
7807 #else
7808 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
7809 #endif
7810 {
7811         struct igb_adapter *adapter = netdev_priv(netdev);
7812         int pf_id = adapter->vfs_allocated_count;
7813         s32 err;
7814
7815 #ifdef HAVE_VLAN_RX_REGISTER
7816         igb_irq_disable(adapter);
7817
7818         vlan_group_set_device(adapter->vlgrp, vid, NULL);
7819
7820         if (!test_bit(__IGB_DOWN, &adapter->state))
7821                 igb_irq_enable(adapter);
7822
7823 #endif /* HAVE_VLAN_RX_REGISTER */
7824         /* remove vlan from VLVF table array */
7825         err = igb_vlvf_set(adapter, vid, FALSE, pf_id);
7826
7827         /* if vid was not present in VLVF just remove it from table */
7828         if (err)
7829                 igb_vfta_set(adapter, vid, FALSE);
7830 #ifndef HAVE_VLAN_RX_REGISTER
7831
7832         clear_bit(vid, adapter->active_vlans);
7833 #endif
7834 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7835         return 0;
7836 #endif
7837 }
7838
7839 static void igb_restore_vlan(struct igb_adapter *adapter)
7840 {
7841 #ifdef HAVE_VLAN_RX_REGISTER
7842         igb_vlan_mode(adapter->netdev, adapter->vlgrp);
7843
7844         if (adapter->vlgrp) {
7845                 u16 vid;
7846                 for (vid = 0; vid < VLAN_N_VID; vid++) {
7847                         if (!vlan_group_get_device(adapter->vlgrp, vid))
7848                                 continue;
7849                         igb_vlan_rx_add_vid(adapter->netdev, vid);
7850                 }
7851         }
7852 #else
7853         u16 vid;
7854
7855         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
7856
7857         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
7858                 igb_vlan_rx_add_vid(adapter->netdev, vid);
7859 #endif
7860 }
7861
7862 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
7863 {
7864         struct pci_dev *pdev = adapter->pdev;
7865         struct e1000_mac_info *mac = &adapter->hw.mac;
7866
7867         mac->autoneg = 0;
7868
7869         /* Fiber NICs only allow 1000 Mbps full duplex */
7870         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
7871                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
7872                 dev_err(pci_dev_to_dev(pdev),
7873                         "Unsupported Speed/Duplex configuration\n");
7874                 return -EINVAL;
7875         }
7876
7877         switch (spddplx) {
7878         case SPEED_10 + DUPLEX_HALF:
7879                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
7880                 break;
7881         case SPEED_10 + DUPLEX_FULL:
7882                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
7883                 break;
7884         case SPEED_100 + DUPLEX_HALF:
7885                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
7886                 break;
7887         case SPEED_100 + DUPLEX_FULL:
7888                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
7889                 break;
7890         case SPEED_1000 + DUPLEX_FULL:
7891                 mac->autoneg = 1;
7892                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
7893                 break;
7894         case SPEED_1000 + DUPLEX_HALF: /* not supported */
7895         default:
7896                 dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n");
7897                 return -EINVAL;
7898         }
7899         return 0;
7900 }
7901
7902 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
7903                           bool runtime)
7904 {
7905         struct net_device *netdev = pci_get_drvdata(pdev);
7906         struct igb_adapter *adapter = netdev_priv(netdev);
7907         struct e1000_hw *hw = &adapter->hw;
7908         u32 ctrl, rctl, status;
7909         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
7910 #ifdef CONFIG_PM
7911         int retval = 0;
7912 #endif
7913
7914         netif_device_detach(netdev);
7915
7916         if (netif_running(netdev))
7917                 __igb_close(netdev, true);
7918
7919         igb_clear_interrupt_scheme(adapter);
7920
7921 #ifdef CONFIG_PM
7922         retval = pci_save_state(pdev);
7923         if (retval)
7924                 return retval;
7925 #endif
7926
7927         status = E1000_READ_REG(hw, E1000_STATUS);
7928         if (status & E1000_STATUS_LU)
7929                 wufc &= ~E1000_WUFC_LNKC;
7930
7931         if (wufc) {
7932                 igb_setup_rctl(adapter);
7933                 igb_set_rx_mode(netdev);
7934
7935                 /* turn on all-multi mode if wake on multicast is enabled */
7936                 if (wufc & E1000_WUFC_MC) {
7937                         rctl = E1000_READ_REG(hw, E1000_RCTL);
7938                         rctl |= E1000_RCTL_MPE;
7939                         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
7940                 }
7941
7942                 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7943                 /* phy power management enable */
7944                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
7945                 ctrl |= E1000_CTRL_ADVD3WUC;
7946                 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7947
7948                 /* Allow time for pending master requests to run */
7949                 e1000_disable_pcie_master(hw);
7950
7951                 E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN);
7952                 E1000_WRITE_REG(hw, E1000_WUFC, wufc);
7953         } else {
7954                 E1000_WRITE_REG(hw, E1000_WUC, 0);
7955                 E1000_WRITE_REG(hw, E1000_WUFC, 0);
7956         }
7957
7958         *enable_wake = wufc || adapter->en_mng_pt;
7959         if (!*enable_wake)
7960                 igb_power_down_link(adapter);
7961         else
7962                 igb_power_up_link(adapter);
7963
7964         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
7965          * would have already happened in close and is redundant. */
7966         igb_release_hw_control(adapter);
7967
7968         pci_disable_device(pdev);
7969
7970         return 0;
7971 }
7972
7973 #ifdef CONFIG_PM
7974 #ifdef HAVE_SYSTEM_SLEEP_PM_OPS
7975 static int igb_suspend(struct device *dev)
7976 {
7977         int retval;
7978         bool wake;
7979         struct pci_dev *pdev = to_pci_dev(dev);
7980
7981         retval = __igb_shutdown(pdev, &wake, 0);
7982         if (retval)
7983                 return retval;
7984
7985         if (wake) {
7986                 pci_prepare_to_sleep(pdev);
7987         } else {
7988                 pci_wake_from_d3(pdev, false);
7989                 pci_set_power_state(pdev, PCI_D3hot);
7990         }
7991
7992         return 0;
7993 }
7994
7995 static int igb_resume(struct device *dev)
7996 {
7997         struct pci_dev *pdev = to_pci_dev(dev);
7998         struct net_device *netdev = pci_get_drvdata(pdev);
7999         struct igb_adapter *adapter = netdev_priv(netdev);
8000         struct e1000_hw *hw = &adapter->hw;
8001         u32 err;
8002
8003         pci_set_power_state(pdev, PCI_D0);
8004         pci_restore_state(pdev);
8005         pci_save_state(pdev);
8006
8007         err = pci_enable_device_mem(pdev);
8008         if (err) {
8009                 dev_err(pci_dev_to_dev(pdev),
8010                         "igb: Cannot enable PCI device from suspend\n");
8011                 return err;
8012         }
8013         pci_set_master(pdev);
8014
8015         pci_enable_wake(pdev, PCI_D3hot, 0);
8016         pci_enable_wake(pdev, PCI_D3cold, 0);
8017
8018 #ifdef CONFIG_PM_RUNTIME
8019         if (!rtnl_is_locked()) {
8020                 /*
8021                  * shut up ASSERT_RTNL() warning in
8022                  * netif_set_real_num_tx/rx_queues.
8023                  */
8024                 rtnl_lock();
8025                 err = igb_init_interrupt_scheme(adapter);
8026                 rtnl_unlock();
8027         } else {
8028                 err = igb_init_interrupt_scheme(adapter);
8029         }
8030         if (err) {
8031 #else
8032         if (igb_init_interrupt_scheme(adapter)) {
8033 #endif /* CONFIG_PM_RUNTIME */
8034                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
8035                 return -ENOMEM;
8036         }
8037
8038         igb_reset(adapter);
8039
8040         /* let the f/w know that the h/w is now under the control of the
8041          * driver. */
8042         igb_get_hw_control(adapter);
8043
8044         E1000_WRITE_REG(hw, E1000_WUS, ~0);
8045
8046         if (netdev->flags & IFF_UP) {
8047                 err = __igb_open(netdev, true);
8048                 if (err)
8049                         return err;
8050         }
8051
8052         netif_device_attach(netdev);
8053
8054         return 0;
8055 }
8056
8057 #ifdef CONFIG_PM_RUNTIME
8058 static int igb_runtime_idle(struct device *dev)
8059 {
8060         struct pci_dev *pdev = to_pci_dev(dev);
8061         struct net_device *netdev = pci_get_drvdata(pdev);
8062         struct igb_adapter *adapter = netdev_priv(netdev);
8063
8064         if (!igb_has_link(adapter))
8065                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
8066
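        /* always report busy so runtime PM does not suspend immediately;
         * the suspend scheduled above (link down) fires after 5 seconds */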
8067         return -EBUSY;
8068 }
8069
8070 static int igb_runtime_suspend(struct device *dev)
8071 {
8072         struct pci_dev *pdev = to_pci_dev(dev);
8073         int retval;
8074         bool wake;
8075
8076         retval = __igb_shutdown(pdev, &wake, 1);
8077         if (retval)
8078                 return retval;
8079
8080         if (wake) {
8081                 pci_prepare_to_sleep(pdev);
8082         } else {
8083                 pci_wake_from_d3(pdev, false);
8084                 pci_set_power_state(pdev, PCI_D3hot);
8085         }
8086
8087         return 0;
8088 }
8089
8090 static int igb_runtime_resume(struct device *dev)
8091 {
8092         return igb_resume(dev);
8093 }
8094 #endif /* CONFIG_PM_RUNTIME */
8095 #endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
8096 #endif /* CONFIG_PM */
8097
8098 #ifdef USE_REBOOT_NOTIFIER
8099 /* only want to do this for 2.4 kernels? */
8100 static int igb_notify_reboot(struct notifier_block *nb, unsigned long event,
8101                              void *p)
8102 {
8103         struct pci_dev *pdev = NULL;
8104         bool wake;
8105
8106         switch (event) {
8107         case SYS_DOWN:
8108         case SYS_HALT:
8109         case SYS_POWER_OFF:
8110                 while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
8111                         if (pci_dev_driver(pdev) == &igb_driver) {
8112                                 __igb_shutdown(pdev, &wake, 0);
8113                                 if (event == SYS_POWER_OFF) {
8114                                         pci_wake_from_d3(pdev, wake);
8115                                         pci_set_power_state(pdev, PCI_D3hot);
8116                                 }
8117                         }
8118                 }
8119         }
8120         return NOTIFY_DONE;
8121 }
8122 #else
8123 static void igb_shutdown(struct pci_dev *pdev)
8124 {
8125         bool wake = false;
8126
8127         __igb_shutdown(pdev, &wake, 0);
8128
8129         if (system_state == SYSTEM_POWER_OFF) {
8130                 pci_wake_from_d3(pdev, wake);
8131                 pci_set_power_state(pdev, PCI_D3hot);
8132         }
8133 }
8134 #endif /* USE_REBOOT_NOTIFIER */
8135
8136 #ifdef CONFIG_NET_POLL_CONTROLLER
8137 /*
8138  * Polling 'interrupt' - used by things like netconsole to send skbs
8139  * without having to re-enable interrupts. It's not called while
8140  * the interrupt routine is executing.
8141  */
8142 static void igb_netpoll(struct net_device *netdev)
8143 {
8144         struct igb_adapter *adapter = netdev_priv(netdev);
8145         struct e1000_hw *hw = &adapter->hw;
8146         struct igb_q_vector *q_vector;
8147         int i;
8148
8149         for (i = 0; i < adapter->num_q_vectors; i++) {
8150                 q_vector = adapter->q_vector[i];
8151                 if (adapter->msix_entries)
8152                         E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value);
8153                 else
8154                         igb_irq_disable(adapter);
8155                 napi_schedule(&q_vector->napi);
8156         }
8157 }
8158 #endif /* CONFIG_NET_POLL_CONTROLLER */
8159
8160 #ifdef HAVE_PCI_ERS
8161 #define E1000_DEV_ID_82576_VF 0x10CA
8162 /**
8163  * igb_io_error_detected - called when PCI error is detected
8164  * @pdev: Pointer to PCI device
8165  * @state: The current pci connection state
8166  *
8167  * This function is called after a PCI bus error affecting
8168  * this device has been detected.
8169  */
8170 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
8171                                               pci_channel_state_t state)
8172 {
8173         struct net_device *netdev = pci_get_drvdata(pdev);
8174         struct igb_adapter *adapter = netdev_priv(netdev);
8175
8176 #ifdef CONFIG_PCI_IOV__UNUSED
8177         struct pci_dev *bdev, *vfdev;
8178         u32 dw0, dw1, dw2, dw3;
8179         int vf, pos;
8180         u16 req_id, pf_func;
8181
8182         if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA))
8183                 goto skip_bad_vf_detection;
8184
8185         bdev = pdev->bus->self;
8186         while (bdev && (bdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
8187                 bdev = bdev->bus->self;
8188
8189         if (!bdev)
8190                 goto skip_bad_vf_detection;
8191
8192         pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
8193         if (!pos)
8194                 goto skip_bad_vf_detection;
8195
8196         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
8197         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
8198         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
8199         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
8200
8201         req_id = dw1 >> 16;
8202         /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */
8203         if (!(req_id & 0x0080))
8204                 goto skip_bad_vf_detection;
8205
8206         pf_func = req_id & 0x01;
8207         if ((pf_func & 1) == (pdev->devfn & 1)) {
8208
8209                 vf = (req_id & 0x7F) >> 1;
8210                 dev_err(pci_dev_to_dev(pdev),
8211                         "VF %d has caused a PCIe error\n", vf);
8212                 dev_err(pci_dev_to_dev(pdev),
8213                         "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
8214                         "%8.8x\tdw3: %8.8x\n",
8215                         dw0, dw1, dw2, dw3);
8216
8217                 /* Find the pci device of the offending VF */
8218                 vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
8219                                        E1000_DEV_ID_82576_VF, NULL);
8220                 while (vfdev) {
8221                         if (vfdev->devfn == (req_id & 0xFF))
8222                                 break;
8223                         vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
8224                                                E1000_DEV_ID_82576_VF, vfdev);
8225                 }
8226                 /*
8227                  * There's a slim chance the VF could have been hot plugged,
8228                  * so if it is no longer present we don't need to issue the
8229                  * VFLR.  Just clean up the AER in that case.
8230                  */
8231                 if (vfdev) {
8232                         dev_err(pci_dev_to_dev(pdev),
8233                                 "Issuing VFLR to VF %d\n", vf);
8234                         pci_write_config_dword(vfdev, 0xA8, 0x00008000);
8235                 }
8236
8237                 pci_cleanup_aer_uncorrect_error_status(pdev);
8238         }
8239
8240         /*
8241          * Even though the error may have occurred on the other port
8242          * we still need to increment the vf error reference count for
8243          * both ports because the I/O resume function will be called
8244          * for both of them.
8245          */
8246         adapter->vferr_refcount++;
8247
8248         return PCI_ERS_RESULT_RECOVERED;
8249
8250 skip_bad_vf_detection:
8251 #endif /* CONFIG_PCI_IOV__UNUSED */
8252
8253         netif_device_detach(netdev);
8254
8255         if (state == pci_channel_io_perm_failure)
8256                 return PCI_ERS_RESULT_DISCONNECT;
8257
8258         if (netif_running(netdev))
8259                 igb_down(adapter);
8260         pci_disable_device(pdev);
8261
8262         /* Request a slot reset. */
8263         return PCI_ERS_RESULT_NEED_RESET;
8264 }
8265
8266 /**
8267  * igb_io_slot_reset - called after the pci bus has been reset.
8268  * @pdev: Pointer to PCI device
8269  *
8270  * Restart the card from scratch, as if from a cold-boot. Implementation
8271  * resembles the first-half of the igb_resume routine.
8272  */
8273 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
8274 {
8275         struct net_device *netdev = pci_get_drvdata(pdev);
8276         struct igb_adapter *adapter = netdev_priv(netdev);
8277         struct e1000_hw *hw = &adapter->hw;
8278         pci_ers_result_t result;
8279
8280         if (pci_enable_device_mem(pdev)) {
8281                 dev_err(pci_dev_to_dev(pdev),
8282                         "Cannot re-enable PCI device after reset.\n");
8283                 result = PCI_ERS_RESULT_DISCONNECT;
8284         } else {
8285                 pci_set_master(pdev);
8286                 pci_restore_state(pdev);
8287                 pci_save_state(pdev);
8288
8289                 pci_enable_wake(pdev, PCI_D3hot, 0);
8290                 pci_enable_wake(pdev, PCI_D3cold, 0);
8291
8292                 schedule_work(&adapter->reset_task);
8293                 E1000_WRITE_REG(hw, E1000_WUS, ~0);
8294                 result = PCI_ERS_RESULT_RECOVERED;
8295         }
8296
8297         pci_cleanup_aer_uncorrect_error_status(pdev);
8298
8299         return result;
8300 }
8301
8302 /**
8303  * igb_io_resume - called when traffic can start flowing again.
8304  * @pdev: Pointer to PCI device
8305  *
8306  * This callback is called when the error recovery driver tells us that
8307  * its OK to resume normal operation. Implementation resembles the
8308  * second-half of the igb_resume routine.
8309  */
8310 static void igb_io_resume(struct pci_dev *pdev)
8311 {
8312         struct net_device *netdev = pci_get_drvdata(pdev);
8313         struct igb_adapter *adapter = netdev_priv(netdev);
8314
8315         if (adapter->vferr_refcount) {
8316                 dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n");
8317                 adapter->vferr_refcount--;
8318                 return;
8319         }
8320
8321         if (netif_running(netdev)) {
8322                 if (igb_up(adapter)) {
8323                         dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n");
8324                         return;
8325                 }
8326         }
8327
8328         netif_device_attach(netdev);
8329
8330         /* let the f/w know that the h/w is now under the control of the
8331          * driver. */
8332         igb_get_hw_control(adapter);
8333 }
8334
8335 #endif /* HAVE_PCI_ERS */
8336
8337 int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue)
8338 {
8339         struct e1000_hw *hw = &adapter->hw;
8340         int i;
8341
8342         if (is_zero_ether_addr(addr))
8343                 return 0;
8344
8345         for (i = 0; i < hw->mac.rar_entry_count; i++) {
8346                 if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
8347                         continue;
8348                 adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED |
8349                                                    IGB_MAC_STATE_IN_USE);
8350                 memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
8351                 adapter->mac_table[i].queue = queue;
8352                 igb_sync_mac_table(adapter);
8353                 return 0;
8354         }
8355         return -ENOMEM;
8356 }
8357 int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue)
8358 {
8359         /* search table for addr, if found, set to 0 and sync */
8360         int i;
8361         struct e1000_hw *hw = &adapter->hw;
8362
8363         if (is_zero_ether_addr(addr))
8364                 return 0;
8365         for (i = 0; i < hw->mac.rar_entry_count; i++) {
8366                 if (!compare_ether_addr(addr, adapter->mac_table[i].addr) &&
8367                     adapter->mac_table[i].queue == queue) {
8368                         adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED;
8369                         memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
8370                         adapter->mac_table[i].queue = 0;
8371                         igb_sync_mac_table(adapter);
8372                         return 0;
8373                 }
8374         }
8375         return -ENOMEM;
8376 }
8377 static int igb_set_vf_mac(struct igb_adapter *adapter,
8378                           int vf, unsigned char *mac_addr)
8379 {
8380         igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf);
8381         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
8382
8383         igb_add_mac_filter(adapter, mac_addr, vf);
8384
8385         return 0;
8386 }
8387
8388 #ifdef IFLA_VF_MAX
8389 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
8390 {
8391         struct igb_adapter *adapter = netdev_priv(netdev);
8392         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
8393                 return -EINVAL;
8394         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
8395         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
8396         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
8397                                       " change effective.\n");
8398         if (test_bit(__IGB_DOWN, &adapter->state)) {
8399                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
8400                          " but the PF device is not up.\n");
8401                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
8402                          " attempting to use the VF device.\n");
8403         }
8404         return igb_set_vf_mac(adapter, vf, mac);
8405 }
8406
8407 static int igb_link_mbps(int internal_link_speed)
8408 {
8409         switch (internal_link_speed) {
8410         case SPEED_100:
8411                 return 100;
8412         case SPEED_1000:
8413                 return 1000;
8414         default:
8415                 return 0;
8416         }
8417 }
8418
8419 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
8420                         int link_speed)
8421 {
8422         int rf_dec, rf_int;
8423         u32 bcnrc_val;
8424
8425         if (tx_rate != 0) {
8426                 /* Calculate the rate factor values to set */
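                /*
                 * e.g. a 1000 Mbps link limited to 300 Mbps gives rf_int = 3,
                 * with rf_dec encoding the remaining 1/3 as a fixed-point
                 * fraction, i.e. an overall factor of link_speed / tx_rate.
                 */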
8427                 rf_int = link_speed / tx_rate;
8428                 rf_dec = (link_speed - (rf_int * tx_rate));
8429                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
8430
8431                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
8432                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
8433                                 E1000_RTTBCNRC_RF_INT_MASK);
8434                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
8435         } else {
8436                 bcnrc_val = 0;
8437         }
8438
8439         E1000_WRITE_REG(hw, E1000_RTTDQSEL, vf); /* vf X uses queue X */
8440         /*
8441          * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
8442          * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
8443          */
8444         E1000_WRITE_REG(hw, E1000_RTTBCNRM(0), 0x14);
8445         E1000_WRITE_REG(hw, E1000_RTTBCNRC, bcnrc_val);
8446 }
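/*
 * igb_check_vf_rate_limit - re-apply VF TX rate limits after a link change
 *
 * The stored rate factors were computed against the link speed that was
 * current when the limits were set.  If the link speed has changed since
 * then, the per-VF limits are cleared; otherwise they are simply
 * reprogrammed against the actual link speed.
 */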
8447
8448 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
8449 {
8450         int actual_link_speed, i;
8451         bool reset_rate = false;
8452
8453         /* VF TX rate limit was not set, or hardware does not support it */
8454         if ((adapter->vf_rate_link_speed == 0) ||
8455                 (adapter->hw.mac.type != e1000_82576))
8456                 return;
8457
8458         actual_link_speed = igb_link_mbps(adapter->link_speed);
8459         if (actual_link_speed != adapter->vf_rate_link_speed) {
8460                 reset_rate = true;
8461                 adapter->vf_rate_link_speed = 0;
8462                 dev_info(&adapter->pdev->dev,
8463                 "Link speed has been changed. VF Transmit rate is disabled\n");
8464         }
8465
8466         for (i = 0; i < adapter->vfs_allocated_count; i++) {
8467                 if (reset_rate)
8468                         adapter->vf_data[i].tx_rate = 0;
8469
8470                 igb_set_vf_rate_limit(&adapter->hw, i,
8471                         adapter->vf_data[i].tx_rate, actual_link_speed);
8472         }
8473 }
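/*
 * igb_ndo_set_vf_bw - ndo callback to set a VF's TX bandwidth limit in Mbps
 *
 * Only supported on 82576 here; the link must be up and the requested rate
 * must not exceed the current link speed.  Reached through rtnetlink, e.g.
 * with something like "ip link set <pf> vf <n> rate <mbps>" in iproute2
 * (exact syntax depends on the iproute2 version).
 */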
8474
8475 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
8476 {
8477         struct igb_adapter *adapter = netdev_priv(netdev);
8478         struct e1000_hw *hw = &adapter->hw;
8479         int actual_link_speed;
8480
8481         if (hw->mac.type != e1000_82576)
8482                 return -EOPNOTSUPP;
8483
8484         actual_link_speed = igb_link_mbps(adapter->link_speed);
8485         if ((vf >= adapter->vfs_allocated_count) ||
8486                 (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) ||
8487                 (tx_rate < 0) || (tx_rate > actual_link_speed))
8488                 return -EINVAL;
8489
8490         adapter->vf_rate_link_speed = actual_link_speed;
8491         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
8492         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
8493
8494         return 0;
8495 }
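/*
 * igb_ndo_get_vf_config - report a VF's PF-assigned settings to rtnetlink
 *
 * Fills in the MAC address, TX rate limit, VLAN and QoS recorded for the
 * VF; this is the per-VF information that tools such as "ip link show"
 * display (exact presentation depends on the tooling).
 */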
8496
8497 static int igb_ndo_get_vf_config(struct net_device *netdev,
8498                                  int vf, struct ifla_vf_info *ivi)
8499 {
8500         struct igb_adapter *adapter = netdev_priv(netdev);
8501         if (vf >= adapter->vfs_allocated_count)
8502                 return -EINVAL;
8503         ivi->vf = vf;
8504         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
8505         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
8506         ivi->vlan = adapter->vf_data[vf].pf_vlan;
8507         ivi->qos = adapter->vf_data[vf].pf_qos;
8508         return 0;
8509 }
8510 #endif
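/*
 * igb_vmm_control - configure VM/VF related replication and spoof checking
 *
 * The switch below relies on intentional fall-through: 82576 programs
 * DTXCTL and then also the 82580 RPLOLR setting, while i350 skips both
 * (those registers do not apply to it).  82575 has no replication support
 * and returns early; everything else then gets the common loopback,
 * anti-spoofing and replication setup, plus MDD on i350 when VFs are in use.
 */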
8511 static void igb_vmm_control(struct igb_adapter *adapter)
8512 {
8513         struct e1000_hw *hw = &adapter->hw;
8514         u32 reg;
8515
8516         switch (hw->mac.type) {
8517         case e1000_82575:
8518         default:
8519                 /* replication is not supported for 82575 */
8520                 return;
8521         case e1000_82576:
8522                 /* notify HW that the MAC is adding vlan tags */
8523                 reg = E1000_READ_REG(hw, E1000_DTXCTL);
8524                 reg |= (E1000_DTXCTL_VLAN_ADDED |
8525                         E1000_DTXCTL_SPOOF_INT);
8526                 E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
8527         case e1000_82580:
8528                 /* enable replication vlan tag stripping */
8529                 reg = E1000_READ_REG(hw, E1000_RPLOLR);
8530                 reg |= E1000_RPLOLR_STRVLAN;
8531                 E1000_WRITE_REG(hw, E1000_RPLOLR, reg);
8532         case e1000_i350:
8533                 /* none of the above registers are supported by i350 */
8534                 break;
8535         }
8536
8537         /* Enable Malicious Driver Detection */
8538         if ((hw->mac.type == e1000_i350) && (adapter->vfs_allocated_count) &&
8539             (adapter->mdd))
8540                 igb_enable_mdd(adapter);
8541
8542         /* enable replication and loopback support */
8543         e1000_vmdq_set_loopback_pf(hw, adapter->vfs_allocated_count ||
8544                                    adapter->vmdq_pools);
8545
8546         e1000_vmdq_set_anti_spoofing_pf(hw, adapter->vfs_allocated_count ||
8547                                         adapter->vmdq_pools,
8548                                         adapter->vfs_allocated_count);
8549         e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count ||
8550                                       adapter->vmdq_pools);
8551 }
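/*
 * igb_init_fw - send driver version information to the firmware
 *
 * Takes the PHY0 software/firmware semaphore and retries the
 * FW_CMD_DRV_INFO host interface command up to FW_MAX_RETRIES times,
 * checksumming the command before each attempt.  Failure to obtain the
 * semaphore only logs a warning.
 */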
8552
8553 static void igb_init_fw(struct igb_adapter *adapter)
8554 {
8555         struct e1000_fw_drv_info fw_cmd;
8556         struct e1000_hw *hw = &adapter->hw;
8557         int i;
8558         u16 mask;
8559
8560         mask = E1000_SWFW_PHY0_SM;
8561
8562         if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) {
8563                 for (i = 0; i <= FW_MAX_RETRIES; i++) {
8564                         E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI);
8565                         fw_cmd.hdr.cmd = FW_CMD_DRV_INFO;
8566                         fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN;
8567                         fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED;
8568                         fw_cmd.port_num = hw->bus.func;
8569                         fw_cmd.drv_version = FW_FAMILY_DRV_VER;
8570                         fw_cmd.hdr.checksum = 0;
8571                         fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd,
8572                                                                    (FW_HDR_LEN +
8573                                                                     fw_cmd.hdr.buf_len));
8574                         e1000_host_interface_command(hw, (u8*)&fw_cmd,
8575                                                      sizeof(fw_cmd));
8576                         if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS)
8577                                 break;
8578                 }
8579         } else
8580                 dev_warn(pci_dev_to_dev(adapter->pdev),
8581                          "Unable to get semaphore, firmware init failed.\n");
8582         hw->mac.ops.release_swfw_sync(hw, mask);
8583 }
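/*
 * igb_init_dmac - configure DMA coalescing thresholds from the PBA size
 *
 * Only parts newer than 82580 (i350 and later) get DMA coalescing when it
 * has not been disabled via the dmac option; on 82580 it is explicitly
 * turned off instead, and older parts are left untouched.
 */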
8584
8585 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
8586 {
8587         struct e1000_hw *hw = &adapter->hw;
8588         u32 dmac_thr;
8589         u16 hwm;
8590
8591         if (hw->mac.type > e1000_82580) {
8592                 if (adapter->dmac != IGB_DMAC_DISABLE) {
8593                         u32 reg;
8594
8595                         /* force threshold to 0.  */
8596                         E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
8597
8598                         /*
8599                          * DMA Coalescing high water mark needs to be greater
8600                          * than the Rx threshold. Set hwm to PBA - max frame
8601                          * size in 16B units, capping it at PBA - 6KB.
8602                          */
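                        /*
                         * Illustrative example (assuming pba is in KB and a
                         * 1522 byte max frame): with pba = 34,
                         * hwm = 64 * 34 - 1522 / 16 = 2176 - 95 = 2081
                         * sixteen-byte units, above the 64 * (34 - 6) = 1792
                         * floor, so no clamping is needed.
                         */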
8603                         hwm = 64 * pba - adapter->max_frame_size / 16;
8604                         if (hwm < 64 * (pba - 6))
8605                                 hwm = 64 * (pba - 6);
8606                         reg = E1000_READ_REG(hw, E1000_FCRTC);
8607                         reg &= ~E1000_FCRTC_RTH_COAL_MASK;
8608                         reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
8609                                 & E1000_FCRTC_RTH_COAL_MASK);
8610                         E1000_WRITE_REG(hw, E1000_FCRTC, reg);
8611
8612                         /* 
8613                          * Set the DMA Coalescing Rx threshold to PBA - 2 * max
8614                          * frame size, capping it at PBA - 10KB.
8615                          */
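                        /*
                         * Illustrative example (same assumptions as above):
                         * with pba = 34 and a 1522 byte max frame,
                         * dmac_thr = 34 - 1522 / 512 = 34 - 2 = 32, above
                         * the pba - 10 = 24 floor.
                         */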
8616                         dmac_thr = pba - adapter->max_frame_size / 512;
8617                         if (dmac_thr < pba - 10)
8618                                 dmac_thr = pba - 10;
8619                         reg = E1000_READ_REG(hw, E1000_DMACR);
8620                         reg &= ~E1000_DMACR_DMACTHR_MASK;
8621                         reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
8622                                 & E1000_DMACR_DMACTHR_MASK);
8623
8624                         /* transition to L0x or L1 if available..*/
8625                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
8626
8627                         /* watchdog timer = msec value converted to 32usec intervals */
8628                         reg |= ((adapter->dmac) >> 5);
8629                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
8630
8631                         /* no lower threshold to disable coalescing (smart FIFO) - UTRESH=0 */
8632                         E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
8633
8634                         /*
8635                          * This sets the time to wait before requesting transition to
8636                          * low power state to number of usecs needed to receive 1 512
8637                          * byte frame at gigabit line rate
8638                          */
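                        /*
                         * Per the comment above the wait is expressed in
                         * usecs: 512 bytes * 8 bits / 1000 Mbps is about
                         * 4.1 us, which is what the 0x4 below appears to
                         * encode.
                         */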
8639                         reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
8640
8641                         E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
8642
8643                         /* free space in tx packet buffer to wake from DMA coal */
8644                         E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
8645                                 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
8646
8647                         /* make low power state decision controlled by DMA coal */
8648                         reg = E1000_READ_REG(hw, E1000_PCIEMISC);
8649                         reg &= ~E1000_PCIEMISC_LX_DECISION;
8650                         E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
8651                 } /* endif adapter->dmac is not disabled */
8652         } else if (hw->mac.type == e1000_82580) {
8653                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
8654                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
8655                                 reg & ~E1000_PCIEMISC_LX_DECISION);
8656                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
8657         }
8658 }
8659
8660 /* igb_main.c */
8661
8662
8663 /**
8664  * igb_kni_probe - Device Initialization Routine
8665  * @pdev: PCI device information struct
8666  * @lad_dev: output pointer that receives the initialized net_device
8667  *
8668  * Returns 0 on success, negative on failure
8669  *
8670  * igb_kni_probe initializes an adapter identified by a pci_dev structure.
8671  * The OS initialization, configuring of the adapter private structure,
8672  * and a hardware reset occur.
8673  **/
8674 int igb_kni_probe(struct pci_dev *pdev,
8675                                struct net_device **lad_dev)
8676 {
8677         struct net_device *netdev;
8678         struct igb_adapter *adapter;
8679         struct e1000_hw *hw;
8680         u16 eeprom_data = 0;
8681         u8 pba_str[E1000_PBANUM_LENGTH];
8682         s32 ret_val;
8683         static int global_quad_port_a; /* global quad port a indication */
8684         int i, err, pci_using_dac = 0;
8685         static int cards_found;
8686
8687         err = pci_enable_device_mem(pdev);
8688         if (err)
8689                 return err;
8690
8691 #ifdef NO_KNI
8692         pci_using_dac = 0;
8693         err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
8694         if (!err) {
8695                 err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
8696                 if (!err)
8697                         pci_using_dac = 1;
8698         } else {
8699                 err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
8700                 if (err) {
8701                         err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
8702                         if (err) {
8703                                 IGB_ERR("No usable DMA configuration, "
8704                                         "aborting\n");
8705                                 goto err_dma;
8706                         }
8707                 }
8708         }
8709
8710 #ifndef HAVE_ASPM_QUIRKS
8711         /* 82575 requires that the pci-e link partner disable the L0s state */
8712         switch (pdev->device) {
8713         case E1000_DEV_ID_82575EB_COPPER:
8714         case E1000_DEV_ID_82575EB_FIBER_SERDES:
8715         case E1000_DEV_ID_82575GB_QUAD_COPPER:
8716                 pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
8717         default:
8718                 break;
8719         }
8720
8721 #endif /* HAVE_ASPM_QUIRKS */
8722         err = pci_request_selected_regions(pdev,
8723                                            pci_select_bars(pdev,
8724                                                            IORESOURCE_MEM),
8725                                            igb_driver_name);
8726         if (err)
8727                 goto err_pci_reg;
8728
8729         pci_enable_pcie_error_reporting(pdev);
8730
8731         pci_set_master(pdev);
8732 #endif /* NO_KNI */
8733         err = -ENOMEM;
8734 #ifdef HAVE_TX_MQ
8735         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
8736                                    IGB_MAX_TX_QUEUES);
8737 #else
8738         netdev = alloc_etherdev(sizeof(struct igb_adapter));
8739 #endif /* HAVE_TX_MQ */
8740         if (!netdev)
8741                 goto err_alloc_etherdev;
8742
8743
8744         SET_MODULE_OWNER(netdev);
8745         SET_NETDEV_DEV(netdev, &pdev->dev);
8746
8747         //pci_set_drvdata(pdev, netdev);
8748
8749         adapter = netdev_priv(netdev);
8750         adapter->netdev = netdev;
8751         adapter->pdev = pdev;
8752         hw = &adapter->hw;
8753         hw->back = adapter;
8754         adapter->port_num = hw->bus.func;
8755         adapter->msg_enable = (1 << debug) - 1;
8756
8757 #ifdef HAVE_PCI_ERS
8758         err = pci_save_state(pdev);
8759         if (err)
8760                 goto err_ioremap;
8761 #endif
8762         err = -EIO;
8763         hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
8764                               pci_resource_len(pdev, 0));
8765         if (!hw->hw_addr)
8766                 goto err_ioremap;
8767
8768 #ifdef HAVE_NET_DEVICE_OPS
8769         netdev->netdev_ops = &igb_netdev_ops;
8770 #else /* HAVE_NET_DEVICE_OPS */
8771         netdev->open = &igb_open;
8772         netdev->stop = &igb_close;
8773         netdev->get_stats = &igb_get_stats;
8774 #ifdef HAVE_SET_RX_MODE
8775         netdev->set_rx_mode = &igb_set_rx_mode;
8776 #endif
8777         netdev->set_multicast_list = &igb_set_rx_mode;
8778         netdev->set_mac_address = &igb_set_mac;
8779         netdev->change_mtu = &igb_change_mtu;
8780         netdev->do_ioctl = &igb_ioctl;
8781 #ifdef HAVE_TX_TIMEOUT
8782         netdev->tx_timeout = &igb_tx_timeout;
8783 #endif
8784         netdev->vlan_rx_register = igb_vlan_mode;
8785         netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
8786         netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
8787 #ifdef CONFIG_NET_POLL_CONTROLLER
8788         netdev->poll_controller = igb_netpoll;
8789 #endif
8790         netdev->hard_start_xmit = &igb_xmit_frame;
8791 #endif /* HAVE_NET_DEVICE_OPS */
8792         igb_set_ethtool_ops(netdev);
8793 #ifdef HAVE_TX_TIMEOUT
8794         netdev->watchdog_timeo = 5 * HZ;
8795 #endif
8796
8797         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
8798
8799         adapter->bd_number = cards_found;
8800
8801         /* setup the private structure */
8802         err = igb_sw_init(adapter);
8803         if (err)
8804                 goto err_sw_init;
8805
8806         e1000_get_bus_info(hw);
8807
8808         hw->phy.autoneg_wait_to_complete = FALSE;
8809         hw->mac.adaptive_ifs = FALSE;
8810
8811         /* Copper options */
8812         if (hw->phy.media_type == e1000_media_type_copper) {
8813 #ifdef ETH_TP_MDI_X
8814                 hw->phy.mdix = ETH_TP_MDI_INVALID;
8815 #else
8816                 hw->phy.mdix = AUTO_ALL_MODES;
8817 #endif /* ETH_TP_MDI_X */
8818                 hw->phy.disable_polarity_correction = FALSE;
8819                 hw->phy.ms_type = e1000_ms_hw_default;
8820         }
8821
8822         if (e1000_check_reset_block(hw))
8823                 dev_info(pci_dev_to_dev(pdev),
8824                         "PHY reset is blocked due to SOL/IDER session.\n");
8825
8826         /*
8827          * features is initialized to 0 during allocation; it might already
8828          * have bits set by igb_sw_init, so we should use an OR instead of
8829          * an assignment.
8830          */
8831         netdev->features |= NETIF_F_SG |
8832                             NETIF_F_IP_CSUM |
8833 #ifdef NETIF_F_IPV6_CSUM
8834                             NETIF_F_IPV6_CSUM |
8835 #endif
8836 #ifdef NETIF_F_TSO
8837                             NETIF_F_TSO |
8838 #ifdef NETIF_F_TSO6
8839                             NETIF_F_TSO6 |
8840 #endif
8841 #endif /* NETIF_F_TSO */
8842 #ifdef NETIF_F_RXHASH
8843                             NETIF_F_RXHASH |
8844 #endif
8845 #ifdef HAVE_NDO_SET_FEATURES
8846                             NETIF_F_RXCSUM |
8847 #endif
8848                             NETIF_F_HW_VLAN_RX |
8849                             NETIF_F_HW_VLAN_TX;
8850
8851 #ifdef HAVE_NDO_SET_FEATURES
8852         /* copy netdev features into list of user selectable features */
8853         netdev->hw_features |= netdev->features;
8854 #ifndef IGB_NO_LRO
8855
8856         /* give us the option of enabling LRO later */
8857         netdev->hw_features |= NETIF_F_LRO;
8858 #endif
8859 #else
8860 #ifdef NETIF_F_GRO
8861
8862         /* this is only needed on kernels prior to 2.6.39 */
8863         netdev->features |= NETIF_F_GRO;
8864 #endif
8865 #endif
8866
8867         /* set this bit last since it cannot be part of hw_features */
8868         netdev->features |= NETIF_F_HW_VLAN_FILTER;
8869
8870 #ifdef HAVE_NETDEV_VLAN_FEATURES
8871         netdev->vlan_features |= NETIF_F_TSO |
8872                                  NETIF_F_TSO6 |
8873                                  NETIF_F_IP_CSUM |
8874                                  NETIF_F_IPV6_CSUM |
8875                                  NETIF_F_SG;
8876
8877 #endif
8878         if (pci_using_dac)
8879                 netdev->features |= NETIF_F_HIGHDMA;
8880
8881         if (hw->mac.type >= e1000_82576)
8882                 netdev->features |= NETIF_F_SCTP_CSUM;
8883
8884 #ifdef NO_KNI
8885         adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
8886
8887         /* before reading the NVM, reset the controller to put the device in a
8888          * known good starting state */
8889         e1000_reset_hw(hw);
8890 #endif
8891
8892         /* make sure the NVM is good */
8893         if (e1000_validate_nvm_checksum(hw) < 0) {
8894                 dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
8895                         " Valid\n");
8896                 err = -EIO;
8897                 goto err_eeprom;
8898         }
8899
8900         /* copy the MAC address out of the NVM */
8901         if (e1000_read_mac_addr(hw))
8902                 dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
8903         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
8904 #ifdef ETHTOOL_GPERMADDR
8905         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
8906
8907         if (!is_valid_ether_addr(netdev->perm_addr)) {
8908 #else
8909         if (!is_valid_ether_addr(netdev->dev_addr)) {
8910 #endif
8911                 dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
8912                 err = -EIO;
8913                 goto err_eeprom;
8914         }
8915
8916         memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
8917         adapter->mac_table[0].queue = adapter->vfs_allocated_count;
8918         adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
8919         igb_rar_set(adapter, 0);
8920
8921         /* get firmware version for ethtool -i */
8922         e1000_read_nvm(&adapter->hw, 5, 1, &adapter->fw_version);
8923 #ifdef NO_KNI
8924         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
8925                     (unsigned long) adapter);
8926         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
8927                 setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
8928                             (unsigned long) adapter);
8929         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
8930                     (unsigned long) adapter);
8931
8932         INIT_WORK(&adapter->reset_task, igb_reset_task);
8933         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
8934         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
8935                 INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
8936 #endif
8937
8938         /* Initialize link properties that are user-changeable */
8939         adapter->fc_autoneg = true;
8940         hw->mac.autoneg = true;
8941         hw->phy.autoneg_advertised = 0x2f;
8942
8943         hw->fc.requested_mode = e1000_fc_default;
8944         hw->fc.current_mode = e1000_fc_default;
8945
8946         e1000_validate_mdi_setting(hw);
8947
8948         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
8949          * enable the ACPI Magic Packet filter.
8950          */
8951
8952         if (hw->bus.func == 0)
8953                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
8954         else if (hw->mac.type >= e1000_82580)
8955                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
8956                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
8957                                  &eeprom_data);
8958         else if (hw->bus.func == 1)
8959                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
8960
8961         if (eeprom_data & IGB_EEPROM_APME)
8962                 adapter->eeprom_wol |= E1000_WUFC_MAG;
8963
8964         /* now that we have the eeprom settings, apply the special cases where
8965          * the eeprom may be wrong or the board simply won't support wake on
8966          * lan on a particular port */
8967         switch (pdev->device) {
8968         case E1000_DEV_ID_82575GB_QUAD_COPPER:
8969                 adapter->eeprom_wol = 0;
8970                 break;
8971         case E1000_DEV_ID_82575EB_FIBER_SERDES:
8972         case E1000_DEV_ID_82576_FIBER:
8973         case E1000_DEV_ID_82576_SERDES:
8974                 /* Wake events only supported on port A for dual fiber
8975                  * regardless of eeprom setting */
8976                 if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
8977                         adapter->eeprom_wol = 0;
8978                 break;
8979         case E1000_DEV_ID_82576_QUAD_COPPER:
8980         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
8981                 /* if quad port adapter, disable WoL on all but port A */
8982                 if (global_quad_port_a != 0)
8983                         adapter->eeprom_wol = 0;
8984                 else
8985                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
8986                 /* Reset for multiple quad port adapters */
8987                 if (++global_quad_port_a == 4)
8988                         global_quad_port_a = 0;
8989                 break;
8990         }
8991
8992         /* initialize the wol settings based on the eeprom settings */
8993         adapter->wol = adapter->eeprom_wol;
8994 #ifdef NO_KNI
8995         device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);
8996
8997         /* reset the hardware with the new settings */
8998         igb_reset(adapter);
8999
9000         /* let the f/w know that the h/w is now under the control of the
9001          * driver. */
9002         igb_get_hw_control(adapter);
9003
9004         strncpy(netdev->name, "eth%d", IFNAMSIZ);
9005         err = register_netdev(netdev);
9006         if (err)
9007                 goto err_register;
9008
9009 #ifdef CONFIG_IGB_VMDQ_NETDEV
9010         err = igb_init_vmdq_netdevs(adapter);
9011         if (err)
9012                 goto err_register;
9013 #endif
9014         /* carrier off reporting is important to ethtool even BEFORE open */
9015         netif_carrier_off(netdev);
9016
9017 #ifdef IGB_DCA
9018         if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
9019                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
9020                 dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
9021                 igb_setup_dca(adapter);
9022         }
9023
9024 #endif
9025 #ifdef HAVE_HW_TIME_STAMP
9026         /* do hw tstamp init after resetting */
9027         igb_init_hw_timer(adapter);
9028
9029 #endif
9030
9031 #endif /* NO_KNI */
9032         dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
9033         /* print bus type/speed/width info */
9034         dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
9035                  netdev->name,
9036                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
9037                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
9038                                                             "unknown"),
9039                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4\n" :
9040                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2\n" :
9041                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1\n" :
9042                    "unknown"));
9043         dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
9044         for (i = 0; i < 6; i++)
9045                 printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
9046
9047         ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
9048         if (ret_val)
9049                 strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
9050         dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
9051                  pba_str);
9052
9053         /* Initialize the thermal sensor on i350 devices. */
9054         if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
9055                 u16 ets_word;
9056
9057                 /*
9058                  * Read the NVM to determine if this i350 device supports an
9059                  * external thermal sensor.
9060                  */
9061                 e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
9062                 if (ets_word != 0x0000 && ets_word != 0xFFFF)
9063                         adapter->ets = true;
9064                 else
9065                         adapter->ets = false;
9066 #ifdef NO_KNI
9067 #ifdef IGB_SYSFS
9068                 igb_sysfs_init(adapter);
9069 #else
9070 #ifdef IGB_PROCFS
9071                 igb_procfs_init(adapter);
9072 #endif /* IGB_PROCFS */
9073 #endif /* IGB_SYSFS */
9074 #endif /* NO_KNI */
9075         } else {
9076                 adapter->ets = false;
9077         }
9078
9079         switch (hw->mac.type) {
9080         case e1000_i350:
9081                 /* Enable EEE for internal copper PHY devices */
9082                 if (hw->phy.media_type == e1000_media_type_copper)
9083                         e1000_set_eee_i350(hw);
9084
9085                 /* send driver version info to firmware */
9086                 igb_init_fw(adapter);
9087                 break;
9088         default:
9089                 break;
9090         }
9091 #ifndef IGB_NO_LRO
9092         if (netdev->features & NETIF_F_LRO)
9093                 dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled\n");
9094         else
9095                 dev_info(pci_dev_to_dev(pdev), "LRO is disabled\n");
9096 #endif
9097         dev_info(pci_dev_to_dev(pdev),
9098                  "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
9099                  adapter->msix_entries ? "MSI-X" :
9100                  (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
9101                  adapter->num_rx_queues, adapter->num_tx_queues);
9102
9103         cards_found++;
9104         *lad_dev = netdev;
9105
9106         pm_runtime_put_noidle(&pdev->dev);
9107         return 0;
9108
9109 //err_register:
9110         //igb_release_hw_control(adapter);
9111 err_eeprom:
9112         //if (!e1000_check_reset_block(hw))
9113         //      e1000_phy_hw_reset(hw);
9114
9115         if (hw->flash_address)
9116                 iounmap(hw->flash_address);
9117 err_sw_init:
9118         //igb_clear_interrupt_scheme(adapter);
9119         //igb_reset_sriov_capability(adapter);
9120         iounmap(hw->hw_addr);
9121 err_ioremap:
9122         free_netdev(netdev);
9123 err_alloc_etherdev:
9124         //pci_release_selected_regions(pdev,
9125         //                             pci_select_bars(pdev, IORESOURCE_MEM));
9126 //err_pci_reg:
9127 //err_dma:
9128         pci_disable_device(pdev);
9129         return err;
9130 }
9131
9132
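/*
 * igb_kni_remove - undo the mappings set up by igb_kni_probe
 *
 * Unmaps the register BAR (and the flash BAR, if one was mapped) and
 * disables the PCI device.  Unlike the full igb remove path, it does not
 * unregister a netdev or release PCI regions, matching the reduced
 * igb_kni_probe above.
 */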
9133 void igb_kni_remove(struct pci_dev *pdev)
9134 {
9135         struct net_device *netdev = pci_get_drvdata(pdev);
9136         struct igb_adapter *adapter = netdev_priv(netdev);
9137         struct e1000_hw *hw = &adapter->hw;
9138
9139         iounmap(hw->hw_addr);
9140
9141         if (hw->flash_address)
9142                 iounmap(hw->flash_address);
9143
9144         pci_disable_device(pdev);
9145 }
9146