kni: fix build with kernel < 2.6.34
[dpdk.git] / lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2012 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/tcp.h>
35 #ifdef NETIF_F_TSO
36 #include <net/checksum.h>
37 #ifdef NETIF_F_TSO6
38 #include <linux/ipv6.h>
39 #include <net/ip6_checksum.h>
40 #endif
41 #endif
42 #ifdef SIOCGMIIPHY
43 #include <linux/mii.h>
44 #endif
45 #ifdef SIOCETHTOOL
46 #include <linux/ethtool.h>
47 #endif
48 #include <linux/if_vlan.h>
49 #ifdef CONFIG_PM_RUNTIME
50 #include <linux/pm_runtime.h>
51 #endif /* CONFIG_PM_RUNTIME */
52
53 #include "igb.h"
54 #include "igb_vmdq.h"
55
56 #include <linux/uio_driver.h>
57
58 #define DRV_DEBUG
59 #define DRV_HW_PERF
60 #define VERSION_SUFFIX
61
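/* The version string built below expands to "3.4.8"; the suffix macros above are defined empty. */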
62 #define MAJ 3
63 #define MIN 4
64 #define BUILD 8
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF
66
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70                                 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
72
73 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) },
97         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) },
98         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) },
99         /* required last entry */
100         {0, }
101 };
102
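/*
 * MODULE_DEVICE_TABLE() is left commented out, presumably because this igb
 * code is linked into the KNI kernel module rather than built as the
 * standalone igb driver, so no PCI ID table is exported for auto-loading.
 */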
103 //MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
104 static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__));
105 void igb_reset(struct igb_adapter *);
106 static int igb_setup_all_tx_resources(struct igb_adapter *);
107 static int igb_setup_all_rx_resources(struct igb_adapter *);
108 static void igb_free_all_tx_resources(struct igb_adapter *);
109 static void igb_free_all_rx_resources(struct igb_adapter *);
110 static void igb_setup_mrqc(struct igb_adapter *);
111 void igb_update_stats(struct igb_adapter *);
112 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
113 static void __devexit igb_remove(struct pci_dev *pdev);
114 #ifdef HAVE_HW_TIME_STAMP
115 static void igb_init_hw_timer(struct igb_adapter *adapter);
116 #endif
117 static int igb_sw_init(struct igb_adapter *);
118 static int igb_open(struct net_device *);
119 static int igb_close(struct net_device *);
120 static void igb_configure_tx(struct igb_adapter *);
121 static void igb_configure_rx(struct igb_adapter *);
122 static void igb_clean_all_tx_rings(struct igb_adapter *);
123 static void igb_clean_all_rx_rings(struct igb_adapter *);
124 static void igb_clean_tx_ring(struct igb_ring *);
125 static void igb_set_rx_mode(struct net_device *);
126 static void igb_update_phy_info(unsigned long);
127 static void igb_watchdog(unsigned long);
128 static void igb_watchdog_task(struct work_struct *);
129 static void igb_dma_err_task(struct work_struct *);
130 static void igb_dma_err_timer(unsigned long data);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct net_device_stats *igb_get_stats(struct net_device *);
133 static int igb_change_mtu(struct net_device *, int);
134 void igb_full_sync_mac_table(struct igb_adapter *adapter);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 #ifdef HAVE_VLAN_RX_REGISTER
152 static void igb_vlan_mode(struct net_device *, struct vlan_group *);
153 #endif
154 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
155 static int igb_vlan_rx_add_vid(struct net_device *, u16);
156 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
157 #else
158 static void igb_vlan_rx_add_vid(struct net_device *, u16);
159 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
160 #endif
161 static void igb_restore_vlan(struct igb_adapter *);
162 void igb_rar_set(struct igb_adapter *adapter, u32 index);
163 static void igb_ping_all_vfs(struct igb_adapter *);
164 static void igb_msg_task(struct igb_adapter *);
165 static void igb_vmm_control(struct igb_adapter *);
166 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
167 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
168 static void igb_process_mdd_event(struct igb_adapter *);
169 #ifdef IFLA_VF_MAX
170 static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac);
171 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
172                                int vf, u16 vlan, u8 qos);
173 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
174 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
175                                  struct ifla_vf_info *ivi);
176 static void igb_check_vf_rate_limit(struct igb_adapter *);
177 #endif
178 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
179 static int igb_check_vf_assignment(struct igb_adapter *adapter);
180 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
181 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
182 #endif
183 #ifdef CONFIG_PM
184 #ifdef HAVE_SYSTEM_SLEEP_PM_OPS
185 static int igb_suspend(struct device *dev);
186 static int igb_resume(struct device *dev);
187 #ifdef CONFIG_PM_RUNTIME
188 static int igb_runtime_suspend(struct device *dev);
189 static int igb_runtime_resume(struct device *dev);
190 static int igb_runtime_idle(struct device *dev);
191 #endif /* CONFIG_PM_RUNTIME */
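/*
 * On kernels older than 2.6.34 the dev_pm_ops callbacks are filled in field
 * by field; newer kernels use the SET_SYSTEM_SLEEP_PM_OPS() and
 * SET_RUNTIME_PM_OPS() helper macros instead.
 */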
192 static const struct dev_pm_ops igb_pm_ops = {
193 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
194         .suspend = igb_suspend,
195         .resume = igb_resume,
196         .freeze = igb_suspend,
197         .thaw = igb_resume,
198         .poweroff = igb_suspend,
199         .restore = igb_resume,
200 #ifdef CONFIG_PM_RUNTIME
201         .runtime_suspend = igb_runtime_suspend,
202         .runtime_resume = igb_runtime_resume,
203         .runtime_idle = igb_runtime_idle,
204 #endif
205 #else /* Linux >= 2.6.34 */
206         SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
207 #ifdef CONFIG_PM_RUNTIME
208         SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
209                         igb_runtime_idle)
210 #endif /* CONFIG_PM_RUNTIME */
211 #endif /* Linux version */
212 };
213 #endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
214 #endif /* CONFIG_PM */
215 #ifndef USE_REBOOT_NOTIFIER
216 static void igb_shutdown(struct pci_dev *);
217 #else
218 static int igb_notify_reboot(struct notifier_block *, unsigned long, void *);
219 static struct notifier_block igb_notifier_reboot = {
220         .notifier_call  = igb_notify_reboot,
221         .next           = NULL,
222         .priority       = 0
223 };
224 #endif
225 #ifdef IGB_DCA
226 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
227 static struct notifier_block dca_notifier = {
228         .notifier_call  = igb_notify_dca,
229         .next           = NULL,
230         .priority       = 0
231 };
232 #endif
233 #ifdef CONFIG_NET_POLL_CONTROLLER
234 /* for netdump / net console */
235 static void igb_netpoll(struct net_device *);
236 #endif
237
238 #ifdef HAVE_PCI_ERS
239 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
240                      pci_channel_state_t);
241 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
242 static void igb_io_resume(struct pci_dev *);
243
244 static struct pci_error_handlers igb_err_handler = {
245         .error_detected = igb_io_error_detected,
246         .slot_reset = igb_io_slot_reset,
247         .resume = igb_io_resume,
248 };
249 #endif
250
251 static void igb_init_fw(struct igb_adapter *adapter);
252 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
253
254 static struct pci_driver igb_driver = {
255         .name     = igb_driver_name,
256         .id_table = igb_pci_tbl,
257         .probe    = igb_probe,
258         .remove   = __devexit_p(igb_remove),
259 #ifdef CONFIG_PM
260 #ifdef HAVE_SYSTEM_SLEEP_PM_OPS
261         .driver.pm = &igb_pm_ops,
262 #endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
263 #endif /* CONFIG_PM */
264 #ifndef USE_REBOOT_NOTIFIER
265         .shutdown = igb_shutdown,
266 #endif
267 #ifdef HAVE_PCI_ERS
268         .err_handler = &igb_err_handler
269 #endif
270 };
271
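/*
 * The MODULE_* descriptors below are likewise commented out; module metadata
 * is presumably supplied by the containing rte_kni module instead.
 */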
272 //MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
273 //MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
274 //MODULE_LICENSE("GPL");
275 //MODULE_VERSION(DRV_VERSION);
276
277 static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add)
278 {
279         struct e1000_hw *hw = &adapter->hw;
280         struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie;
281         u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
282         u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
283         u32 vfta;
284
285         /*
286          * if this is the management vlan the only option is to add it in so
287          * that the management pass through will continue to work
288          */
289         if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
290             (vid == mng_cookie->vlan_id))
291                 add = TRUE;
292
293         vfta = adapter->shadow_vfta[index];
294
295         if (add)
296                 vfta |= mask;
297         else
298                 vfta &= ~mask;
299
300         e1000_write_vfta(hw, index, vfta);
301         adapter->shadow_vfta[index] = vfta;
302 }
303
304 #ifdef HAVE_HW_TIME_STAMP
305 /**
306  * igb_read_clock - read raw cycle counter (to be used by time counter)
307  */
308 static cycle_t igb_read_clock(const struct cyclecounter *tc)
309 {
310         struct igb_adapter *adapter =
311                 container_of(tc, struct igb_adapter, cycles);
312         struct e1000_hw *hw = &adapter->hw;
313         u64 stamp = 0;
314         int shift = 0;
315
316         /*
317          * The timestamp latches on lowest register read. For the 82580
318          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
319          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
320          */
321         if (hw->mac.type >= e1000_82580) {
322                 stamp = E1000_READ_REG(hw, E1000_SYSTIMR) >> 8;
323                 shift = IGB_82580_TSYNC_SHIFT;
324         }
325
326         stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIML) << shift;
327         stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << (shift + 32);
328         return stamp;
329 }
330
331 #endif /* HAVE_HW_TIME_STAMP */
332 static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
333 //module_param(debug, int, 0);
334 //MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)");
335
336 /**
337  * igb_init_module - Driver Registration Routine
338  *
339  * igb_init_module is the first routine called when the driver is
340  * loaded. All it does is register with the PCI subsystem.
341  **/
342 static int __init igb_init_module(void)
343 {
344         int ret;
345
346         printk(KERN_INFO "%s - version %s\n",
347                igb_driver_string, igb_driver_version);
348
349         printk(KERN_INFO "%s\n", igb_copyright);
350 #ifdef IGB_SYSFS
351 /* only use IGB_PROCFS if IGB_SYSFS is not defined */
352 #else
353 #ifdef IGB_PROCFS
354         if (igb_procfs_topdir_init())
355                 printk(KERN_INFO "Procfs failed to initialize topdir\n");
356 #endif /* IGB_PROCFS */
357 #endif /* IGB_SYSFS  */
358
359 #ifdef IGB_DCA
360         dca_register_notify(&dca_notifier);
361 #endif
362         ret = pci_register_driver(&igb_driver);
363 #ifdef USE_REBOOT_NOTIFIER
364         if (ret >= 0) {
365                 register_reboot_notifier(&igb_notifier_reboot);
366         }
367 #endif
368         return ret;
369 }
370
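/*
 * module_init()/module_exit() are redefined below so that
 * module_init(igb_init_module) only re-declares the function (marked unused)
 * instead of registering it as a module entry point; the enclosing KNI module
 * presumably provides its own init/exit path.
 */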
371 #undef module_init
372 #define module_init(x) static int x(void)  __attribute__((__unused__));
373 module_init(igb_init_module);
374
375 /**
376  * igb_exit_module - Driver Exit Cleanup Routine
377  *
378  * igb_exit_module is called just before the driver is removed
379  * from memory.
380  **/
381 static void __exit igb_exit_module(void)
382 {
383 #ifdef IGB_DCA
384         dca_unregister_notify(&dca_notifier);
385 #endif
386 #ifdef USE_REBOOT_NOTIFIER
387         unregister_reboot_notifier(&igb_notifier_reboot);
388 #endif
389         pci_unregister_driver(&igb_driver);
390
391 #ifdef IGB_SYSFS
392 /* only compile IGB_PROCFS if IGB_SYSFS is not defined */
393 #else
394 #ifdef IGB_PROCFS
395         igb_procfs_topdir_exit();
396 #endif /* IGB_PROCFS */
397 #endif /* IGB_SYSFS */
398 }
399
400 #undef module_exit
401 #define module_exit(x) static void x(void)  __attribute__((__unused__));
402 module_exit(igb_exit_module);
403
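/*
 * Q_IDX_82576() interleaves queue indexes into 82576 register indexes:
 * queue 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, and so on.
 */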
404 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
405 /**
406  * igb_cache_ring_register - Descriptor ring to register mapping
407  * @adapter: board private structure to initialize
408  *
409  * Once we know the feature-set enabled for the device, we'll cache
410  * the register offset the descriptor ring is assigned to.
411  **/
412 static void igb_cache_ring_register(struct igb_adapter *adapter)
413 {
414         int i = 0, j = 0;
415         u32 rbase_offset = adapter->vfs_allocated_count;
416
417         switch (adapter->hw.mac.type) {
418         case e1000_82576:
419                 /* The queues are allocated for virtualization such that VF 0
420                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
421                  * In order to avoid collision we start at the first free queue
422                  * and continue consuming queues in the same sequence
423                  */
424                 if ((adapter->rss_queues > 1) && adapter->vmdq_pools) {
425                         for (; i < adapter->rss_queues; i++)
426                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
427                                                                Q_IDX_82576(i);
428                 }
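                /* Fall through - remaining queues use the flat mapping below */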
429         case e1000_82575:
430         case e1000_82580:
431         case e1000_i350:
432         default:
433                 for (; i < adapter->num_rx_queues; i++)
434                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
435                 for (; j < adapter->num_tx_queues; j++)
436                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
437                 break;
438         }
439 }
440
441 static void igb_free_queues(struct igb_adapter *adapter)
442 {
443         int i;
444
445         for (i = 0; i < adapter->num_tx_queues; i++) {
446                 kfree(adapter->tx_ring[i]);
447                 adapter->tx_ring[i] = NULL;
448         }
449         for (i = 0; i < adapter->num_rx_queues; i++) {
450                 kfree(adapter->rx_ring[i]);
451                 adapter->rx_ring[i] = NULL;
452         }
453         adapter->num_rx_queues = 0;
454         adapter->num_tx_queues = 0;
455 }
456
457 /**
458  * igb_alloc_queues - Allocate memory for all rings
459  * @adapter: board private structure to initialize
460  *
461  * We allocate one ring per queue at run-time since we don't know the
462  * number of queues at compile-time.
463  **/
464 static int igb_alloc_queues(struct igb_adapter *adapter)
465 {
466         struct igb_ring *ring;
467         int i;
468 #ifdef HAVE_DEVICE_NUMA_NODE
469         int orig_node = adapter->node;
470 #endif /* HAVE_DEVICE_NUMA_NODE */
471
472         for (i = 0; i < adapter->num_tx_queues; i++) {
473 #ifdef HAVE_DEVICE_NUMA_NODE
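                /*
                 * With no node specified (-1), spread ring allocations
                 * round-robin across the online NUMA nodes.
                 */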
474                 if (orig_node == -1) {
475                         int cur_node = next_online_node(adapter->node);
476                         if (cur_node == MAX_NUMNODES)
477                                 cur_node = first_online_node;
478                         adapter->node = cur_node;
479                 }
480 #endif /* HAVE_DEVICE_NUMA_NODE */
481                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
482                                     adapter->node);
483                 if (!ring)
484                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
485                 if (!ring)
486                         goto err;
487                 ring->count = adapter->tx_ring_count;
488                 ring->queue_index = i;
489                 ring->dev = pci_dev_to_dev(adapter->pdev);
490                 ring->netdev = adapter->netdev;
491                 ring->numa_node = adapter->node;
492                 /* For 82575, context index must be unique per ring. */
493                 if (adapter->hw.mac.type == e1000_82575)
494                         set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
495                 adapter->tx_ring[i] = ring;
496         }
497 #ifdef HAVE_DEVICE_NUMA_NODE
498         /* Restore the adapter's original node */
499         adapter->node = orig_node;
500 #endif /* HAVE_DEVICE_NUMA_NODE */
501
502         for (i = 0; i < adapter->num_rx_queues; i++) {
503 #ifdef HAVE_DEVICE_NUMA_NODE
504                 if (orig_node == -1) {
505                         int cur_node = next_online_node(adapter->node);
506                         if (cur_node == MAX_NUMNODES)
507                                 cur_node = first_online_node;
508                         adapter->node = cur_node;
509                 }
510 #endif /* HAVE_DEVICE_NUMA_NODE */
511                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
512                                     adapter->node);
513                 if (!ring)
514                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
515                 if (!ring)
516                         goto err;
517                 ring->count = adapter->rx_ring_count;
518                 ring->queue_index = i;
519                 ring->dev = pci_dev_to_dev(adapter->pdev);
520                 ring->netdev = adapter->netdev;
521                 ring->numa_node = adapter->node;
522 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
523                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
524 #endif
525 #ifndef HAVE_NDO_SET_FEATURES
526                 /* enable rx checksum */
527                 set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
528
529 #endif
530                 /* set flag indicating ring supports SCTP checksum offload */
531                 if (adapter->hw.mac.type >= e1000_82576)
532                         set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
533
534                 /* On i350, loopback VLAN packets have the tag byte-swapped. */
535                 if (adapter->hw.mac.type == e1000_i350)
536                         set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
537
538                 adapter->rx_ring[i] = ring;
539         }
540 #ifdef HAVE_DEVICE_NUMA_NODE
541         /* Restore the adapter's original node */
542         adapter->node = orig_node;
543 #endif /* HAVE_DEVICE_NUMA_NODE */
544
545         igb_cache_ring_register(adapter);
546
547         return E1000_SUCCESS;
548
549 err:
550 #ifdef HAVE_DEVICE_NUMA_NODE
551         /* Restore the adapter's original node */
552         adapter->node = orig_node;
553 #endif /* HAVE_DEVICE_NUMA_NODE */
554         igb_free_queues(adapter);
555
556         return -ENOMEM;
557 }
558
559 static void igb_configure_lli(struct igb_adapter *adapter)
560 {
561         struct e1000_hw *hw = &adapter->hw;
562         u16 port;
563
564         /* LLI should only be enabled for MSI-X or MSI interrupts */
565         if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI))
566                 return;
567
568         if (adapter->lli_port) {
569                 /* use filter 0 for port */
570                 port = htons((u16)adapter->lli_port);
571                 E1000_WRITE_REG(hw, E1000_IMIR(0),
572                         (port | E1000_IMIR_PORT_IM_EN));
573                 E1000_WRITE_REG(hw, E1000_IMIREXT(0),
574                         (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
575         }
576
577         if (adapter->flags & IGB_FLAG_LLI_PUSH) {
578                 /* use filter 1 for push flag */
579                 E1000_WRITE_REG(hw, E1000_IMIR(1),
580                         (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
581                 E1000_WRITE_REG(hw, E1000_IMIREXT(1),
582                         (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH));
583         }
584
585         if (adapter->lli_size) {
586                 /* use filter 2 for size */
587                 E1000_WRITE_REG(hw, E1000_IMIR(2),
588                         (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
589                 E1000_WRITE_REG(hw, E1000_IMIREXT(2),
590                         (adapter->lli_size | E1000_IMIREXT_CTRL_BP));
591         }
592
593 }
594
595 /**
596  *  igb_write_ivar - configure ivar for given MSI-X vector
597  *  @hw: pointer to the HW structure
598  *  @msix_vector: vector number we are allocating to a given ring
599  *  @index: row index of IVAR register to write within IVAR table
600  *  @offset: column offset of in IVAR, should be multiple of 8
601  *
602  *  This function is intended to handle the writing of the IVAR register
603  *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
604  *  each containing a cause allocation for an Rx and Tx ring, and a
605  *  variable number of rows depending on the number of queues supported.
606  **/
607 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
608                            int index, int offset)
609 {
610         u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
611
612         /* clear any bits that are currently set */
613         ivar &= ~((u32)0xFF << offset);
614
615         /* write vector and valid bit */
616         ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
617
618         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
619 }
620
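/* Sentinel value meaning "no Rx/Tx queue is attached to this vector" */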
621 #define IGB_N0_QUEUE -1
622 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
623 {
624         struct igb_adapter *adapter = q_vector->adapter;
625         struct e1000_hw *hw = &adapter->hw;
626         int rx_queue = IGB_N0_QUEUE;
627         int tx_queue = IGB_N0_QUEUE;
628         u32 msixbm = 0;
629
630         if (q_vector->rx.ring)
631                 rx_queue = q_vector->rx.ring->reg_idx;
632         if (q_vector->tx.ring)
633                 tx_queue = q_vector->tx.ring->reg_idx;
634
635         switch (hw->mac.type) {
636         case e1000_82575:
637                 /* The 82575 assigns vectors using a bitmask, which matches the
638                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
639                    or more queues to a vector, we write the appropriate bits
640                    into the MSIXBM register for that vector. */
641                 if (rx_queue > IGB_N0_QUEUE)
642                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
643                 if (tx_queue > IGB_N0_QUEUE)
644                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
645                 if (!adapter->msix_entries && msix_vector == 0)
646                         msixbm |= E1000_EIMS_OTHER;
647                 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm);
648                 q_vector->eims_value = msixbm;
649                 break;
650         case e1000_82576:
651                 /*
652                  * 82576 uses a table that essentially consists of 2 columns
653                  * with 8 rows.  The ordering is column-major so we use the
654                  * lower 3 bits as the row index, and the 4th bit as the 
655                  * column offset.
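                 * For example, Rx queue 10 maps to row 2 (10 & 0x7) with
                 * column offset 16 ((10 & 0x8) << 1).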
656                  */
657                 if (rx_queue > IGB_N0_QUEUE)
658                         igb_write_ivar(hw, msix_vector,
659                                        rx_queue & 0x7,
660                                        (rx_queue & 0x8) << 1);
661                 if (tx_queue > IGB_N0_QUEUE)
662                         igb_write_ivar(hw, msix_vector,
663                                        tx_queue & 0x7,
664                                        ((tx_queue & 0x8) << 1) + 8);
665                 q_vector->eims_value = 1 << msix_vector;
666                 break;
667         case e1000_82580:
668         case e1000_i350:
669                 /*
670                  * On 82580 and newer adapters the scheme is similar to 82576
671                  * however instead of ordering column-major we have things
672                  * ordered row-major.  So we traverse the table by using
673                  * bit 0 as the column offset, and the remaining bits as the
674                  * row index.
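                 * For example, Rx queue 5 uses row 2 (5 >> 1) with column
                 * offset 16 ((5 & 0x1) << 4).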
675                  */
676                 if (rx_queue > IGB_N0_QUEUE)
677                         igb_write_ivar(hw, msix_vector,
678                                        rx_queue >> 1,
679                                        (rx_queue & 0x1) << 4);
680                 if (tx_queue > IGB_N0_QUEUE)
681                         igb_write_ivar(hw, msix_vector,
682                                        tx_queue >> 1,
683                                        ((tx_queue & 0x1) << 4) + 8);
684                 q_vector->eims_value = 1 << msix_vector;
685                 break;
686         default:
687                 BUG();
688                 break;
689         }
690
691         /* add q_vector eims value to global eims_enable_mask */
692         adapter->eims_enable_mask |= q_vector->eims_value;
693
694         /* configure q_vector to set itr on first interrupt */
695         q_vector->set_itr = 1;
696 }
697
698 /**
699  * igb_configure_msix - Configure MSI-X hardware
700  *
701  * igb_configure_msix sets up the hardware to properly
702  * generate MSI-X interrupts.
703  **/
704 static void igb_configure_msix(struct igb_adapter *adapter)
705 {
706         u32 tmp;
707         int i, vector = 0;
708         struct e1000_hw *hw = &adapter->hw;
709
710         adapter->eims_enable_mask = 0;
711
712         /* set vector for other causes, i.e. link changes */
713         switch (hw->mac.type) {
714         case e1000_82575:
715                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
716                 /* enable MSI-X PBA support*/
717                 tmp |= E1000_CTRL_EXT_PBA_CLR;
718
719                 /* Auto-Mask interrupts upon ICR read. */
720                 tmp |= E1000_CTRL_EXT_EIAME;
721                 tmp |= E1000_CTRL_EXT_IRCA;
722
723                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
724
725                 /* enable msix_other interrupt */
726                 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++,
727                                       E1000_EIMS_OTHER);
728                 adapter->eims_other = E1000_EIMS_OTHER;
729
730                 break;
731
732         case e1000_82576:
733         case e1000_82580:
734         case e1000_i350:
735                 /* Turn on MSI-X capability first, or our settings
736                  * won't stick.  And it will take days to debug. */
737                 E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
738                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
739                                 E1000_GPIE_NSICR);
740
741                 /* enable msix_other interrupt */
742                 adapter->eims_other = 1 << vector;
743                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
744
745                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
746                 break;
747         default:
748                 /* do nothing, since nothing else supports MSI-X */
749                 break;
750         } /* switch (hw->mac.type) */
751
752         adapter->eims_enable_mask |= adapter->eims_other;
753
754         for (i = 0; i < adapter->num_q_vectors; i++)
755                 igb_assign_vector(adapter->q_vector[i], vector++);
756
757         E1000_WRITE_FLUSH(hw);
758 }
759
760 /**
761  * igb_request_msix - Initialize MSI-X interrupts
762  *
763  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
764  * kernel.
765  **/
766 static int igb_request_msix(struct igb_adapter *adapter)
767 {
768         struct net_device *netdev = adapter->netdev;
769         struct e1000_hw *hw = &adapter->hw;
770         int i, err = 0, vector = 0;
771
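        /*
         * vector 0 services the "other" causes (link status, etc.);
         * per-ring vectors are requested starting at index 1.
         */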
772         err = request_irq(adapter->msix_entries[vector].vector,
773                           &igb_msix_other, 0, netdev->name, adapter);
774         if (err)
775                 goto out;
776         vector++;
777
778         for (i = 0; i < adapter->num_q_vectors; i++) {
779                 struct igb_q_vector *q_vector = adapter->q_vector[i];
780
781                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
782
783                 if (q_vector->rx.ring && q_vector->tx.ring)
784                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
785                                 q_vector->rx.ring->queue_index);
786                 else if (q_vector->tx.ring)
787                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
788                                 q_vector->tx.ring->queue_index);
789                 else if (q_vector->rx.ring)
790                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
791                                 q_vector->rx.ring->queue_index);
792                 else
793                         sprintf(q_vector->name, "%s-unused", netdev->name);
794
795                 err = request_irq(adapter->msix_entries[vector].vector,
796                                   igb_msix_ring, 0, q_vector->name,
797                                   q_vector);
798                 if (err)
799                         goto out;
800                 vector++;
801         }
802
803         igb_configure_msix(adapter);
804         return 0;
805 out:
806         return err;
807 }
808
809 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
810 {
811         if (adapter->msix_entries) {
812                 pci_disable_msix(adapter->pdev);
813                 kfree(adapter->msix_entries);
814                 adapter->msix_entries = NULL;
815         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
816                 pci_disable_msi(adapter->pdev);
817         }
818 }
819
820 /**
821  * igb_free_q_vectors - Free memory allocated for interrupt vectors
822  * @adapter: board private structure to initialize
823  *
824  * This function frees the memory allocated to the q_vectors.  In addition if
825  * NAPI is enabled it will delete any references to the NAPI struct prior
826  * to freeing the q_vector.
827  **/
828 static void igb_free_q_vectors(struct igb_adapter *adapter)
829 {
830         int v_idx;
831
832         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
833                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
834                 adapter->q_vector[v_idx] = NULL;
835                 if (!q_vector)
836                         continue;
837                 netif_napi_del(&q_vector->napi);
838 #ifndef IGB_NO_LRO
839                 if (q_vector->lrolist) {
840                         __skb_queue_purge(&q_vector->lrolist->active);
841                         vfree(q_vector->lrolist);
842                         q_vector->lrolist = NULL;
843                 }
844 #endif
845                 kfree(q_vector);
846         }
847         adapter->num_q_vectors = 0;
848 }
849
850 /**
851  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
852  *
853  * This function resets the device so that it has 0 rx queues, tx queues, and
854  * MSI-X interrupts allocated.
855  */
856 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
857 {
858         igb_free_queues(adapter);
859         igb_free_q_vectors(adapter);
860         igb_reset_interrupt_capability(adapter);
861 }
862
863 /**
864  * igb_process_mdd_event
865  * @adapter: board private structure
866  *
867  * Identify a malicious VF, disable the VF TX/RX queues and log a message.
868  */
869 static void igb_process_mdd_event(struct igb_adapter *adapter)
870 {
871         struct e1000_hw *hw = &adapter->hw;
872         u32 lvmmc, vfte, vfre, mdfb;
873         u8 vf_queue;
874
875         lvmmc = E1000_READ_REG(hw, E1000_LVMMC);
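        /* the offending VF number is carried in the upper bits of LVMMC */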
876         vf_queue = lvmmc >> 29;
877
878         /* VF index cannot be greater than or equal to the number of allocated VFs */
879         if (vf_queue >= adapter->vfs_allocated_count)
880                 return;
881
882         netdev_info(adapter->netdev,
883                     "VF %d misbehaved. VF queues are disabled. "
884                     "VM misbehavior code is 0x%x\n", vf_queue, lvmmc);
885
886         /* Disable VFTE and VFRE related bits */
887         vfte = E1000_READ_REG(hw, E1000_VFTE);
888         vfte &= ~(1 << vf_queue);
889         E1000_WRITE_REG(hw, E1000_VFTE, vfte);
890
891         vfre = E1000_READ_REG(hw, E1000_VFRE);
892         vfre &= ~(1 << vf_queue);
893         E1000_WRITE_REG(hw, E1000_VFRE, vfre);
894
895         /* Disable MDFB related bit */
896         mdfb = E1000_READ_REG(hw, E1000_MDFB);
897         mdfb &= ~(1 << vf_queue);
898         E1000_WRITE_REG(hw, E1000_MDFB, mdfb);
899
900         /* Reset the specific VF */
901         E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST);
902 }
903
904 /**
905  * igb_disable_mdd
906  * @adapter: board private structure
907  *
908  * Disable MDD behavior in the HW
909  **/
910 static void igb_disable_mdd(struct igb_adapter *adapter)
911 {
912         struct e1000_hw *hw = &adapter->hw;
913         u32 reg;
914
915         if (hw->mac.type != e1000_i350)
916                 return;
917
918         reg = E1000_READ_REG(hw, E1000_DTXCTL);
919         reg &= (~E1000_DTXCTL_MDP_EN);
920         E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
921 }
922
923 /**
924  * igb_enable_mdd
925  * @adapter: board private structure
926  *
927  * Enable the HW to detect a malicious driver and send an interrupt to
928  * the driver.
929  *
930  * Only available on the i350 device.
931  **/
932 static void igb_enable_mdd(struct igb_adapter *adapter)
933 {
934         struct e1000_hw *hw = &adapter->hw;
935         u32 reg;
936
937         if (hw->mac.type != e1000_i350)
938                 return;
939
940         reg = E1000_READ_REG(hw, E1000_DTXCTL);
941         reg |= E1000_DTXCTL_MDP_EN;
942         E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
943 }
944
945 /**
946  * igb_reset_sriov_capability - disable SR-IOV if enabled
947  *
948  * Attempt to disable single root IO virtualization capabilities present in the
949  * kernel.
950  **/
951 static void igb_reset_sriov_capability(struct igb_adapter *adapter)
952 {
953         struct pci_dev *pdev = adapter->pdev;
954         struct e1000_hw *hw = &adapter->hw;
955
956         /* reclaim resources allocated to VFs */
957         if (adapter->vf_data) {
958                 if (!igb_check_vf_assignment(adapter)) {
959                         /*
960                          * disable iov and allow time for transactions to
961                          * clear
962                          */
963                         pci_disable_sriov(pdev);
964                         msleep(500);
965
966                         dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n");
967                 } else {
968                         dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled - "
969                                         "VF(s) are assigned to guests!\n");
970                 }
971                 /* Disable Malicious Driver Detection */
972                 igb_disable_mdd(adapter);
973
974                 /* free vf data storage */
975                 kfree(adapter->vf_data);
976                 adapter->vf_data = NULL;
977
978                 /* switch rings back to PF ownership */
979                 E1000_WRITE_REG(hw, E1000_IOVCTL,
980                                 E1000_IOVCTL_REUSE_VFQ);
981                 E1000_WRITE_FLUSH(hw);
982                 msleep(100);
983         }
984
985         adapter->vfs_allocated_count = 0;
986 }
987
988 /**
989  * igb_set_sriov_capability - setup SR-IOV if supported
990  *
991  * Attempt to enable single root IO virtualization capabilities present in the
992  * kernel.
993  **/
994 static void igb_set_sriov_capability(struct igb_adapter *adapter)
995 {
996         struct pci_dev *pdev = adapter->pdev;
997         int old_vfs = 0;
998         int i;
999
1000 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
1001         old_vfs = igb_find_enabled_vfs(adapter);
1002 #endif
1003         if (old_vfs) {
1004                 dev_info(pci_dev_to_dev(pdev),
1005                                 "%d pre-allocated VFs found - override "
1006                                 "max_vfs setting of %d\n", old_vfs,
1007                                 adapter->vfs_allocated_count);
1008                 adapter->vfs_allocated_count = old_vfs;
1009         }
1010         /* no VFs requested, do nothing */
1011         if (!adapter->vfs_allocated_count)
1012                 return;
1013
1014         /* allocate vf data storage */
1015         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1016                                    sizeof(struct vf_data_storage),
1017                                    GFP_KERNEL);
1018
1019         if (adapter->vf_data) {
1020                 if (!old_vfs) {
1021                         if (pci_enable_sriov(pdev,
1022                                         adapter->vfs_allocated_count))
1023                                 goto err_out;
1024                 }
1025                 for (i = 0; i < adapter->vfs_allocated_count; i++)
1026                         igb_vf_configure(adapter, i);
1027
1028                 /* DMA Coalescing is not supported in IOV mode. */
1029                 if (adapter->hw.mac.type >= e1000_i350)
1030                         adapter->dmac = IGB_DMAC_DISABLE;
1031                 if (adapter->hw.mac.type < e1000_i350)
1032                         adapter->flags |= IGB_FLAG_DETECT_BAD_DMA;
1033                 return;
1034
1035         }
1036
1037 err_out:
1038         kfree(adapter->vf_data);
1039         adapter->vf_data = NULL;
1040         adapter->vfs_allocated_count = 0;
1041         dev_warn(pci_dev_to_dev(pdev),
1042                         "Failed to initialize SR-IOV virtualization\n");
1043 }
1044
1045 /**
1046  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1047  *
1048  * Attempt to configure interrupts using the best available
1049  * capabilities of the hardware and kernel.
1050  **/
1051 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
1052 {
1053         struct pci_dev *pdev = adapter->pdev;
1054         int err;
1055         int numvecs, i;
1056
1057         /* Number of supported queues. */
1058         adapter->num_rx_queues = adapter->rss_queues;
1059
1060         if (adapter->vmdq_pools > 1)
1061                 adapter->num_rx_queues += adapter->vmdq_pools - 1;
1062
1063 #ifdef HAVE_TX_MQ
1064         if (adapter->vmdq_pools)
1065                 adapter->num_tx_queues = adapter->vmdq_pools;
1066         else
1067                 adapter->num_tx_queues = adapter->num_rx_queues;
1068 #else
1069         adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools);
1070 #endif
1071
1072         switch (adapter->int_mode) {
1073         case IGB_INT_MODE_MSIX:
1074                 /* start with one vector for every rx queue */
1075                 numvecs = adapter->num_rx_queues;
1076
1077                 /* if tx handler is separate add 1 for every tx queue */
1078                 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1079                         numvecs += adapter->num_tx_queues;
1080
1081                 /* store the number of vectors reserved for queues */
1082                 adapter->num_q_vectors = numvecs;
1083
1084                 /* add 1 vector for link status interrupts */
1085                 numvecs++;
1086                 adapter->msix_entries = kcalloc(numvecs,
1087                                                 sizeof(struct msix_entry),
1088                                                 GFP_KERNEL);
1089                 if (adapter->msix_entries) {
1090                         for (i = 0; i < numvecs; i++)
1091                                 adapter->msix_entries[i].entry = i;
1092
1093                         err = pci_enable_msix(pdev,
1094                                               adapter->msix_entries, numvecs);
1095                         if (err == 0)
1096                                 break;
1097                 }
1098                 /* MSI-X failed, so fall through and try MSI */
1099                 dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. "
1100                          "Falling back to MSI interrupts.\n");
1101                 igb_reset_interrupt_capability(adapter);
1102         case IGB_INT_MODE_MSI:
1103                 if (!pci_enable_msi(pdev))
1104                         adapter->flags |= IGB_FLAG_HAS_MSI;
1105                 else
1106                         dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI "
1107                                  "interrupts.  Falling back to legacy "
1108                                  "interrupts.\n");
1109                 /* Fall through */
1110         case IGB_INT_MODE_LEGACY:
1111                 /* disable advanced features and set number of queues to 1 */
1112                 igb_reset_sriov_capability(adapter);
1113                 adapter->vmdq_pools = 0;
1114                 adapter->rss_queues = 1;
1115                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1116                 adapter->num_rx_queues = 1;
1117                 adapter->num_tx_queues = 1;
1118                 adapter->num_q_vectors = 1;
1119                 /* legacy interrupts are the system default; nothing more to enable */
1120                 break;
1121         }
1122
1123 #ifdef HAVE_TX_MQ
1124         /* Notify the stack of the (possibly) reduced Tx Queue count. */
1125 #ifdef CONFIG_NETDEVICES_MULTIQUEUE
1126         adapter->netdev->egress_subqueue_count = adapter->num_tx_queues;
1127 #else
1128         adapter->netdev->real_num_tx_queues =
1129                         (adapter->vmdq_pools ? 1 : adapter->num_tx_queues);
1130 #endif /* CONFIG_NETDEVICES_MULTIQUEUE */
1131 #endif /* HAVE_TX_MQ */
1132 }
1133
1134 /**
1135  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1136  * @adapter: board private structure to initialize
1137  *
1138  * We allocate one q_vector per queue interrupt.  If allocation fails we
1139  * return -ENOMEM.
1140  **/
1141 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1142 {
1143         struct igb_q_vector *q_vector;
1144         struct e1000_hw *hw = &adapter->hw;
1145         int v_idx;
1146 #ifdef HAVE_DEVICE_NUMA_NODE
1147         int orig_node = adapter->node;
1148 #endif /* HAVE_DEVICE_NUMA_NODE */
1149
1150         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1151 #ifdef HAVE_DEVICE_NUMA_NODE
1152                 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1153                                                 adapter->num_tx_queues)) &&
1154                     (adapter->num_rx_queues == v_idx))
1155                         adapter->node = orig_node;
1156                 if (orig_node == -1) {
1157                         int cur_node = next_online_node(adapter->node);
1158                         if (cur_node == MAX_NUMNODES)
1159                                 cur_node = first_online_node;
1160                         adapter->node = cur_node;
1161                 }
1162 #endif /* HAVE_DEVICE_NUMA_NODE */
1163                 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1164                                         adapter->node);
1165                 if (!q_vector)
1166                         q_vector = kzalloc(sizeof(struct igb_q_vector),
1167                                            GFP_KERNEL);
1168                 if (!q_vector)
1169                         goto err_out;
1170                 q_vector->adapter = adapter;
1171                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1172                 q_vector->itr_val = IGB_START_ITR;
1173                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1174                 adapter->q_vector[v_idx] = q_vector;
1175 #ifndef IGB_NO_LRO
1176                 if (v_idx < adapter->num_rx_queues) {
1177                         int size = sizeof(struct igb_lro_list);
1178                         q_vector->lrolist = vzalloc_node(size, q_vector->numa_node);
1179                         if (!q_vector->lrolist)
1180                                 q_vector->lrolist = vzalloc(size);
1181                         if (!q_vector->lrolist)
1182                                 goto err_out;
1183                         __skb_queue_head_init(&q_vector->lrolist->active);
1184                 }
1185 #endif /* IGB_NO_LRO */
1186         }
1187 #ifdef HAVE_DEVICE_NUMA_NODE
1188         /* Restore the adapter's original node */
1189         adapter->node = orig_node;
1190 #endif /* HAVE_DEVICE_NUMA_NODE */
1191
1192         return 0;
1193
1194 err_out:
1195 #ifdef HAVE_DEVICE_NUMA_NODE
1196         /* Restore the adapter's original node */
1197         adapter->node = orig_node;
1198 #endif /* HAVE_DEVICE_NUMA_NODE */
1199         igb_free_q_vectors(adapter);
1200         return -ENOMEM;
1201 }
1202
1203 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1204                                       int ring_idx, int v_idx)
1205 {
1206         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1207
1208         q_vector->rx.ring = adapter->rx_ring[ring_idx];
1209         q_vector->rx.ring->q_vector = q_vector;
1210         q_vector->rx.count++;
1211         q_vector->itr_val = adapter->rx_itr_setting;
1212         if (q_vector->itr_val && q_vector->itr_val <= 3)
1213                 q_vector->itr_val = IGB_START_ITR;
1214 }
1215
1216 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1217                                       int ring_idx, int v_idx)
1218 {
1219         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1220
1221         q_vector->tx.ring = adapter->tx_ring[ring_idx];
1222         q_vector->tx.ring->q_vector = q_vector;
1223         q_vector->tx.count++;
1224         q_vector->itr_val = adapter->tx_itr_setting;
1225         q_vector->tx.work_limit = adapter->tx_work_limit;
1226         if (q_vector->itr_val && q_vector->itr_val <= 3)
1227                 q_vector->itr_val = IGB_START_ITR;
1228 }
1229
1230 /**
1231  * igb_map_ring_to_vector - maps allocated queues to vectors
1232  *
1233  * This function maps the recently allocated queues to vectors.
1234  **/
1235 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1236 {
1237         int i;
1238         int v_idx = 0;
1239
1240         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1241             (adapter->num_q_vectors < adapter->num_tx_queues))
1242                 return -ENOMEM;
1243
1244         if (adapter->num_q_vectors >=
1245             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1246                 for (i = 0; i < adapter->num_rx_queues; i++)
1247                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1248                 for (i = 0; i < adapter->num_tx_queues; i++)
1249                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1250         } else {
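                /*
                 * Fewer vectors than rings: let each Tx ring share a vector
                 * with the Rx ring of the same index, then map any leftover
                 * Tx rings to their own vectors.
                 */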
1251                 for (i = 0; i < adapter->num_rx_queues; i++) {
1252                         if (i < adapter->num_tx_queues)
1253                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1254                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1255                 }
1256                 for (; i < adapter->num_tx_queues; i++)
1257                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1258         }
1259         return 0;
1260 }
1261
1262 /**
1263  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1264  *
1265  * This function initializes the interrupts and allocates all of the queues.
1266  **/
1267 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1268 {
1269         struct pci_dev *pdev = adapter->pdev;
1270         int err;
1271
1272         igb_set_interrupt_capability(adapter);
1273
1274         err = igb_alloc_q_vectors(adapter);
1275         if (err) {
1276                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n");
1277                 goto err_alloc_q_vectors;
1278         }
1279
1280         err = igb_alloc_queues(adapter);
1281         if (err) {
1282                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
1283                 goto err_alloc_queues;
1284         }
1285
1286         err = igb_map_ring_to_vector(adapter);
1287         if (err) {
1288                 dev_err(pci_dev_to_dev(pdev), "Invalid q_vector to ring mapping\n");
1289                 goto err_map_queues;
1290         }
1291
1292
1293         return 0;
1294 err_map_queues:
1295         igb_free_queues(adapter);
1296 err_alloc_queues:
1297         igb_free_q_vectors(adapter);
1298 err_alloc_q_vectors:
1299         igb_reset_interrupt_capability(adapter);
1300         return err;
1301 }
1302
1303 /**
1304  * igb_request_irq - initialize interrupts
1305  *
1306  * Attempts to configure interrupts using the best available
1307  * capabilities of the hardware and kernel.
1308  **/
1309 static int igb_request_irq(struct igb_adapter *adapter)
1310 {
1311         struct net_device *netdev = adapter->netdev;
1312         struct pci_dev *pdev = adapter->pdev;
1313         int err = 0;
1314
1315         if (adapter->msix_entries) {
1316                 err = igb_request_msix(adapter);
1317                 if (!err)
1318                         goto request_done;
1319                 /* fall back to MSI */
1320                 igb_clear_interrupt_scheme(adapter);
1321                 igb_reset_sriov_capability(adapter);
1322                 if (!pci_enable_msi(pdev))
1323                         adapter->flags |= IGB_FLAG_HAS_MSI;
1324                 igb_free_all_tx_resources(adapter);
1325                 igb_free_all_rx_resources(adapter);
1326                 adapter->num_tx_queues = 1;
1327                 adapter->num_rx_queues = 1;
1328                 adapter->num_q_vectors = 1;
1329                 err = igb_alloc_q_vectors(adapter);
1330                 if (err) {
1331                         dev_err(pci_dev_to_dev(pdev),
1332                                 "Unable to allocate memory for vectors\n");
1333                         goto request_done;
1334                 }
1335                 err = igb_alloc_queues(adapter);
1336                 if (err) {
1337                         dev_err(pci_dev_to_dev(pdev),
1338                                 "Unable to allocate memory for queues\n");
1339                         igb_free_q_vectors(adapter);
1340                         goto request_done;
1341                 }
1342                 igb_setup_all_tx_resources(adapter);
1343                 igb_setup_all_rx_resources(adapter);
1344         }
1345
1346         igb_assign_vector(adapter->q_vector[0], 0);
1347
1348         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1349                 err = request_irq(pdev->irq, &igb_intr_msi, 0,
1350                                   netdev->name, adapter);
1351                 if (!err)
1352                         goto request_done;
1353
1354                 /* fall back to legacy interrupts */
1355                 igb_reset_interrupt_capability(adapter);
1356                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1357         }
1358
1359         err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED,
1360                           netdev->name, adapter);
1361
1362         if (err)
1363                 dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n",
1364                         err);
1365
1366 request_done:
1367         return err;
1368 }
1369
1370 static void igb_free_irq(struct igb_adapter *adapter)
1371 {
1372         if (adapter->msix_entries) {
1373                 int vector = 0, i;
1374
1375                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1376
1377                 for (i = 0; i < adapter->num_q_vectors; i++)
1378                         free_irq(adapter->msix_entries[vector++].vector,
1379                                  adapter->q_vector[i]);
1380         } else {
1381                 free_irq(adapter->pdev->irq, adapter);
1382         }
1383 }
1384
1385 /**
1386  * igb_irq_disable - Mask off interrupt generation on the NIC
1387  * @adapter: board private structure
1388  **/
1389 static void igb_irq_disable(struct igb_adapter *adapter)
1390 {
1391         struct e1000_hw *hw = &adapter->hw;
1392
1393         /*
1394          * we need to be careful when disabling interrupts.  The VFs are also
1395          * mapped into these registers and so clearing the bits can cause
1396          * issues for the VF drivers, so we only clear the bits we set
1397          */
1398         if (adapter->msix_entries) {
1399                 u32 regval = E1000_READ_REG(hw, E1000_EIAM);
1400                 E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask);
1401                 E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask);
1402                 regval = E1000_READ_REG(hw, E1000_EIAC);
1403                 E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask);
1404         }
1405
1406         E1000_WRITE_REG(hw, E1000_IAM, 0);
1407         E1000_WRITE_REG(hw, E1000_IMC, ~0);
1408         E1000_WRITE_FLUSH(hw);
1409
1410         if (adapter->msix_entries) {
1411                 int vector = 0, i;
1412
1413                 synchronize_irq(adapter->msix_entries[vector++].vector);
1414
1415                 for (i = 0; i < adapter->num_q_vectors; i++)
1416                         synchronize_irq(adapter->msix_entries[vector++].vector);
1417         } else {
1418                 synchronize_irq(adapter->pdev->irq);
1419         }
1420 }
1421
1422 /**
1423  * igb_irq_enable - Enable default interrupt generation settings
1424  * @adapter: board private structure
1425  **/
1426 static void igb_irq_enable(struct igb_adapter *adapter)
1427 {
1428         struct e1000_hw *hw = &adapter->hw;
1429
1430         if (adapter->msix_entries) {
1431                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1432                 u32 regval = E1000_READ_REG(hw, E1000_EIAC);
1433                 E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask);
1434                 regval = E1000_READ_REG(hw, E1000_EIAM);
1435                 E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask);
1436                 E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask);
1437                 if (adapter->vfs_allocated_count) {
1438                         E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF);
1439                         ims |= E1000_IMS_VMMB;
1440                         /* Enable MDD interrupts only for i350 devices */
1441                         if ((adapter->mdd) &&
1442                             (adapter->hw.mac.type == e1000_i350))
1443                                 ims |= E1000_IMS_MDDET;
1444                 }
1445                 E1000_WRITE_REG(hw, E1000_IMS, ims);
1446         } else {
1447                 E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK |
1448                                 E1000_IMS_DRSTA);
1449                 E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK |
1450                                 E1000_IMS_DRSTA);
1451         }
1452 }
1453
1454 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1455 {
1456         struct e1000_hw *hw = &adapter->hw;
1457         u16 vid = adapter->hw.mng_cookie.vlan_id;
1458         u16 old_vid = adapter->mng_vlan_id;
1459
1460         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1461                 /* add VID to filter table */
1462                 igb_vfta_set(adapter, vid, TRUE);
1463                 adapter->mng_vlan_id = vid;
1464         } else {
1465                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1466         }
1467
1468         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1469             (vid != old_vid) &&
1470 #ifdef HAVE_VLAN_RX_REGISTER
1471             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1472 #else
1473             !test_bit(old_vid, adapter->active_vlans)) {
1474 #endif
1475                 /* remove VID from filter table */
1476                 igb_vfta_set(adapter, old_vid, FALSE);
1477         }
1478 }
1479
1480 /**
1481  * igb_release_hw_control - release control of the h/w to f/w
1482  * @adapter: address of board private structure
1483  *
1484  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1485  * For ASF and Pass Through versions of f/w this means that the
1486  * driver is no longer loaded.
1487  *
1488  **/
1489 static void igb_release_hw_control(struct igb_adapter *adapter)
1490 {
1491         struct e1000_hw *hw = &adapter->hw;
1492         u32 ctrl_ext;
1493
1494         /* Let firmware take over control of h/w */
1495         ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
1496         E1000_WRITE_REG(hw, E1000_CTRL_EXT,
1497                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1498 }
1499
1500 /**
1501  * igb_get_hw_control - get control of the h/w from f/w
1502  * @adapter: address of board private structure
1503  *
1504  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1505  * For ASF and Pass Through versions of f/w this means that
1506  * the driver is loaded.
1507  *
1508  **/
1509 static void igb_get_hw_control(struct igb_adapter *adapter)
1510 {
1511         struct e1000_hw *hw = &adapter->hw;
1512         u32 ctrl_ext;
1513
1514         /* Let firmware know the driver has taken over */
1515         ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
1516         E1000_WRITE_REG(hw, E1000_CTRL_EXT,
1517                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1518 }
1519
1520 /**
1521  * igb_configure - configure the hardware for RX and TX
1522  * @adapter: private board structure
1523  **/
1524 static void igb_configure(struct igb_adapter *adapter)
1525 {
1526         struct net_device *netdev = adapter->netdev;
1527         int i;
1528
1529         igb_get_hw_control(adapter);
1530         igb_set_rx_mode(netdev);
1531
1532         igb_restore_vlan(adapter);
1533
1534         igb_setup_tctl(adapter);
1535         igb_setup_mrqc(adapter);
1536         igb_setup_rctl(adapter);
1537
1538         igb_configure_tx(adapter);
1539         igb_configure_rx(adapter);
1540
1541         e1000_rx_fifo_flush_82575(&adapter->hw);
1542 #ifdef CONFIG_NETDEVICES_MULTIQUEUE
1543
1544         if (adapter->num_tx_queues > 1)
1545                 netdev->features |= NETIF_F_MULTI_QUEUE;
1546         else
1547                 netdev->features &= ~NETIF_F_MULTI_QUEUE;
1548 #endif
1549
1550         /* igb_desc_unused() always leaves at least one descriptor
1551          * unused so that next_to_use never catches up with
1552          * next_to_clean (see the illustrative note after this function) */
1553         for (i = 0; i < adapter->num_rx_queues; i++) {
1554                 struct igb_ring *ring = adapter->rx_ring[i];
1555                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1556         }
1557 }
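
/*
 * Illustrative sketch (added, not part of the driver sources): igb_desc_unused()
 * is defined elsewhere in this driver; its arithmetic is essentially the usual
 * ring-buffer "free slots minus one" calculation, assuming next_to_use and
 * next_to_clean wrap at ring->count.
 */
#if 0
static inline u16 igb_desc_unused_sketch(struct igb_ring *ring)
{
        if (ring->next_to_clean > ring->next_to_use)
                return ring->next_to_clean - ring->next_to_use - 1;

        return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
#endif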
1558
1559 /**
1560  * igb_power_up_link - Power up the phy/serdes link
1561  * @adapter: address of board private structure
1562  **/
1563 void igb_power_up_link(struct igb_adapter *adapter)
1564 {
1565         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1566                 e1000_power_up_phy(&adapter->hw);
1567         else
1568                 e1000_power_up_fiber_serdes_link(&adapter->hw);
1569
1570         e1000_phy_hw_reset(&adapter->hw);
1571 }
1572
1573 /**
1574  * igb_power_down_link - Power down the phy/serdes link
1575  * @adapter: address of board private structure
1576  */
1577 static void igb_power_down_link(struct igb_adapter *adapter)
1578 {
1579         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1580                 e1000_power_down_phy(&adapter->hw);
1581         else
1582                 e1000_shutdown_fiber_serdes_link(&adapter->hw);
1583 }
1584
1585 /**
1586  * igb_up - Open the interface and prepare it to handle traffic
1587  * @adapter: board private structure
1588  **/
1589 int igb_up(struct igb_adapter *adapter)
1590 {
1591         struct e1000_hw *hw = &adapter->hw;
1592         int i;
1593
1594         /* hardware has been reset, we need to reload some things */
1595         igb_configure(adapter);
1596
1597         clear_bit(__IGB_DOWN, &adapter->state);
1598
1599         for (i = 0; i < adapter->num_q_vectors; i++)
1600                 napi_enable(&(adapter->q_vector[i]->napi));
1601
1602         if (adapter->msix_entries)
1603                 igb_configure_msix(adapter);
1604         else
1605                 igb_assign_vector(adapter->q_vector[0], 0);
1606
1607         igb_configure_lli(adapter);
1608
1609         /* Clear any pending interrupts. */
1610         E1000_READ_REG(hw, E1000_ICR);
1611         igb_irq_enable(adapter);
1612
1613         /* notify VFs that reset has been completed */
1614         if (adapter->vfs_allocated_count) {
1615                 u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
1616                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1617                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
1618         }
1619
1620         netif_tx_start_all_queues(adapter->netdev);
1621
1622         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
1623                 schedule_work(&adapter->dma_err_task);
1624         /* start the watchdog. */
1625         hw->mac.get_link_status = 1;
1626         schedule_work(&adapter->watchdog_task);
1627
1628         return 0;
1629 }
1630
1631 void igb_down(struct igb_adapter *adapter)
1632 {
1633         struct net_device *netdev = adapter->netdev;
1634         struct e1000_hw *hw = &adapter->hw;
1635         u32 tctl, rctl;
1636         int i;
1637
1638         /* signal that we're down so the interrupt handler does not
1639          * reschedule our watchdog timer */
1640         set_bit(__IGB_DOWN, &adapter->state);
1641
1642         /* disable receives in the hardware */
1643         rctl = E1000_READ_REG(hw, E1000_RCTL);
1644         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
1645         /* flush and sleep below */
1646
1647         netif_tx_stop_all_queues(netdev);
1648
1649         /* disable transmits in the hardware */
1650         tctl = E1000_READ_REG(hw, E1000_TCTL);
1651         tctl &= ~E1000_TCTL_EN;
1652         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
1653         /* flush both disables and wait for them to finish */
1654         E1000_WRITE_FLUSH(hw);
1655         usleep_range(10000, 20000);
1656
1657         for (i = 0; i < adapter->num_q_vectors; i++)
1658                 napi_disable(&(adapter->q_vector[i]->napi));
1659
1660         igb_irq_disable(adapter);
1661
1662         del_timer_sync(&adapter->watchdog_timer);
1663         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
1664                 del_timer_sync(&adapter->dma_err_timer);
1665         del_timer_sync(&adapter->phy_info_timer);
1666
1667         netif_carrier_off(netdev);
1668
1669         /* record the stats before reset */
1670         igb_update_stats(adapter);
1671
1672         adapter->link_speed = 0;
1673         adapter->link_duplex = 0;
1674
1675 #ifdef HAVE_PCI_ERS
1676         if (!pci_channel_offline(adapter->pdev))
1677                 igb_reset(adapter);
1678 #else
1679         igb_reset(adapter);
1680 #endif
1681         igb_clean_all_tx_rings(adapter);
1682         igb_clean_all_rx_rings(adapter);
1683 #ifdef IGB_DCA
1684
1685         /* since we reset the hardware, DCA settings were cleared */
1686         igb_setup_dca(adapter);
1687 #endif
1688 }
1689
1690 void igb_reinit_locked(struct igb_adapter *adapter)
1691 {
1692         WARN_ON(in_interrupt());
1693         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1694                 usleep_range(1000, 2000);
1695         igb_down(adapter);
1696         igb_up(adapter);
1697         clear_bit(__IGB_RESETTING, &adapter->state);
1698 }
1699
1700 void igb_reset(struct igb_adapter *adapter)
1701 {
1702         struct pci_dev *pdev = adapter->pdev;
1703         struct e1000_hw *hw = &adapter->hw;
1704         struct e1000_mac_info *mac = &hw->mac;
1705         struct e1000_fc_info *fc = &hw->fc;
1706         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1707         u16 hwm;
1708
1709         /* Repartition the PBA for MTUs greater than 9k.
1710          * CTRL.RST is required for the change to take effect.
1711          */
1712         switch (mac->type) {
1713         case e1000_i350:
1714         case e1000_82580:
1715                 pba = E1000_READ_REG(hw, E1000_RXPBS);
1716                 pba = e1000_rxpbs_adjust_82580(pba);
1717                 break;
1718         case e1000_82576:
1719                 pba = E1000_READ_REG(hw, E1000_RXPBS);
1720                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1721                 break;
1722         case e1000_82575:
1723         default:
1724                 pba = E1000_PBA_34K;
1725                 break;
1726         }
1727
1728         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1729             (mac->type < e1000_82576)) {
1730                 /* adjust PBA for jumbo frames */
1731                 E1000_WRITE_REG(hw, E1000_PBA, pba);
1732
1733                 /* To maintain wire speed transmits, the Tx FIFO should be
1734                  * large enough to accommodate two full transmit packets,
1735                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1736                  * the Rx FIFO should be large enough to accommodate at least
1737                  * one full receive packet and is similarly rounded up and
1738                  * expressed in KB. */
1739                 pba = E1000_READ_REG(hw, E1000_PBA);
1740                 /* upper 16 bits hold the Tx packet buffer allocation size in KB */
1741                 tx_space = pba >> 16;
1742                 /* lower 16 bits hold the Rx packet buffer allocation size in KB */
1743                 pba &= 0xffff;
1744                 /* the Tx FIFO also stores 16 bytes of descriptor information per
1745                  * packet; the Ethernet FCS is not counted because hardware appends it */
1746                 min_tx_space = (adapter->max_frame_size +
1747                                 sizeof(union e1000_adv_tx_desc) -
1748                                 ETH_FCS_LEN) * 2;
1749                 min_tx_space = ALIGN(min_tx_space, 1024);
1750                 min_tx_space >>= 10;
1751                 /* software strips receive CRC, so leave room for it */
1752                 min_rx_space = adapter->max_frame_size;
1753                 min_rx_space = ALIGN(min_rx_space, 1024);
1754                 min_rx_space >>= 10;
1755
1756                 /* If the current Tx allocation is less than the min Tx FIFO size,
1757                  * and the shortfall fits within the current Rx allocation, take
1758                  * that space away from the Rx allocation */
1759                 if (tx_space < min_tx_space &&
1760                     ((min_tx_space - tx_space) < pba)) {
1761                         pba = pba - (min_tx_space - tx_space);
1762
1763                         /* if short on rx space, rx wins and must trump tx
1764                          * adjustment */
1765                         if (pba < min_rx_space)
1766                                 pba = min_rx_space;
1767                 }
1768                 E1000_WRITE_REG(hw, E1000_PBA, pba);
1769         }
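
        /*
         * Worked illustration (added, not from the original source): with a
         * 9000-byte MTU, max_frame_size = 9000 + 14 + 4 + 4 = 9022 bytes, so
         * min_tx_space = (9022 + 16 - 4) * 2 = 18068 -> ALIGN(18068, 1024) =
         * 18432 -> 18 KB, and min_rx_space = ALIGN(9022, 1024) = 9216 -> 9 KB.
         */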
1770
1771         /* flow control settings */
1772         /* The high water mark must be low enough to fit one full frame
1773          * (or the size used for early receive) above it in the Rx FIFO.
1774          * Set it to the lower of:
1775          * - 90% of the Rx FIFO size, or
1776          * - the full Rx FIFO size minus one full frame */
1777         hwm = min(((pba << 10) * 9 / 10),
1778                         ((pba << 10) - 2 * adapter->max_frame_size));
1779
1780         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1781         fc->low_water = fc->high_water - 16;
1782         fc->pause_time = 0xFFFF;
1783         fc->send_xon = 1;
1784         fc->current_mode = fc->requested_mode;
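
        /*
         * Worked illustration (added, not from the original source): with the
         * default 82575 PBA of 34 KB and a 1522-byte max frame,
         * hwm = min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772)
         *     = 31334, so high_water = 31334 & 0xFFF0 = 31328 bytes and
         * low_water = 31328 - 16 = 31312 bytes.
         */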
1785
1786         /* disable receive for all VFs and wait one second */
1787         if (adapter->vfs_allocated_count) {
1788                 int i;
1789                 /*
1790                  * Clear all flags except indication that the PF has set
1791                  * the VF MAC addresses administratively
1792                  */
1793                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1794                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1795
1796                 /* ping all the active vfs to let them know we are going down */
1797                 igb_ping_all_vfs(adapter);
1798
1799                 /* disable transmits and receives */
1800                 E1000_WRITE_REG(hw, E1000_VFRE, 0);
1801                 E1000_WRITE_REG(hw, E1000_VFTE, 0);
1802         }
1803
1804         /* Allow time for pending master requests to run */
1805         e1000_reset_hw(hw);
1806         E1000_WRITE_REG(hw, E1000_WUC, 0);
1807
1808         if (e1000_init_hw(hw))
1809                 dev_err(pci_dev_to_dev(pdev), "Hardware Error\n");
1810
1811         igb_init_dmac(adapter, pba);
1812         /* Re-initialize the thermal sensor on i350 devices. */
1813         if (mac->type == e1000_i350 && hw->bus.func == 0) {
1814                 /*
1815                  * If present, re-initialize the external thermal sensor
1816                  * interface.
1817                  */
1818                 if (adapter->ets)
1819                         e1000_set_i2c_bb(hw);
1820                 e1000_init_thermal_sensor_thresh(hw);
1821         }
1822         if (!netif_running(adapter->netdev))
1823                 igb_power_down_link(adapter);
1824
1825         igb_update_mng_vlan(adapter);
1826
1827         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1828         E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1829
1830         e1000_get_phy_info(hw);
1831 }
1832
1833 #ifdef HAVE_NDO_SET_FEATURES
1834 static netdev_features_t igb_fix_features(struct net_device *netdev,
1835                                           netdev_features_t features)
1836 {
1837         /*
1838          * Since separate Rx/Tx VLAN acceleration is not supported,
1839          * make sure the Tx flag is cleared whenever the Rx flag is.
1840          */
1841         if (!(features & NETIF_F_HW_VLAN_RX))
1842                 features &= ~NETIF_F_HW_VLAN_TX;
1843
1844         /* If Rx checksum is disabled, then LRO should also be disabled */
1845         if (!(features & NETIF_F_RXCSUM))
1846                 features &= ~NETIF_F_LRO;
1847
1848         return features;
1849 }
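
/*
 * Added note: the networking core is expected to invoke .ndo_fix_features
 * (e.g. from netdev_update_features()) so the driver can adjust a requested
 * feature mask before .ndo_set_features commits it; the checks above keep the
 * VLAN-acceleration and LRO flags self-consistent.
 */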
1850
1851 static int igb_set_features(struct net_device *netdev,
1852                             netdev_features_t features)
1853 {
1854         u32 changed = netdev->features ^ features;
1855
1856         if (changed & NETIF_F_HW_VLAN_RX)
1857                 igb_vlan_mode(netdev, features);
1858
1859         return 0;
1860 }
1861
1862 #endif /* HAVE_NDO_SET_FEATURES */
1863 #ifdef HAVE_NET_DEVICE_OPS
1864 static const struct net_device_ops igb_netdev_ops = {
1865         .ndo_open               = igb_open,
1866         .ndo_stop               = igb_close,
1867         .ndo_start_xmit         = igb_xmit_frame,
1868         .ndo_get_stats          = igb_get_stats,
1869         .ndo_set_rx_mode        = igb_set_rx_mode,
1870         .ndo_set_mac_address    = igb_set_mac,
1871         .ndo_change_mtu         = igb_change_mtu,
1872         .ndo_do_ioctl           = igb_ioctl,
1873         .ndo_tx_timeout         = igb_tx_timeout,
1874         .ndo_validate_addr      = eth_validate_addr,
1875         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1876         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1877 #ifdef IFLA_VF_MAX
1878         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1879         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1880         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1881         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1882 #endif
1883 #ifdef CONFIG_NET_POLL_CONTROLLER
1884         .ndo_poll_controller    = igb_netpoll,
1885 #endif
1886 #ifdef HAVE_NDO_SET_FEATURES
1887         .ndo_fix_features       = igb_fix_features,
1888         .ndo_set_features       = igb_set_features,
1889 #endif
1890 #ifdef HAVE_VLAN_RX_REGISTER
1891         .ndo_vlan_rx_register   = igb_vlan_mode,
1892 #endif
1893 };
1894
1895 #ifdef CONFIG_IGB_VMDQ_NETDEV
1896 static const struct net_device_ops igb_vmdq_ops = {
1897         .ndo_open               = &igb_vmdq_open,
1898         .ndo_stop               = &igb_vmdq_close,
1899         .ndo_start_xmit         = &igb_vmdq_xmit_frame,
1900         .ndo_get_stats          = &igb_vmdq_get_stats,
1901         .ndo_set_rx_mode        = &igb_vmdq_set_rx_mode,
1902         .ndo_validate_addr      = eth_validate_addr,
1903         .ndo_set_mac_address    = &igb_vmdq_set_mac,
1904         .ndo_change_mtu         = &igb_vmdq_change_mtu,
1905         .ndo_tx_timeout         = &igb_vmdq_tx_timeout,
1906         .ndo_vlan_rx_register   = &igb_vmdq_vlan_rx_register,
1907         .ndo_vlan_rx_add_vid    = &igb_vmdq_vlan_rx_add_vid,
1908         .ndo_vlan_rx_kill_vid   = &igb_vmdq_vlan_rx_kill_vid,
1909 };
1910
1911 #endif /* CONFIG_IGB_VMDQ_NETDEV */
1912 #endif /* HAVE_NET_DEVICE_OPS */
1913 #ifdef CONFIG_IGB_VMDQ_NETDEV
1914 void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev)
1915 {
1916 #ifdef HAVE_NET_DEVICE_OPS
1917         vnetdev->netdev_ops = &igb_vmdq_ops;
1918 #else
1919         dev->open = &igb_vmdq_open;
1920         dev->stop = &igb_vmdq_close;
1921         dev->hard_start_xmit = &igb_vmdq_xmit_frame;
1922         dev->get_stats = &igb_vmdq_get_stats;
1923 #ifdef HAVE_SET_RX_MODE
1924         dev->set_rx_mode = &igb_vmdq_set_rx_mode;
1925 #endif
1926         dev->set_multicast_list = &igb_vmdq_set_rx_mode;
1927         dev->set_mac_address = &igb_vmdq_set_mac;
1928         dev->change_mtu = &igb_vmdq_change_mtu;
1929 #ifdef HAVE_TX_TIMEOUT
1930         dev->tx_timeout = &igb_vmdq_tx_timeout;
1931 #endif
1932 #ifdef NETIF_F_HW_VLAN_TX
1933         dev->vlan_rx_register = &igb_vmdq_vlan_rx_register;
1934         dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid;
1935         dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid;
1936 #endif
1937 #endif
1938         igb_vmdq_set_ethtool_ops(vnetdev);
1939         vnetdev->watchdog_timeo = 5 * HZ;
1940
1941 }
1942
1943 int igb_init_vmdq_netdevs(struct igb_adapter *adapter)
1944 {
1945         int pool, err = 0, base_queue;
1946         struct net_device *vnetdev;
1947         struct igb_vmdq_adapter *vmdq_adapter;
1948
1949         for (pool = 1; pool < adapter->vmdq_pools; pool++) {
1950                 int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues);
1951                 base_queue = pool * qpp;
1952                 vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter));
1953                 if (!vnetdev) {
1954                         err = -ENOMEM;
1955                         break;
1956                 }
1957                 vmdq_adapter = netdev_priv(vnetdev);
1958                 vmdq_adapter->vnetdev = vnetdev;
1959                 vmdq_adapter->real_adapter = adapter;
1960                 vmdq_adapter->rx_ring = adapter->rx_ring[base_queue];
1961                 vmdq_adapter->tx_ring = adapter->tx_ring[base_queue];
1962                 igb_assign_vmdq_netdev_ops(vnetdev);
1963                 snprintf(vnetdev->name, IFNAMSIZ, "%sv%d",
1964                          adapter->netdev->name, pool);
1965                 vnetdev->features = adapter->netdev->features;
1966 #ifdef HAVE_NETDEV_VLAN_FEATURES
1967                 vnetdev->vlan_features = adapter->netdev->vlan_features;
1968 #endif
1969                 adapter->vmdq_netdev[pool-1] = vnetdev;
1970                 err = register_netdev(vnetdev);
1971                 if (err)
1972                         break;
1973         }
1974         return err;
1975 }
1976
1977 int igb_remove_vmdq_netdevs(struct igb_adapter *adapter)
1978 {
1979         int pool, err = 0;
1980
1981         for (pool = 1; pool < adapter->vmdq_pools; pool++) {
1982                 unregister_netdev(adapter->vmdq_netdev[pool-1]);
1983                 free_netdev(adapter->vmdq_netdev[pool-1]);
1984                 adapter->vmdq_netdev[pool-1] = NULL;
1985         }
1986         return err;
1987 }
1988 #endif /* CONFIG_IGB_VMDQ_NETDEV */
1989
1990 /**
1991  * igb_probe - Device Initialization Routine
1992  * @pdev: PCI device information struct
1993  * @ent: entry in igb_pci_tbl
1994  *
1995  * Returns 0 on success, negative on failure
1996  *
1997  * igb_probe initializes an adapter identified by a pci_dev structure.
1998  * The OS initialization, configuring of the adapter private structure,
1999  * and a hardware reset occur.
2000  **/
2001 static int __devinit igb_probe(struct pci_dev *pdev,
2002                                const struct pci_device_id *ent)
2003 {
2004         struct net_device *netdev;
2005         struct igb_adapter *adapter;
2006         struct e1000_hw *hw;
2007         u16 eeprom_data = 0;
2008         u8 pba_str[E1000_PBANUM_LENGTH];
2009         s32 ret_val;
2010         static int global_quad_port_a; /* global quad port a indication */
2011         int i, err, pci_using_dac;
2012         static int cards_found;
2013
2014         err = pci_enable_device_mem(pdev);
2015         if (err)
2016                 return err;
2017
2018         pci_using_dac = 0;
2019         err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
2020         if (!err) {
2021                 err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
2022                 if (!err)
2023                         pci_using_dac = 1;
2024         } else {
2025                 err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
2026                 if (err) {
2027                         err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
2028                         if (err) {
2029                                 IGB_ERR("No usable DMA configuration, "
2030                                         "aborting\n");
2031                                 goto err_dma;
2032                         }
2033                 }
2034         }
2035
2036 #ifndef HAVE_ASPM_QUIRKS
2037         /* 82575 requires that the pci-e link partner disable the L0s state */
2038         switch (pdev->device) {
2039         case E1000_DEV_ID_82575EB_COPPER:
2040         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2041         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2042                 pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
2043         default:
2044                 break;
2045         }
2046
2047 #endif /* HAVE_ASPM_QUIRKS */
2048         err = pci_request_selected_regions(pdev,
2049                                            pci_select_bars(pdev,
2050                                                            IORESOURCE_MEM),
2051                                            igb_driver_name);
2052         if (err)
2053                 goto err_pci_reg;
2054
2055         pci_enable_pcie_error_reporting(pdev);
2056
2057         pci_set_master(pdev);
2058
2059         err = -ENOMEM;
2060 #ifdef HAVE_TX_MQ
2061         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
2062                                    IGB_MAX_TX_QUEUES);
2063 #else
2064         netdev = alloc_etherdev(sizeof(struct igb_adapter));
2065 #endif /* HAVE_TX_MQ */
2066         if (!netdev)
2067                 goto err_alloc_etherdev;
2068
2069         SET_MODULE_OWNER(netdev);
2070         SET_NETDEV_DEV(netdev, &pdev->dev);
2071
2072         pci_set_drvdata(pdev, netdev);
2073         adapter = netdev_priv(netdev);
2074         adapter->netdev = netdev;
2075         adapter->pdev = pdev;
2076         hw = &adapter->hw;
2077         hw->back = adapter;
2078         adapter->port_num = hw->bus.func;
2079         adapter->msg_enable = (1 << debug) - 1;
2080
2081 #ifdef HAVE_PCI_ERS
2082         err = pci_save_state(pdev);
2083         if (err)
2084                 goto err_ioremap;
2085 #endif
2086         err = -EIO;
2087         hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
2088                               pci_resource_len(pdev, 0));
2089         if (!hw->hw_addr)
2090                 goto err_ioremap;
2091
2092 #ifdef HAVE_NET_DEVICE_OPS
2093         netdev->netdev_ops = &igb_netdev_ops;
2094 #else /* HAVE_NET_DEVICE_OPS */
2095         netdev->open = &igb_open;
2096         netdev->stop = &igb_close;
2097         netdev->get_stats = &igb_get_stats;
2098 #ifdef HAVE_SET_RX_MODE
2099         netdev->set_rx_mode = &igb_set_rx_mode;
2100 #endif
2101         netdev->set_multicast_list = &igb_set_rx_mode;
2102         netdev->set_mac_address = &igb_set_mac;
2103         netdev->change_mtu = &igb_change_mtu;
2104         netdev->do_ioctl = &igb_ioctl;
2105 #ifdef HAVE_TX_TIMEOUT
2106         netdev->tx_timeout = &igb_tx_timeout;
2107 #endif
2108         netdev->vlan_rx_register = igb_vlan_mode;
2109         netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
2110         netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
2111 #ifdef CONFIG_NET_POLL_CONTROLLER
2112         netdev->poll_controller = igb_netpoll;
2113 #endif
2114         netdev->hard_start_xmit = &igb_xmit_frame;
2115 #endif /* HAVE_NET_DEVICE_OPS */
2116         igb_set_ethtool_ops(netdev);
2117 #ifdef HAVE_TX_TIMEOUT
2118         netdev->watchdog_timeo = 5 * HZ;
2119 #endif
2120
2121         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
2122
2123         adapter->bd_number = cards_found;
2124
2125         /* setup the private structure */
2126         err = igb_sw_init(adapter);
2127         if (err)
2128                 goto err_sw_init;
2129
2130         e1000_get_bus_info(hw);
2131
2132         hw->phy.autoneg_wait_to_complete = FALSE;
2133         hw->mac.adaptive_ifs = FALSE;
2134
2135         /* Copper options */
2136         if (hw->phy.media_type == e1000_media_type_copper) {
2137 #ifdef ETH_TP_MDI_X
2138                 hw->phy.mdix = ETH_TP_MDI_INVALID;
2139 #else
2140                 hw->phy.mdix = AUTO_ALL_MODES;
2141 #endif /* ETH_TP_MDI_X */
2142                 hw->phy.disable_polarity_correction = FALSE;
2143                 hw->phy.ms_type = e1000_ms_hw_default;
2144         }
2145
2146         if (e1000_check_reset_block(hw))
2147                 dev_info(pci_dev_to_dev(pdev),
2148                         "PHY reset is blocked due to SOL/IDER session.\n");
2149
2150         /*
2151          * features is initialized to 0 at allocation time, but it might
2152          * already have bits set by igb_sw_init, so use an OR instead of
2153          * an assignment.
2154          */
2155         netdev->features |= NETIF_F_SG |
2156                             NETIF_F_IP_CSUM |
2157 #ifdef NETIF_F_IPV6_CSUM
2158                             NETIF_F_IPV6_CSUM |
2159 #endif
2160 #ifdef NETIF_F_TSO
2161                             NETIF_F_TSO |
2162 #ifdef NETIF_F_TSO6
2163                             NETIF_F_TSO6 |
2164 #endif
2165 #endif /* NETIF_F_TSO */
2166 #ifdef NETIF_F_RXHASH
2167                             NETIF_F_RXHASH |
2168 #endif
2169 #ifdef HAVE_NDO_SET_FEATURES
2170                             NETIF_F_RXCSUM |
2171 #endif
2172                             NETIF_F_HW_VLAN_RX |
2173                             NETIF_F_HW_VLAN_TX;
2174
2175 #ifdef HAVE_NDO_SET_FEATURES
2176         /* copy netdev features into list of user selectable features */
2177         netdev->hw_features |= netdev->features;
2178 #ifndef IGB_NO_LRO
2179
2180         /* give us the option of enabling LRO later */
2181         netdev->hw_features |= NETIF_F_LRO;
2182 #endif
2183 #else
2184 #ifdef NETIF_F_GRO
2185
2186         /* this is only needed on kernels prior to 2.6.39 */
2187         netdev->features |= NETIF_F_GRO;
2188 #endif
2189 #endif
2190
2191         /* set this bit last since it cannot be part of hw_features */
2192         netdev->features |= NETIF_F_HW_VLAN_FILTER;
2193
2194 #ifdef HAVE_NETDEV_VLAN_FEATURES
2195         netdev->vlan_features |= NETIF_F_TSO |
2196                                  NETIF_F_TSO6 |
2197                                  NETIF_F_IP_CSUM |
2198                                  NETIF_F_IPV6_CSUM |
2199                                  NETIF_F_SG;
2200
2201 #endif
2202         if (pci_using_dac)
2203                 netdev->features |= NETIF_F_HIGHDMA;
2204
2205         if (hw->mac.type >= e1000_82576)
2206                 netdev->features |= NETIF_F_SCTP_CSUM;
2207
2208         adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
2209
2210         /* before reading the NVM, reset the controller to put the device in a
2211          * known good starting state */
2212         e1000_reset_hw(hw);
2213
2214         /* make sure the NVM is good */
2215         if (e1000_validate_nvm_checksum(hw) < 0) {
2216                 dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
2217                         " Valid\n");
2218                 err = -EIO;
2219                 goto err_eeprom;
2220         }
2221
2222         /* copy the MAC address out of the NVM */
2223         if (e1000_read_mac_addr(hw))
2224                 dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
2225         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2226 #ifdef ETHTOOL_GPERMADDR
2227         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2228
2229         if (!is_valid_ether_addr(netdev->perm_addr)) {
2230 #else
2231         if (!is_valid_ether_addr(netdev->dev_addr)) {
2232 #endif
2233                 dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
2234                 err = -EIO;
2235                 goto err_eeprom;
2236         }
2237
2238         memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
2239         adapter->mac_table[0].queue = adapter->vfs_allocated_count;
2240         adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
2241         igb_rar_set(adapter, 0);
2242
2243         /* get firmware version for ethtool -i */
2244         e1000_read_nvm(&adapter->hw, 5, 1, &adapter->fw_version);
2245         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
2246                     (unsigned long) adapter);
2247         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2248                 setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
2249                             (unsigned long) adapter);
2250         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
2251                     (unsigned long) adapter);
2252
2253         INIT_WORK(&adapter->reset_task, igb_reset_task);
2254         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2255         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2256                 INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
2257
2258         /* Initialize link properties that are user-changeable */
2259         adapter->fc_autoneg = true;
2260         hw->mac.autoneg = true;
2261         hw->phy.autoneg_advertised = 0x2f;
2262
2263         hw->fc.requested_mode = e1000_fc_default;
2264         hw->fc.current_mode = e1000_fc_default;
2265
2266         e1000_validate_mdi_setting(hw);
2267
2268         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2269          * enable the ACPI Magic Packet filter
2270          */
2271
2272         if (hw->bus.func == 0)
2273                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2274         else if (hw->mac.type >= e1000_82580)
2275                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2276                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2277                                  &eeprom_data);
2278         else if (hw->bus.func == 1)
2279                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2280
2281         if (eeprom_data & IGB_EEPROM_APME)
2282                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2283
2284         /* now that we have the eeprom settings, apply the special cases where
2285          * the eeprom may be wrong or the board simply won't support wake on
2286          * lan on a particular port */
2287         switch (pdev->device) {
2288         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2289                 adapter->eeprom_wol = 0;
2290                 break;
2291         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2292         case E1000_DEV_ID_82576_FIBER:
2293         case E1000_DEV_ID_82576_SERDES:
2294                 /* Wake events are only supported on port A for dual fiber
2295                  * regardless of eeprom setting */
2296                 if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
2297                         adapter->eeprom_wol = 0;
2298                 break;
2299         case E1000_DEV_ID_82576_QUAD_COPPER:
2300         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2301                 /* if quad port adapter, disable WoL on all but port A */
2302                 if (global_quad_port_a != 0)
2303                         adapter->eeprom_wol = 0;
2304                 else
2305                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2306                 /* Reset for multiple quad port adapters */
2307                 if (++global_quad_port_a == 4)
2308                         global_quad_port_a = 0;
2309                 break;
2310         }
2311
2312         /* initialize the wol settings based on the eeprom settings */
2313         adapter->wol = adapter->eeprom_wol;
2314         device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);
2315
2316         /* reset the hardware with the new settings */
2317         igb_reset(adapter);
2318
2319         /* let the f/w know that the h/w is now under the control of the
2320          * driver. */
2321         igb_get_hw_control(adapter);
2322
2323         strncpy(netdev->name, "eth%d", IFNAMSIZ);
2324         err = register_netdev(netdev);
2325         if (err)
2326                 goto err_register;
2327
2328 #ifdef CONFIG_IGB_VMDQ_NETDEV
2329         err = igb_init_vmdq_netdevs(adapter);
2330         if (err)
2331                 goto err_register;
2332 #endif
2333         /* carrier off reporting is important to ethtool even BEFORE open */
2334         netif_carrier_off(netdev);
2335
2336 #ifdef IGB_DCA
2337         if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
2338                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2339                 dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
2340                 igb_setup_dca(adapter);
2341         }
2342
2343 #endif
2344 #ifdef HAVE_HW_TIME_STAMP
2345         /* do hw tstamp init after resetting */
2346         igb_init_hw_timer(adapter);
2347
2348 #endif
2349         dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
2350         /* print bus type/speed/width info */
2351         dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
2352                  netdev->name,
2353                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
2354                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
2355                                                             "unknown"),
2356                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4\n" :
2357                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2\n" :
2358                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1\n" :
2359                    "unknown"));
2360         dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
2361         for (i = 0; i < 6; i++)
2362                 printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
2363
2364         ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
2365         if (ret_val)
2366                 strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
2367         dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
2368                  pba_str);
2369
2370
2371         /* Initialize the thermal sensor on i350 devices. */
2372         if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
2373                 u16 ets_word;
2374
2375                 /*
2376                  * Read the NVM to determine if this i350 device supports an
2377                  * external thermal sensor.
2378                  */
2379                 e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
2380                 if (ets_word != 0x0000 && ets_word != 0xFFFF)
2381                         adapter->ets = true;
2382                 else
2383                         adapter->ets = false;
2384 #ifdef IGB_SYSFS
2385                 igb_sysfs_init(adapter);
2386 #else
2387 #ifdef IGB_PROCFS
2388                 igb_procfs_init(adapter);
2389 #endif /* IGB_PROCFS */
2390 #endif /* IGB_SYSFS */
2391         } else {
2392                 adapter->ets = false;
2393         }
2394
2395         switch (hw->mac.type) {
2396         case e1000_i350:
2397                 /* Enable EEE for internal copper PHY devices */
2398                 if (hw->phy.media_type == e1000_media_type_copper)
2399                         e1000_set_eee_i350(hw);
2400
2401                 /* send driver version info to firmware */
2402                 igb_init_fw(adapter);
2403                 break;
2404         default:
2405                 break;
2406         }
2407 #ifndef IGB_NO_LRO
2408         if (netdev->features & NETIF_F_LRO)
2409                 dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled\n");
2410         else
2411                 dev_info(pci_dev_to_dev(pdev), "LRO is disabled\n");
2412 #endif
2413         dev_info(pci_dev_to_dev(pdev),
2414                  "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2415                  adapter->msix_entries ? "MSI-X" :
2416                  (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2417                  adapter->num_rx_queues, adapter->num_tx_queues);
2418
2419         cards_found++;
2420
2421         pm_runtime_put_noidle(&pdev->dev);
2422         return 0;
2423
2424 err_register:
2425         igb_release_hw_control(adapter);
2426 err_eeprom:
2427         if (!e1000_check_reset_block(hw))
2428                 e1000_phy_hw_reset(hw);
2429
2430         if (hw->flash_address)
2431                 iounmap(hw->flash_address);
2432 err_sw_init:
2433         igb_clear_interrupt_scheme(adapter);
2434         igb_reset_sriov_capability(adapter);
2435         iounmap(hw->hw_addr);
2436 err_ioremap:
2437         free_netdev(netdev);
2438 err_alloc_etherdev:
2439         pci_release_selected_regions(pdev,
2440                                      pci_select_bars(pdev, IORESOURCE_MEM));
2441 err_pci_reg:
2442 err_dma:
2443         pci_disable_device(pdev);
2444         return err;
2445 }
2446
2447 /**
2448  * igb_remove - Device Removal Routine
2449  * @pdev: PCI device information struct
2450  *
2451  * igb_remove is called by the PCI subsystem to alert the driver
2452  * that it should release a PCI device.  This could be caused by a
2453  * Hot-Plug event, or because the driver is going to be removed from
2454  * memory.
2455  **/
2456 static void __devexit igb_remove(struct pci_dev *pdev)
2457 {
2458         struct net_device *netdev = pci_get_drvdata(pdev);
2459         struct igb_adapter *adapter = netdev_priv(netdev);
2460         struct e1000_hw *hw = &adapter->hw;
2461
2462         pm_runtime_get_noresume(&pdev->dev);
2463
2464         /* flush_scheduled_work() may reschedule our watchdog task, so
2465          * explicitly prevent the watchdog task from being rescheduled */
2466         set_bit(__IGB_DOWN, &adapter->state);
2467         del_timer_sync(&adapter->watchdog_timer);
2468         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2469                 del_timer_sync(&adapter->dma_err_timer);
2470         del_timer_sync(&adapter->phy_info_timer);
2471
2472         flush_scheduled_work();
2473
2474 #ifdef IGB_DCA
2475         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2476                 dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
2477                 dca_remove_requester(&pdev->dev);
2478                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2479                 E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
2480         }
2481 #endif
2482
2483         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2484          * would have already happened in close and is redundant. */
2485         igb_release_hw_control(adapter);
2486
2487         unregister_netdev(netdev);
2488 #ifdef CONFIG_IGB_VMDQ_NETDEV
2489         igb_remove_vmdq_netdevs(adapter);
2490 #endif
2491
2492         igb_clear_interrupt_scheme(adapter);
2493         igb_reset_sriov_capability(adapter);
2494
2495         iounmap(hw->hw_addr);
2496         if (hw->flash_address)
2497                 iounmap(hw->flash_address);
2498         pci_release_selected_regions(pdev,
2499                                      pci_select_bars(pdev, IORESOURCE_MEM));
2500
2501         kfree(adapter->mac_table);
2502         kfree(adapter->shadow_vfta);
2503         free_netdev(netdev);
2504
2505         pci_disable_pcie_error_reporting(pdev);
2506
2507         pci_disable_device(pdev);
2508
2509 #ifdef IGB_SYSFS
2510         igb_sysfs_exit(adapter);
2511 #else
2512 #ifdef IGB_PROCFS
2513         igb_procfs_exit(adapter);
2514 #endif /* IGB_PROCFS */
2515 #endif /* IGB_SYSFS */
2516 }
2517
2518 #ifdef HAVE_HW_TIME_STAMP
2519 /**
2520  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2521  * @adapter: board private structure to initialize
2522  *
2523  * igb_init_hw_timer initializes the function pointer and values for the hw
2524  * timer found in hardware.
2525  **/
2526 static void igb_init_hw_timer(struct igb_adapter *adapter)
2527 {
2528         struct e1000_hw *hw = &adapter->hw;
2529
2530         switch (hw->mac.type) {
2531         case e1000_i350:
2532         case e1000_82580:
2533                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2534                 adapter->cycles.read = igb_read_clock;
2535                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2536                 adapter->cycles.mult = 1;
2537                 /*
2538                  * The 82580 timesync updates the system timer in 8 ns increments
2539                  * and the value cannot be shifted.  Instead we need to shift
2540                  * the registers to generate a 64bit timer value.  As a result
2541                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2542                  * 24 in order to generate a larger value for synchronization.
2543                  */
2544                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2545                 /* disable system timer temporarily by setting bit 31 */
2546                 E1000_WRITE_REG(hw, E1000_TSAUXC, 0x80000000);
2547                 E1000_WRITE_FLUSH(hw);
2548
2549                 /* Set registers so that rollover occurs soon to test this. */
2550                 E1000_WRITE_REG(hw, E1000_SYSTIMR, 0x00000000);
2551                 E1000_WRITE_REG(hw, E1000_SYSTIML, 0x80000000);
2552                 E1000_WRITE_REG(hw, E1000_SYSTIMH, 0x000000FF);
2553                 E1000_WRITE_FLUSH(hw);
2554
2555                 /* enable system timer by clearing bit 31 */
2556                 E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
2557                 E1000_WRITE_FLUSH(hw);
2558
2559                 timecounter_init(&adapter->clock,
2560                                  &adapter->cycles,
2561                                  ktime_to_ns(ktime_get_real()));
2562                 /*
2563                  * Synchronize our NIC clock against system wall clock. NIC
2564                  * time stamp reading requires ~3us per sample; each sample
2565                  * was pretty stable even under load, so only 10 samples are
2566                  * required per offset comparison.
2567                  */
2568                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2569                 adapter->compare.source = &adapter->clock;
2570                 adapter->compare.target = ktime_get_real;
2571                 adapter->compare.num_samples = 10;
2572                 timecompare_update(&adapter->compare, 0);
2573                 break;
2574         case e1000_82576:
2575                 /*
2576                  * Initialize the hardware timer: we keep it running just in
2577                  * case some program needs it later on.
2578                  */
2579                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2580                 adapter->cycles.read = igb_read_clock;
2581                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2582                 adapter->cycles.mult = 1;
2583                 /*
2584                  * Scale the NIC clock cycle by a large factor so that
2585                  * relatively small clock corrections can be added or
2586                  * subtracted at each clock tick. The drawbacks of a large
2587                  * factor are a) that the clock register overflows more quickly
2588                  * (not such a big deal) and b) that the increment per tick has
2589                  * to fit into 24 bits.  As a result we need to use a shift of
2590                  * 19 so we can fit a value of 16 into the TIMINCA register.
2591                  */
2592                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2593                 E1000_WRITE_REG(hw, E1000_TIMINCA,
2594                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2595                                 (16 << IGB_82576_TSYNC_SHIFT));
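
                /*
                 * Added note (not from the original source): the timecounter
                 * layer converts a raw SYSTIM delta to nanoseconds as
                 * ns = (delta * cycles.mult) >> cycles.shift.  With mult = 1
                 * and shift = 19 as set above, the per-period increment of
                 * 16 << 19 written to TIMINCA converts back to exactly 16 ns,
                 * matching the period the E1000_TIMINCA_16NS_SHIFT name
                 * suggests.
                 */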
2596
2597                 /* Set registers so that rollover occurs soon to test this. */
2598                 E1000_WRITE_REG(hw, E1000_SYSTIML, 0x00000000);
2599                 E1000_WRITE_REG(hw, E1000_SYSTIMH, 0xFF800000);
2600                 E1000_WRITE_FLUSH(hw);
2601
2602                 timecounter_init(&adapter->clock,
2603                                  &adapter->cycles,
2604                                  ktime_to_ns(ktime_get_real()));
2605                 /*
2606                  * Synchronize our NIC clock against system wall clock. NIC
2607                  * time stamp reading requires ~3us per sample; each sample
2608                  * was pretty stable even under load, so only 10 samples are
2609                  * required per offset comparison.
2610                  */
2611                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2612                 adapter->compare.source = &adapter->clock;
2613                 adapter->compare.target = ktime_get_real;
2614                 adapter->compare.num_samples = 10;
2615                 timecompare_update(&adapter->compare, 0);
2616                 break;
2617         case e1000_82575:
2618                 /* 82575 does not support timesync */
2619         default:
2620                 break;
2621         }
2622 }
2623
2624 #endif /* HAVE_HW_TIME_STAMP */
2625 /**
2626  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2627  * @adapter: board private structure to initialize
2628  *
2629  * igb_sw_init initializes the Adapter private data structure.
2630  * Fields are initialized based on PCI device information and
2631  * OS network device settings (MTU size).
2632  **/
2633 static int igb_sw_init(struct igb_adapter *adapter)
2634 {
2635         struct e1000_hw *hw = &adapter->hw;
2636         struct net_device *netdev = adapter->netdev;
2637         struct pci_dev *pdev = adapter->pdev;
2638
2639         /* PCI config space info */
2640
2641         hw->vendor_id = pdev->vendor;
2642         hw->device_id = pdev->device;
2643         hw->subsystem_vendor_id = pdev->subsystem_vendor;
2644         hw->subsystem_device_id = pdev->subsystem_device;
2645
2646         pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
2647
2648         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2649
2650         /* set default ring sizes */
2651         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2652         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2653
2654         /* set default work limits */
2655         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2656
2657         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2658                                               VLAN_HLEN;
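        /*
         * e.g. with the default 1500-byte MTU: 1500 + 14 (ETH_HLEN) +
         * 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 bytes (added illustration).
         */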
2659
2660         /* Initialize the hardware-specific values */
2661         if (e1000_setup_init_funcs(hw, TRUE)) {
2662                 dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n");
2663                 return -EIO;
2664         }
2665
2666         adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) *
2667                                      hw->mac.rar_entry_count,
2668                                      GFP_ATOMIC);
2669
2670         /* Setup and initialize a copy of the hw vlan table array */
2671         adapter->shadow_vfta = (u32 *)kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES,
2672                                         GFP_ATOMIC);
2673 #ifdef NO_KNI
2674         /* These calls may decrease the number of queues */
2675         igb_set_sriov_capability(adapter);
2676
2677         if (igb_init_interrupt_scheme(adapter)) {
2678                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
2679                 return -ENOMEM;
2680         }
2681
2682         /* Explicitly disable IRQ since the NIC can be in any state. */
2683         igb_irq_disable(adapter);
2684
2685         set_bit(__IGB_DOWN, &adapter->state);
2686 #endif
2687         return 0;
2688 }
2689
2690 /**
2691  * igb_open - Called when a network interface is made active
2692  * @netdev: network interface device structure
2693  *
2694  * Returns 0 on success, negative value on failure
2695  *
2696  * The open entry point is called when a network interface is made
2697  * active by the system (IFF_UP).  At this point all resources needed
2698  * for transmit and receive operations are allocated, the interrupt
2699  * handler is registered with the OS, the watchdog timer is started,
2700  * and the stack is notified that the interface is ready.
2701  **/
2702 static int __igb_open(struct net_device *netdev, bool resuming)
2703 {
2704         struct igb_adapter *adapter = netdev_priv(netdev);
2705         struct e1000_hw *hw = &adapter->hw;
2706 #ifdef CONFIG_PM_RUNTIME
2707         struct pci_dev *pdev = adapter->pdev;
2708 #endif /* CONFIG_PM_RUNTIME */
2709         int err;
2710         int i;
2711
2712         /* disallow open during test */
2713         if (test_bit(__IGB_TESTING, &adapter->state)) {
2714                 WARN_ON(resuming);
2715                 return -EBUSY;
2716         }
2717
2718 #ifdef CONFIG_PM_RUNTIME
2719         if (!resuming)
2720                 pm_runtime_get_sync(&pdev->dev);
2721 #endif /* CONFIG_PM_RUNTIME */
2722
2723         netif_carrier_off(netdev);
2724
2725         /* allocate transmit descriptors */
2726         err = igb_setup_all_tx_resources(adapter);
2727         if (err)
2728                 goto err_setup_tx;
2729
2730         /* allocate receive descriptors */
2731         err = igb_setup_all_rx_resources(adapter);
2732         if (err)
2733                 goto err_setup_rx;
2734
2735         igb_power_up_link(adapter);
2736
2737         /* before we allocate an interrupt, we must be ready to handle it.
2738          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2739          * as soon as we call pci_request_irq, so we have to set up our
2740          * clean_rx handler before we do so.  */
2741         igb_configure(adapter);
2742
2743         err = igb_request_irq(adapter);
2744         if (err)
2745                 goto err_req_irq;
2746
2747         /* From here on the code is the same as igb_up() */
2748         clear_bit(__IGB_DOWN, &adapter->state);
2749
2750         for (i = 0; i < adapter->num_q_vectors; i++)
2751                 napi_enable(&(adapter->q_vector[i]->napi));
2752         igb_configure_lli(adapter);
2753
2754         /* Clear any pending interrupts. */
2755         E1000_READ_REG(hw, E1000_ICR);
2756
2757         igb_irq_enable(adapter);
2758
2759         /* notify VFs that reset has been completed */
2760         if (adapter->vfs_allocated_count) {
2761                 u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
2762                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2763                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
2764         }
2765
2766         netif_tx_start_all_queues(netdev);
2767
2768         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
2769                 schedule_work(&adapter->dma_err_task);
2770
2771         /* start the watchdog. */
2772         hw->mac.get_link_status = 1;
2773         schedule_work(&adapter->watchdog_task);
2774
2775         return E1000_SUCCESS;
2776
2777 err_req_irq:
2778         igb_release_hw_control(adapter);
2779         igb_power_down_link(adapter);
2780         igb_free_all_rx_resources(adapter);
2781 err_setup_rx:
2782         igb_free_all_tx_resources(adapter);
2783 err_setup_tx:
2784         igb_reset(adapter);
2785
2786 #ifdef CONFIG_PM_RUNTIME
2787         if (!resuming)
2788                 pm_runtime_put(&pdev->dev);
2789 #endif /* CONFIG_PM_RUNTIME */
2790
2791         return err;
2792 }
2793
2794 static int igb_open(struct net_device *netdev)
2795 {
2796         return __igb_open(netdev, false);
2797 }
2798
2799 /**
2800  * igb_close - Disables a network interface
2801  * @netdev: network interface device structure
2802  *
2803  * Returns 0; this is not allowed to fail
2804  *
2805  * The close entry point is called when an interface is de-activated
2806  * by the OS.  The hardware is still under the driver's control, but
2807  * needs to be disabled.  A global MAC reset is issued to stop the
2808  * hardware, and all transmit and receive resources are freed.
2809  **/
2810 static int __igb_close(struct net_device *netdev, bool suspending)
2811 {
2812         struct igb_adapter *adapter = netdev_priv(netdev);
2813 #ifdef CONFIG_PM_RUNTIME
2814         struct pci_dev *pdev = adapter->pdev;
2815 #endif /* CONFIG_PM_RUNTIME */
2816
2817         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2818
2819 #ifdef CONFIG_PM_RUNTIME
2820         if (!suspending)
2821                 pm_runtime_get_sync(&pdev->dev);
2822 #endif /* CONFIG_PM_RUNTIME */
2823
2824         igb_down(adapter);
2825
2826         igb_release_hw_control(adapter);
2827
2828         igb_free_irq(adapter);
2829
2830         igb_free_all_tx_resources(adapter);
2831         igb_free_all_rx_resources(adapter);
2832
2833 #ifdef CONFIG_PM_RUNTIME
2834         if (!suspending)
2835                 pm_runtime_put_sync(&pdev->dev);
2836 #endif /* CONFIG_PM_RUNTIME */
2837
2838         return 0;
2839 }
2840
2841 static int igb_close(struct net_device *netdev)
2842 {
2843         return __igb_close(netdev, false);
2844 }
2845
2846 /**
2847  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2848  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2849  *
2850  * Return 0 on success, negative on failure
2851  **/
2852 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2853 {
2854         struct device *dev = tx_ring->dev;
2855         int orig_node = dev_to_node(dev);
2856         int size;
2857
2858         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2859         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
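             /* fall back to an allocation from any node if the node-local
              * attempt failed */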
2860         if (!tx_ring->tx_buffer_info)
2861                 tx_ring->tx_buffer_info = vzalloc(size);
2862         if (!tx_ring->tx_buffer_info)
2863                 goto err;
2864
2865         /* round up to nearest 4K */
2866         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2867         tx_ring->size = ALIGN(tx_ring->size, 4096);
2868
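             /* temporarily point the device at the ring's NUMA node so the
              * descriptor block is allocated there, then restore the node */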
2869         set_dev_node(dev, tx_ring->numa_node);
2870         tx_ring->desc = dma_alloc_coherent(dev,
2871                                            tx_ring->size,
2872                                            &tx_ring->dma,
2873                                            GFP_KERNEL);
2874         set_dev_node(dev, orig_node);
2875         if (!tx_ring->desc)
2876                 tx_ring->desc = dma_alloc_coherent(dev,
2877                                                    tx_ring->size,
2878                                                    &tx_ring->dma,
2879                                                    GFP_KERNEL);
2880
2881         if (!tx_ring->desc)
2882                 goto err;
2883
2884         tx_ring->next_to_use = 0;
2885         tx_ring->next_to_clean = 0;
2886
2887         return 0;
2888
2889 err:
2890         vfree(tx_ring->tx_buffer_info);
2891         dev_err(dev,
2892                 "Unable to allocate memory for the transmit descriptor ring\n");
2893         return -ENOMEM;
2894 }
2895
2896 /**
2897  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2898  *                                (Descriptors) for all queues
2899  * @adapter: board private structure
2900  *
2901  * Return 0 on success, negative on failure
2902  **/
2903 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2904 {
2905         struct pci_dev *pdev = adapter->pdev;
2906         int i, err = 0;
2907
2908         for (i = 0; i < adapter->num_tx_queues; i++) {
2909                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2910                 if (err) {
2911                         dev_err(pci_dev_to_dev(pdev),
2912                                 "Allocation for Tx Queue %u failed\n", i);
2913                         for (i--; i >= 0; i--)
2914                                 igb_free_tx_resources(adapter->tx_ring[i]);
2915                         break;
2916                 }
2917         }
2918
2919         return err;
2920 }
2921
2922 /**
2923  * igb_setup_tctl - configure the transmit control registers
2924  * @adapter: Board private structure
2925  **/
2926 void igb_setup_tctl(struct igb_adapter *adapter)
2927 {
2928         struct e1000_hw *hw = &adapter->hw;
2929         u32 tctl;
2930
2931         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2932         E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0);
2933
2934         /* Program the Transmit Control Register */
2935         tctl = E1000_READ_REG(hw, E1000_TCTL);
2936         tctl &= ~E1000_TCTL_CT;
2937         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2938                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2939
2940         e1000_config_collision_dist(hw);
2941
2942         /* Enable transmits */
2943         tctl |= E1000_TCTL_EN;
2944
2945         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2946 }
2947
2948 /**
2949  * igb_configure_tx_ring - Configure transmit ring after Reset
2950  * @adapter: board private structure
2951  * @ring: tx ring to configure
2952  *
2953  * Configure a transmit ring after a reset.
2954  **/
2955 void igb_configure_tx_ring(struct igb_adapter *adapter,
2956                            struct igb_ring *ring)
2957 {
2958         struct e1000_hw *hw = &adapter->hw;
2959         u32 txdctl = 0;
2960         u64 tdba = ring->dma;
2961         int reg_idx = ring->reg_idx;
2962
2963         /* disable the queue */
2964         E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0);
2965         E1000_WRITE_FLUSH(hw);
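             /* let the disable take effect before reprogramming the ring */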
2966         mdelay(10);
2967
2968         E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx),
2969                         ring->count * sizeof(union e1000_adv_tx_desc));
2970         E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx),
2971                         tdba & 0x00000000ffffffffULL);
2972         E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32);
2973
2974         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2975         E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0);
2976         writel(0, ring->tail);
2977
2978         txdctl |= IGB_TX_PTHRESH;
2979         txdctl |= IGB_TX_HTHRESH << 8;
2980         txdctl |= IGB_TX_WTHRESH << 16;
2981
2982         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2983         E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl);
2984 }
2985
2986 /**
2987  * igb_configure_tx - Configure transmit Unit after Reset
2988  * @adapter: board private structure
2989  *
2990  * Configure the Tx unit of the MAC after a reset.
2991  **/
2992 static void igb_configure_tx(struct igb_adapter *adapter)
2993 {
2994         int i;
2995
2996         for (i = 0; i < adapter->num_tx_queues; i++)
2997                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2998 }
2999
3000 /**
3001  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
3002  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
3003  *
3004  * Returns 0 on success, negative on failure
3005  **/
3006 int igb_setup_rx_resources(struct igb_ring *rx_ring)
3007 {
3008         struct device *dev = rx_ring->dev;
3009         int orig_node = dev_to_node(dev);
3010         int size, desc_len;
3011
3012         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3013         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
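             /* same node-local-first allocation strategy as the Tx ring above */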
3014         if (!rx_ring->rx_buffer_info)
3015                 rx_ring->rx_buffer_info = vzalloc(size);
3016         if (!rx_ring->rx_buffer_info)
3017                 goto err;
3018
3019         desc_len = sizeof(union e1000_adv_rx_desc);
3020
3021         /* Round up to nearest 4K */
3022         rx_ring->size = rx_ring->count * desc_len;
3023         rx_ring->size = ALIGN(rx_ring->size, 4096);
3024
3025         set_dev_node(dev, rx_ring->numa_node);
3026         rx_ring->desc = dma_alloc_coherent(dev,
3027                                            rx_ring->size,
3028                                            &rx_ring->dma,
3029                                            GFP_KERNEL);
3030         set_dev_node(dev, orig_node);
3031         if (!rx_ring->desc)
3032                 rx_ring->desc = dma_alloc_coherent(dev,
3033                                                    rx_ring->size,
3034                                                    &rx_ring->dma,
3035                                                    GFP_KERNEL);
3036
3037         if (!rx_ring->desc)
3038                 goto err;
3039
3040         rx_ring->next_to_clean = 0;
3041         rx_ring->next_to_use = 0;
3042
3043         return 0;
3044
3045 err:
3046         vfree(rx_ring->rx_buffer_info);
3047         rx_ring->rx_buffer_info = NULL;
3048         dev_err(dev,
3049                 "Unable to allocate memory for the receive descriptor ring\n");
3050         return -ENOMEM;
3051 }
3052
3053 /**
3054  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
3055  *                                (Descriptors) for all queues
3056  * @adapter: board private structure
3057  *
3058  * Return 0 on success, negative on failure
3059  **/
3060 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
3061 {
3062         struct pci_dev *pdev = adapter->pdev;
3063         int i, err = 0;
3064
3065         for (i = 0; i < adapter->num_rx_queues; i++) {
3066                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
3067                 if (err) {
3068                         dev_err(pci_dev_to_dev(pdev),
3069                                 "Allocation for Rx Queue %u failed\n", i);
3070                         for (i--; i >= 0; i--)
3071                                 igb_free_rx_resources(adapter->rx_ring[i]);
3072                         break;
3073                 }
3074         }
3075
3076         return err;
3077 }
3078
3079 /**
3080  * igb_setup_mrqc - configure the multiple receive queue control registers
3081  * @adapter: Board private structure
3082  **/
3083 static void igb_setup_mrqc(struct igb_adapter *adapter)
3084 {
3085         struct e1000_hw *hw = &adapter->hw;
3086         u32 mrqc, rxcsum;
3087         u32 j, num_rx_queues, shift = 0, shift2 = 0;
3088         union e1000_reta {
3089                 u32 dword;
3090                 u8  bytes[4];
3091         } reta;
3092         static const u8 rsshash[40] = {
3093                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
3094                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
3095                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
3096                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
3097
3098         /* Fill out hash function seeds */
3099         for (j = 0; j < 10; j++) {
3100                 u32 rsskey = rsshash[(j * 4)];
3101                 rsskey |= rsshash[(j * 4) + 1] << 8;
3102                 rsskey |= rsshash[(j * 4) + 2] << 16;
3103                 rsskey |= rsshash[(j * 4) + 3] << 24;
3104                 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), j, rsskey);
3105         }
3106
3107         num_rx_queues = adapter->rss_queues;
3108
3109         if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
3110                 /* 82575 and 82576 support 2 RSS queues for VMDq */
3111                 switch (hw->mac.type) {
3112                 case e1000_i350:
3113                 case e1000_82580:
3114                         num_rx_queues = 1;
3115                         shift = 0;
3116                         break;
3117                 case e1000_82576:
3118                         shift = 3;
3119                         num_rx_queues = 2;
3120                         break;
3121                 case e1000_82575:
3122                         shift = 2;
3123                         shift2 = 6;
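                                  /* fall through */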
3124                 default:
3125                         break;
3126                 }
3127         } else {
3128                 if (hw->mac.type == e1000_82575)
3129                         shift = 6;
3130         }
3131
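             /* program the 128-entry redirection table, four one-byte entries
              * per 32-bit RETA register */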
3132         for (j = 0; j < (32 * 4); j++) {
3133                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
3134                 if (shift2)
3135                         reta.bytes[j & 3] |= num_rx_queues << shift2;
3136                 if ((j & 3) == 3)
3137                         E1000_WRITE_REG(hw, E1000_RETA(j >> 2), reta.dword);
3138         }
3139
3140         /*
3141          * Disable raw packet checksumming so that RSS hash is placed in
3142          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
3143          * offloads as they are enabled by default
3144          */
3145         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3146         rxcsum |= E1000_RXCSUM_PCSD;
3147
3148         if (adapter->hw.mac.type >= e1000_82576)
3149                 /* Enable Receive Checksum Offload for SCTP */
3150                 rxcsum |= E1000_RXCSUM_CRCOFL;
3151
3152         /* Don't need to set TUOFL or IPOFL, they default to 1 */
3153         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
3154
3155         /* If VMDq is enabled then we set the appropriate mode for that, else
3156          * we default to RSS so that an RSS hash is calculated per packet even
3157          * if we are only using one queue */
3158         if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
3159                 if (hw->mac.type > e1000_82575) {
3160                         /* Set the default pool for the PF's first queue */
3161                         u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL);
3162                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
3163                                    E1000_VT_CTL_DISABLE_DEF_POOL);
3164                         vtctl |= adapter->vfs_allocated_count <<
3165                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
3166                         E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl);
3167                 } else if (adapter->rss_queues > 1) {
3168                         /* set default queue for pool 1 to queue 2 */
3169                         E1000_WRITE_REG(hw, E1000_VT_CTL,
3170                                         adapter->rss_queues << 7);
3171                 }
3172                 if (adapter->rss_queues > 1)
3173                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3174                 else
3175                         mrqc = E1000_MRQC_ENABLE_VMDQ;
3176         } else {
3177                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3178         }
3179
3180         igb_vmm_control(adapter);
3181
3182         /*
3183          * Generate RSS hash based on TCP port numbers and/or
3184          * IPv4/v6 src and dst addresses since UDP cannot be
3185          * hashed reliably due to IP fragmentation
3186          */
3187         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
3188                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
3189                 E1000_MRQC_RSS_FIELD_IPV6 |
3190                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
3191                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
3192
3193         E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3194 }
3195
3196 /**
3197  * igb_setup_rctl - configure the receive control registers
3198  * @adapter: Board private structure
3199  **/
3200 void igb_setup_rctl(struct igb_adapter *adapter)
3201 {
3202         struct e1000_hw *hw = &adapter->hw;
3203         u32 rctl;
3204
3205         rctl = E1000_READ_REG(hw, E1000_RCTL);
3206
3207         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3208         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3209
3210         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3211                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3212
3213         /*
3214          * enable stripping of CRC. It's unlikely this will break BMC
3215          * redirection as it did with e1000. Newer features require
3216          * that the HW strips the CRC.
3217          */
3218         rctl |= E1000_RCTL_SECRC;
3219
3220         /* disable store bad packets and clear size bits. */
3221         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3222
3223         /* enable LPE to prevent packets larger than max_frame_size */
3224         rctl |= E1000_RCTL_LPE;
3225
3226         /* disable queue 0 to prevent tail write w/o re-config */
3227         E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0);
3228
3229         /* Attention!!!  For SR-IOV PF driver operations you must enable
3230          * queue drop for all VF and PF queues to prevent head of line blocking
3231          * if an untrusted VF does not provide descriptors to hardware.
3232          */
3233         if (adapter->vfs_allocated_count) {
3234                 /* set all queue drop enable bits */
3235                 E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES);
3236         }
3237
3238         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
3239 }
3240
3241 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3242                                    int vfn)
3243 {
3244         struct e1000_hw *hw = &adapter->hw;
3245         u32 vmolr;
3246
3247         /* if this is a VF rather than the PF, check whether it has VLANs
3248          * enabled and, if so, increase the size to allow for a VLAN tag */
3249         if (vfn < adapter->vfs_allocated_count &&
3250             adapter->vf_data[vfn].vlans_enabled)
3251                 size += VLAN_HLEN;
3252
3253 #ifdef CONFIG_IGB_VMDQ_NETDEV
3254         if (vfn >= adapter->vfs_allocated_count) {
3255                 int queue = vfn - adapter->vfs_allocated_count;
3256                 struct igb_vmdq_adapter *vadapter;
3257
3258                 vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]);
3259                 if (vadapter->vlgrp)
3260                         size += VLAN_HLEN;
3261         }
3262 #endif
3263         vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
3264         vmolr &= ~E1000_VMOLR_RLPML_MASK;
3265         vmolr |= size | E1000_VMOLR_LPE;
3266         E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
3267
3268         return 0;
3269 }
3270
3271 /**
3272  * igb_rlpml_set - set maximum receive packet size
3273  * @adapter: board private structure
3274  *
3275  * Configure maximum receivable packet size.
3276  **/
3277 static void igb_rlpml_set(struct igb_adapter *adapter)
3278 {
3279         u32 max_frame_size = adapter->max_frame_size;
3280         struct e1000_hw *hw = &adapter->hw;
3281         u16 pf_id = adapter->vfs_allocated_count;
3282
3283         if (adapter->vmdq_pools && hw->mac.type != e1000_82575) {
3284                 int i;
3285                 for (i = 0; i < adapter->vmdq_pools; i++)
3286                         igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i);
3287                 /*
3288                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3289                  * to our max jumbo frame size, in case we need to enable
3290                  * jumbo frames on one of the rings later.
3291                  * This will not pass over-length frames into the default
3292                  * queue because it's gated by the VMOLR.RLPML.
3293                  */
3294                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3295         }
3296         /* Set VF RLPML for the PF device. */
3297         if (adapter->vfs_allocated_count)
3298                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3299
3300         E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size);
3301 }
3302
3303 static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
3304                                         int vfn, bool enable)
3305 {
3306         struct e1000_hw *hw = &adapter->hw;
3307         u32 val;
3308         void __iomem *reg;
3309
3310         if (hw->mac.type < e1000_82576)
3311                 return;
3312
3313         if (hw->mac.type == e1000_i350)
3314                 reg = hw->hw_addr + E1000_DVMOLR(vfn);
3315         else
3316                 reg = hw->hw_addr + E1000_VMOLR(vfn);
3317
3318         val = readl(reg);
3319         if (enable)
3320                 val |= E1000_VMOLR_STRVLAN;
3321         else
3322                 val &= ~(E1000_VMOLR_STRVLAN);
3323         writel(val, reg);
3324 }
3325 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3326                                  int vfn, bool aupe)
3327 {
3328         struct e1000_hw *hw = &adapter->hw;
3329         u32 vmolr;
3330
3331         /*
3332          * This register exists only on 82576 and newer, so on older MACs
3333          * there is nothing to do
3334          */
3335         if (hw->mac.type < e1000_82576)
3336                 return;
3337
3338         vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
3339
3340         if (aupe)
3341                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3342         else
3343                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3344
3345         /* clear all bits that might not be set */
3346         vmolr &= ~E1000_VMOLR_RSSE;
3347
3348         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3349                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3350
3351         vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3352         vmolr |= E1000_VMOLR_LPE;          /* Accept long packets */
3353
3354         E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
3355 }
3356
3357 /**
3358  * igb_configure_rx_ring - Configure a receive ring after Reset
3359  * @adapter: board private structure
3360  * @ring: receive ring to be configured
3361  *
3362  * Configure the Rx unit of the MAC after a reset.
3363  **/
3364 void igb_configure_rx_ring(struct igb_adapter *adapter,
3365                            struct igb_ring *ring)
3366 {
3367         struct e1000_hw *hw = &adapter->hw;
3368         u64 rdba = ring->dma;
3369         int reg_idx = ring->reg_idx;
3370         u32 srrctl = 0, rxdctl = 0;
3371
3372         /* disable the queue */
3373         E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0);
3374
3375         /* Set DMA base address registers */
3376         E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx),
3377                         rdba & 0x00000000ffffffffULL);
3378         E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32);
3379         E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx),
3380                        ring->count * sizeof(union e1000_adv_rx_desc));
3381
3382         /* initialize head and tail */
3383         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3384         E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0);
3385         writel(0, ring->tail);
3386
3387         /* set descriptor configuration */
3388 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
3389         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3390 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3391         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3392 #else
3393         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3394 #endif
3395         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3396 #else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
3397         srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3398                  E1000_SRRCTL_BSIZEPKT_SHIFT;
3399         srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3400 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
3401 #ifdef IGB_PER_PKT_TIMESTAMP
3402         if (hw->mac.type >= e1000_82580)
3403                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3404 #endif
3405         /*
3406          * We should set the drop enable bit if:
3407          *  SR-IOV is enabled
3408          *   or
3409          *  Flow Control is disabled and number of RX queues > 1
3410          *
3411          *  This allows us to avoid head of line blocking for security
3412          *  and performance reasons.
3413          */
3414         if (adapter->vfs_allocated_count ||
3415             (adapter->num_rx_queues > 1 &&
3416              (hw->fc.requested_mode == e1000_fc_none ||
3417               hw->fc.requested_mode == e1000_fc_rx_pause)))
3418                 srrctl |= E1000_SRRCTL_DROP_EN;
3419
3420         E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl);
3421
3422         /* set filtering for VMDQ pools */
3423         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3424
3425         rxdctl |= IGB_RX_PTHRESH;
3426         rxdctl |= IGB_RX_HTHRESH << 8;
3427         rxdctl |= IGB_RX_WTHRESH << 16;
3428
3429         /* enable receive descriptor fetching */
3430         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3431         E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl);
3432 }
3433
3434 /**
3435  * igb_configure_rx - Configure receive Unit after Reset
3436  * @adapter: board private structure
3437  *
3438  * Configure the Rx unit of the MAC after a reset.
3439  **/
3440 static void igb_configure_rx(struct igb_adapter *adapter)
3441 {
3442         int i;
3443
3444         /* set UTA to appropriate mode */
3445         igb_set_uta(adapter);
3446
3447         igb_full_sync_mac_table(adapter);
3448         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3449          * the Base and Length of the Rx Descriptor Ring */
3450         for (i = 0; i < adapter->num_rx_queues; i++)
3451                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3452 }
3453
3454 /**
3455  * igb_free_tx_resources - Free Tx Resources per Queue
3456  * @tx_ring: Tx descriptor ring for a specific queue
3457  *
3458  * Free all transmit software resources
3459  **/
3460 void igb_free_tx_resources(struct igb_ring *tx_ring)
3461 {
3462         igb_clean_tx_ring(tx_ring);
3463
3464         vfree(tx_ring->tx_buffer_info);
3465         tx_ring->tx_buffer_info = NULL;
3466
3467         /* if not set, then don't free */
3468         if (!tx_ring->desc)
3469                 return;
3470
3471         dma_free_coherent(tx_ring->dev, tx_ring->size,
3472                           tx_ring->desc, tx_ring->dma);
3473
3474         tx_ring->desc = NULL;
3475 }
3476
3477 /**
3478  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3479  * @adapter: board private structure
3480  *
3481  * Free all transmit software resources
3482  **/
3483 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3484 {
3485         int i;
3486
3487         for (i = 0; i < adapter->num_tx_queues; i++)
3488                 igb_free_tx_resources(adapter->tx_ring[i]);
3489 }
3490
3491 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3492                                     struct igb_tx_buffer *tx_buffer)
3493 {
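             /* the buffer that owns the skb maps the linear data; buffers
              * without an skb carry page-fragment mappings */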
3494         if (tx_buffer->skb) {
3495                 dev_kfree_skb_any(tx_buffer->skb);
3496                 if (dma_unmap_len(tx_buffer, len))
3497                         dma_unmap_single(ring->dev,
3498                                          dma_unmap_addr(tx_buffer, dma),
3499                                          dma_unmap_len(tx_buffer, len),
3500                                          DMA_TO_DEVICE);
3501         } else if (dma_unmap_len(tx_buffer, len)) {
3502                 dma_unmap_page(ring->dev,
3503                                dma_unmap_addr(tx_buffer, dma),
3504                                dma_unmap_len(tx_buffer, len),
3505                                DMA_TO_DEVICE);
3506         }
3507         tx_buffer->next_to_watch = NULL;
3508         tx_buffer->skb = NULL;
3509         dma_unmap_len_set(tx_buffer, len, 0);
3510         /* buffer_info must be completely set up in the transmit path */
3511 }
3512
3513 /**
3514  * igb_clean_tx_ring - Free Tx Buffers
3515  * @tx_ring: ring to be cleaned
3516  **/
3517 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3518 {
3519         struct igb_tx_buffer *buffer_info;
3520         unsigned long size;
3521         u16 i;
3522
3523         if (!tx_ring->tx_buffer_info)
3524                 return;
3525         /* Free all the Tx ring sk_buffs */
3526
3527         for (i = 0; i < tx_ring->count; i++) {
3528                 buffer_info = &tx_ring->tx_buffer_info[i];
3529                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3530         }
3531
3532 #ifdef CONFIG_BQL
3533         netdev_tx_reset_queue(txring_txq(tx_ring));
3534 #endif /* CONFIG_BQL */
3535
3536         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3537         memset(tx_ring->tx_buffer_info, 0, size);
3538
3539         /* Zero out the descriptor ring */
3540         memset(tx_ring->desc, 0, tx_ring->size);
3541
3542         tx_ring->next_to_use = 0;
3543         tx_ring->next_to_clean = 0;
3544 }
3545
3546 /**
3547  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3548  * @adapter: board private structure
3549  **/
3550 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3551 {
3552         int i;
3553
3554         for (i = 0; i < adapter->num_tx_queues; i++)
3555                 igb_clean_tx_ring(adapter->tx_ring[i]);
3556 }
3557
3558 /**
3559  * igb_free_rx_resources - Free Rx Resources
3560  * @rx_ring: ring to clean the resources from
3561  *
3562  * Free all receive software resources
3563  **/
3564 void igb_free_rx_resources(struct igb_ring *rx_ring)
3565 {
3566         igb_clean_rx_ring(rx_ring);
3567
3568         vfree(rx_ring->rx_buffer_info);
3569         rx_ring->rx_buffer_info = NULL;
3570
3571         /* if not set, then don't free */
3572         if (!rx_ring->desc)
3573                 return;
3574
3575         dma_free_coherent(rx_ring->dev, rx_ring->size,
3576                           rx_ring->desc, rx_ring->dma);
3577
3578         rx_ring->desc = NULL;
3579 }
3580
3581 /**
3582  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3583  * @adapter: board private structure
3584  *
3585  * Free all receive software resources
3586  **/
3587 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3588 {
3589         int i;
3590
3591         for (i = 0; i < adapter->num_rx_queues; i++)
3592                 igb_free_rx_resources(adapter->rx_ring[i]);
3593 }
3594
3595 /**
3596  * igb_clean_rx_ring - Free Rx Buffers per Queue
3597  * @rx_ring: ring to free buffers from
3598  **/
3599 void igb_clean_rx_ring(struct igb_ring *rx_ring)
3600 {
3601         unsigned long size;
3602 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
3603         const int bufsz = rx_ring->rx_buffer_len;
3604 #else
3605         const int bufsz = IGB_RX_HDR_LEN;
3606 #endif
3607         u16 i;
3608
3609         if (!rx_ring->rx_buffer_info)
3610                 return;
3611
3612         /* Free all the Rx ring sk_buffs */
3613         for (i = 0; i < rx_ring->count; i++) {
3614                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3615                 if (buffer_info->dma) {
3616                         dma_unmap_single(rx_ring->dev,
3617                                          buffer_info->dma,
3618                                          bufsz,
3619                                          DMA_FROM_DEVICE);
3620                         buffer_info->dma = 0;
3621                 }
3622
3623                 if (buffer_info->skb) {
3624                         dev_kfree_skb(buffer_info->skb);
3625                         buffer_info->skb = NULL;
3626                 }
3627 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
3628                 if (buffer_info->page_dma) {
3629                         dma_unmap_page(rx_ring->dev,
3630                                        buffer_info->page_dma,
3631                                        PAGE_SIZE / 2,
3632                                        DMA_FROM_DEVICE);
3633                         buffer_info->page_dma = 0;
3634                 }
3635                 if (buffer_info->page) {
3636                         put_page(buffer_info->page);
3637                         buffer_info->page = NULL;
3638                         buffer_info->page_offset = 0;
3639                 }
3640 #endif
3641         }
3642
3643         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3644         memset(rx_ring->rx_buffer_info, 0, size);
3645
3646         /* Zero out the descriptor ring */
3647         memset(rx_ring->desc, 0, rx_ring->size);
3648
3649         rx_ring->next_to_clean = 0;
3650         rx_ring->next_to_use = 0;
3651 }
3652
3653 /**
3654  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3655  * @adapter: board private structure
3656  **/
3657 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3658 {
3659         int i;
3660
3661         for (i = 0; i < adapter->num_rx_queues; i++)
3662                 igb_clean_rx_ring(adapter->rx_ring[i]);
3663 }
3664
3665 /**
3666  * igb_set_mac - Change the Ethernet Address of the NIC
3667  * @netdev: network interface device structure
3668  * @p: pointer to an address structure
3669  *
3670  * Returns 0 on success, negative on failure
3671  **/
3672 static int igb_set_mac(struct net_device *netdev, void *p)
3673 {
3674         struct igb_adapter *adapter = netdev_priv(netdev);
3675         struct e1000_hw *hw = &adapter->hw;
3676         struct sockaddr *addr = p;
3677
3678         if (!is_valid_ether_addr(addr->sa_data))
3679                 return -EADDRNOTAVAIL;
3680
3681         igb_del_mac_filter(adapter, hw->mac.addr,
3682                            adapter->vfs_allocated_count);
3683         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3684         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3685
3686         /* set the correct pool for the new PF MAC address in entry 0 */
3687         return igb_add_mac_filter(adapter, hw->mac.addr,
3688                            adapter->vfs_allocated_count);
3689 }
3690
3691 /**
3692  * igb_write_mc_addr_list - write multicast addresses to MTA
3693  * @netdev: network interface device structure
3694  *
3695  * Writes multicast address list to the MTA hash table.
3696  * Returns: -ENOMEM on failure
3697  *                0 on no addresses written
3698  *                X on writing X addresses to MTA
3699  **/
3700 int igb_write_mc_addr_list(struct net_device *netdev)
3701 {
3702         struct igb_adapter *adapter = netdev_priv(netdev);
3703         struct e1000_hw *hw = &adapter->hw;
3704 #ifdef NETDEV_HW_ADDR_T_MULTICAST
3705         struct netdev_hw_addr *ha;
3706 #else
3707         struct dev_mc_list *ha;
3708 #endif
3709         u8  *mta_list;
3710         int i, count;
3711 #ifdef CONFIG_IGB_VMDQ_NETDEV
3712         int vm;
3713 #endif
3714         count = netdev_mc_count(netdev);
3715 #ifdef CONFIG_IGB_VMDQ_NETDEV
3716         for (vm = 1; vm < adapter->vmdq_pools; vm++) {
3717                 if (!adapter->vmdq_netdev[vm])
3718                         break;
3719                 if (!netif_running(adapter->vmdq_netdev[vm]))
3720                         continue;
3721                 count += netdev_mc_count(adapter->vmdq_netdev[vm]);
3722         }
3723 #endif
3724
3725         if (!count) {
3726                 e1000_update_mc_addr_list(hw, NULL, 0);
3727                 return 0;
3728         }
3729         mta_list = kzalloc(count * ETH_ALEN, GFP_ATOMIC);
3730         if (!mta_list)
3731                 return -ENOMEM;
3732
3733         /* The shared function expects a packed array of only addresses. */
3734         i = 0;
3735         netdev_for_each_mc_addr(ha, netdev)
3736 #ifdef NETDEV_HW_ADDR_T_MULTICAST
3737                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3738 #else
3739                 memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN);
3740 #endif
3741 #ifdef CONFIG_IGB_VMDQ_NETDEV
3742         for (vm = 1; vm < adapter->vmdq_pools; vm++) {
3743                 if (!adapter->vmdq_netdev[vm])
3744                         break;
3745                 if (!netif_running(adapter->vmdq_netdev[vm]) ||
3746                     !netdev_mc_count(adapter->vmdq_netdev[vm]))
3747                         continue;
3748                 netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm])
3749 #ifdef NETDEV_HW_ADDR_T_MULTICAST
3750                         memcpy(mta_list + (i++ * ETH_ALEN),
3751                                ha->addr, ETH_ALEN);
3752 #else
3753                         memcpy(mta_list + (i++ * ETH_ALEN),
3754                                ha->dmi_addr, ETH_ALEN);
3755 #endif
3756         }
3757 #endif
3758         e1000_update_mc_addr_list(hw, mta_list, i);
3759         kfree(mta_list);
3760
3761         return count;
3762 }
3763
3764 void igb_rar_set(struct igb_adapter *adapter, u32 index)
3765 {
3766         u32 rar_low, rar_high;
3767         struct e1000_hw *hw = &adapter->hw;
3768         u8 *addr = adapter->mac_table[index].addr;
3769         /* HW expects these in little endian so we reverse the byte order
3770          * from network order (big endian) to little endian
3771          */
3772         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
3773                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
3774         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
3775
3776         /* Indicate to hardware the Address is Valid. */
3777         if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE)
3778                 rar_high |= E1000_RAH_AV;
3779
3780         if (hw->mac.type == e1000_82575)
3781                 rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue;
3782         else
3783                 rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue;
3784
3785         E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
3786         E1000_WRITE_FLUSH(hw);
3787         E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
3788         E1000_WRITE_FLUSH(hw);
3789 }
3790
3791 void igb_full_sync_mac_table(struct igb_adapter *adapter)
3792 {
3793         struct e1000_hw *hw = &adapter->hw;
3794         int i;
3795         for (i = 0; i < hw->mac.rar_entry_count; i++) {
3796                 igb_rar_set(adapter, i);
3797         }
3798 }
3799
3800 void igb_sync_mac_table(struct igb_adapter *adapter)
3801 {
3802         struct e1000_hw *hw = &adapter->hw;
3803         int i;
3804         for (i = 0; i < hw->mac.rar_entry_count; i++) {
3805                 if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED)
3806                         igb_rar_set(adapter, i);
3807                 adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED);
3808         }
3809 }
3810
3811 int igb_available_rars(struct igb_adapter *adapter)
3812 {
3813         struct e1000_hw *hw = &adapter->hw;
3814         int i, count = 0;
3815
3816         for (i = 0; i < hw->mac.rar_entry_count; i++) {
3817                 if (adapter->mac_table[i].state == 0)
3818                         count++;
3819         }
3820         return count;
3821 }
3822
3823 #ifdef HAVE_SET_RX_MODE
3824 /**
3825  * igb_write_uc_addr_list - write unicast addresses to RAR table
3826  * @netdev: network interface device structure
3827  *
3828  * Writes unicast address list to the RAR table.
3829  * Returns: -ENOMEM on failure/insufficient address space
3830  *                0 on no addresses written
3831  *                X on writing X addresses to the RAR table
3832  **/
3833 static int igb_write_uc_addr_list(struct net_device *netdev)
3834 {
3835         struct igb_adapter *adapter = netdev_priv(netdev);
3836         unsigned int vfn = adapter->vfs_allocated_count;
3837         int count = 0;
3838
3839         /* return ENOMEM indicating insufficient memory for addresses */
3840         if (netdev_uc_count(netdev) > igb_available_rars(adapter))
3841                 return -ENOMEM;
3842         if (!netdev_uc_empty(netdev)) {
3843 #ifdef NETDEV_HW_ADDR_T_UNICAST
3844                 struct netdev_hw_addr *ha;
3845 #else
3846                 struct dev_mc_list *ha;
3847 #endif
3848                 netdev_for_each_uc_addr(ha, netdev) {
3849 #ifdef NETDEV_HW_ADDR_T_UNICAST
3850                         igb_del_mac_filter(adapter, ha->addr, vfn);
3851                         igb_add_mac_filter(adapter, ha->addr, vfn);
3852 #else
3853                         igb_del_mac_filter(adapter, ha->da_addr, vfn);
3854                         igb_add_mac_filter(adapter, ha->da_addr, vfn);
3855 #endif
3856                         count++;
3857                 }
3858         }
3859         return count;
3860 }
3861
3862 #endif /* HAVE_SET_RX_MODE */
3863 /**
3864  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3865  * @netdev: network interface device structure
3866  *
3867  * The set_rx_mode entry point is called whenever the unicast or multicast
3868  * address lists or the network interface flags are updated.  This routine is
3869  * responsible for configuring the hardware for proper unicast, multicast,
3870  * promiscuous mode, and all-multi behavior.
3871  **/
3872 static void igb_set_rx_mode(struct net_device *netdev)
3873 {
3874         struct igb_adapter *adapter = netdev_priv(netdev);
3875         struct e1000_hw *hw = &adapter->hw;
3876         unsigned int vfn = adapter->vfs_allocated_count;
3877         u32 rctl, vmolr = 0;
3878         int count;
3879
3880         /* Check for Promiscuous and All Multicast modes */
3881         rctl = E1000_READ_REG(hw, E1000_RCTL);
3882
3883         /* clear the affected bits */
3884         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3885
3886         if (netdev->flags & IFF_PROMISC) {
3887                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3888                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3889         } else {
3890                 if (netdev->flags & IFF_ALLMULTI) {
3891                         rctl |= E1000_RCTL_MPE;
3892                         vmolr |= E1000_VMOLR_MPME;
3893                 } else {
3894                         /*
3895                          * Write addresses to the MTA; if the attempt fails,
3896                          * fall back to multicast promiscuous mode so that we
3897                          * can at least receive multicast traffic
3898                          */
3899                         count = igb_write_mc_addr_list(netdev);
3900                         if (count < 0) {
3901                                 rctl |= E1000_RCTL_MPE;
3902                                 vmolr |= E1000_VMOLR_MPME;
3903                         } else if (count) {
3904                                 vmolr |= E1000_VMOLR_ROMPE;
3905                         }
3906                 }
3907 #ifdef HAVE_SET_RX_MODE
3908                 /*
3909                  * Write addresses to available RAR registers, if there is not
3910                  * sufficient space to store all the addresses then enable
3911                  * unicast promiscuous mode
3912                  */
3913                 count = igb_write_uc_addr_list(netdev);
3914                 if (count < 0) {
3915                         rctl |= E1000_RCTL_UPE;
3916                         vmolr |= E1000_VMOLR_ROPE;
3917                 }
3918 #endif /* HAVE_SET_RX_MODE */
3919                 rctl |= E1000_RCTL_VFE;
3920         }
3921         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
3922
3923         /*
3924          * In order to support SR-IOV and eventually VMDq it is necessary to set
3925          * the VMOLR to enable the appropriate modes.  Without this workaround
3926          * we will have issues with VLAN tag stripping not being done for frames
3927          * that are only arriving because we are the default pool
3928          */
3929         if (hw->mac.type < e1000_82576)
3930                 return;
3931
3932         vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) &
3933                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3934         E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
3935         igb_restore_vf_multicasts(adapter);
3936 }
3937
3938 static void igb_check_wvbr(struct igb_adapter *adapter)
3939 {
3940         struct e1000_hw *hw = &adapter->hw;
3941         u32 wvbr = 0;
3942
3943         switch (hw->mac.type) {
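             /* only 82576 and i350 report wrong VM behavior (spoof) events
              * through the WVBR register */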
3944         case e1000_82576:
3945         case e1000_i350:
3946                 if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR)))
3947                         return;
3948                 break;
3949         default:
3950                 break;
3951         }
3952
3953         adapter->wvbr |= wvbr;
3954 }
3955
3956 #define IGB_STAGGERED_QUEUE_OFFSET 8
3957
3958 static void igb_spoof_check(struct igb_adapter *adapter)
3959 {
3960         int j;
3961
3962         if (!adapter->wvbr)
3963                 return;
3964
3965         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3966                 if (adapter->wvbr & (1 << j) ||
3967                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3968                         DPRINTK(DRV, WARNING,
3969                                 "Spoof event(s) detected on VF %d\n", j);
3970                         adapter->wvbr &=
3971                                 ~((1 << j) |
3972                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3973                 }
3974         }
3975 }
3976
3977 /* Need to wait a few seconds after link up to get diagnostic information from
3978  * the phy */
3979 static void igb_update_phy_info(unsigned long data)
3980 {
3981         struct igb_adapter *adapter = (struct igb_adapter *) data;
3982         e1000_get_phy_info(&adapter->hw);
3983 }
3984
3985 /**
3986  * igb_has_link - check shared code for link and determine up/down
3987  * @adapter: pointer to driver private info
3988  **/
3989 bool igb_has_link(struct igb_adapter *adapter)
3990 {
3991         struct e1000_hw *hw = &adapter->hw;
3992         bool link_active = false;
3993
3994         /* get_link_status is set on LSC (link status) interrupt or
3995          * rx sequence error interrupt.  get_link_status will stay
3996          * false until the e1000_check_for_link establishes link
3997          * for copper adapters ONLY
3998          */
3999         switch (hw->phy.media_type) {
4000         case e1000_media_type_copper:
4001                 if (!hw->mac.get_link_status)
4002                         return true;
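                     /* fall through - link state unknown, ask the hardware */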
4003         case e1000_media_type_internal_serdes:
4004                 e1000_check_for_link(hw);
4005                 link_active = !hw->mac.get_link_status;
4006                 break;
4007         case e1000_media_type_unknown:
4008         default:
4009                 break;
4010         }
4011
4012         return link_active;
4013 }
4014
4015 /**
4016  * igb_watchdog - Timer Call-back
4017  * @data: pointer to adapter cast into an unsigned long
4018  **/
4019 static void igb_watchdog(unsigned long data)
4020 {
4021         struct igb_adapter *adapter = (struct igb_adapter *)data;
4022         /* Do the rest outside of interrupt context */
4023         schedule_work(&adapter->watchdog_task);
4024 }
4025
4026 static void igb_watchdog_task(struct work_struct *work)
4027 {
4028         struct igb_adapter *adapter = container_of(work,
4029                                                    struct igb_adapter,
4030                                                    watchdog_task);
4031         struct e1000_hw *hw = &adapter->hw;
4032         struct net_device *netdev = adapter->netdev;
4033         u32 link;
4034         int i;
4035         u32 thstat, ctrl_ext;
4036
4037
4038         link = igb_has_link(adapter);
4039         if (link) {
4040                 /* Cancel scheduled suspend requests. */
4041                 pm_runtime_resume(netdev->dev.parent);
4042
4043                 if (!netif_carrier_ok(netdev)) {
4044                         u32 ctrl;
4045                         e1000_get_speed_and_duplex(hw,
4046                                                    &adapter->link_speed,
4047                                                    &adapter->link_duplex);
4048
4049                         ctrl = E1000_READ_REG(hw, E1000_CTRL);
4050                         /* Link status message must follow this format */
4051                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
4052                                  "Flow Control: %s\n",
4053                                netdev->name,
4054                                adapter->link_speed,
4055                                adapter->link_duplex == FULL_DUPLEX ?
4056                                  "Full Duplex" : "Half Duplex",
4057                                ((ctrl & E1000_CTRL_TFCE) &&
4058                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX":
4059                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
4060                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
4061                         /* adjust timeout factor according to speed/duplex */
4062                         adapter->tx_timeout_factor = 1;
4063                         switch (adapter->link_speed) {
4064                         case SPEED_10:
4065                                 adapter->tx_timeout_factor = 14;
4066                                 break;
4067                         case SPEED_100:
4068                                 /* maybe add some timeout factor ? */
4069                                 break;
4070                         }
4071
4072                         netif_carrier_on(netdev);
4073                         netif_tx_wake_all_queues(netdev);
4074
4075                         igb_ping_all_vfs(adapter);
4076 #ifdef IFLA_VF_MAX
4077                         igb_check_vf_rate_limit(adapter);
4078 #endif /* IFLA_VF_MAX */
4079
4080                         /* link state has changed, schedule phy info update */
4081                         if (!test_bit(__IGB_DOWN, &adapter->state))
4082                                 mod_timer(&adapter->phy_info_timer,
4083                                           round_jiffies(jiffies + 2 * HZ));
4084                 }
4085         } else {
4086                 if (netif_carrier_ok(netdev)) {
4087                         adapter->link_speed = 0;
4088                         adapter->link_duplex = 0;
4089                         /* check for thermal sensor event on i350 */
4090                         if (hw->mac.type == e1000_i350) {
4091                                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
4092                                 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
4093                                 if ((hw->phy.media_type ==
4094                                         e1000_media_type_copper) &&
4095                                         !(ctrl_ext &
4096                                         E1000_CTRL_EXT_LINK_MODE_SGMII)) {
4097                                         if (thstat & E1000_THSTAT_PWR_DOWN) {
4098                                                 printk(KERN_ERR "igb: %s The "
4099                                                 "network adapter was stopped "
4100                                                 "because it overheated.\n",
4101                                                 netdev->name);
4102                                         }
4103                                         if (thstat & E1000_THSTAT_LINK_THROTTLE) {
4104                                                 printk(KERN_INFO
4105                                                        "igb: %s The network adapter link speed was "
4106                                                        "downshifted because it overheated.\n",
4107                                                        netdev->name);
4112                                         }
4113                                 }
4114                         }
4115
4116                         /* Link status message must follow this format */
4117                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
4118                                netdev->name);
4119                         netif_carrier_off(netdev);
4120                         netif_tx_stop_all_queues(netdev);
4121
4122                         igb_ping_all_vfs(adapter);
4123
4124                         /* link state has changed, schedule phy info update */
4125                         if (!test_bit(__IGB_DOWN, &adapter->state))
4126                                 mod_timer(&adapter->phy_info_timer,
4127                                           round_jiffies(jiffies + 2 * HZ));
4128
4129                         pm_schedule_suspend(netdev->dev.parent,
4130                                             MSEC_PER_SEC * 5);
4131                 }
4132         }
4133
4134         igb_update_stats(adapter);
4135
4136         for (i = 0; i < adapter->num_tx_queues; i++) {
4137                 struct igb_ring *tx_ring = adapter->tx_ring[i];
4138                 if (!netif_carrier_ok(netdev)) {
4139                         /* We've lost link, so the controller stops DMA,
4140                          * but we've got queued Tx work that's never going
4141                          * to get done, so reset controller to flush Tx.
4142                          * (Do the reset outside of interrupt context). */
4143                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
4144                                 adapter->tx_timeout_count++;
4145                                 schedule_work(&adapter->reset_task);
4146                                 /* return immediately since reset is imminent */
4147                                 return;
4148                         }
4149                 }
4150
4151                 /* Force detection of hung controller every watchdog period */
4152                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
4153         }
4154
4155         /* Cause software interrupt to ensure rx ring is cleaned */
4156         if (adapter->msix_entries) {
4157                 u32 eics = 0;
4158                 for (i = 0; i < adapter->num_q_vectors; i++)
4159                         eics |= adapter->q_vector[i]->eims_value;
4160                 E1000_WRITE_REG(hw, E1000_EICS, eics);
4161         } else {
4162                 E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0);
4163         }
4164
4165         igb_spoof_check(adapter);
4166
4167         /* Reset the timer */
4168         if (!test_bit(__IGB_DOWN, &adapter->state))
4169                 mod_timer(&adapter->watchdog_timer,
4170                           round_jiffies(jiffies + 2 * HZ));
4171 }
4172
4173 static void igb_dma_err_task(struct work_struct *work)
4174 {
4175         struct igb_adapter *adapter = container_of(work,
4176                                                    struct igb_adapter,
4177                                                    dma_err_task);
4178         int vf;
4179         struct e1000_hw *hw = &adapter->hw;
4180         struct net_device *netdev = adapter->netdev;
4181         u32 hgptc;
4182         u32 ciaa, ciad;
4183
4184         hgptc = E1000_READ_REG(hw, E1000_HGPTC);
4185         if (hgptc) /* If incrementing then no need for the check below */
4186                 goto dma_timer_reset;
4187         /*
4188          * Check to see if a bad DMA write target from an errant or
4189          * malicious VF has caused a PCIe error.  If so then we can
4190          * issue a VFLR to the offending VF(s) and then resume without
4191          * requesting a full slot reset.
4192          */
4193
4194         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4195                 ciaa = (vf << 16) | 0x80000000;
4196                 /* 32 bit read so align, we really want status at offset 6 */
4197                 ciaa |= PCI_COMMAND;
4198                 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4199                 ciad = E1000_READ_REG(hw, E1000_CIAD);
4200                 ciaa &= 0x7FFFFFFF;
4201                 /* disable debug mode asap after reading data */
4202                 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4203                 /* Get the upper 16 bits which will be the PCI status reg */
4204                 ciad >>= 16;
4205                 if (ciad & (PCI_STATUS_REC_MASTER_ABORT |
4206                             PCI_STATUS_REC_TARGET_ABORT |
4207                             PCI_STATUS_SIG_SYSTEM_ERROR)) {
4208                         netdev_err(netdev, "VF %d suffered error\n", vf);
4209                         /* Issue VFLR */
4210                         ciaa = (vf << 16) | 0x80000000;
4211                         ciaa |= 0xA8;
4212                         E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4213                         ciad = 0x00008000;  /* VFLR */
4214                         E1000_WRITE_REG(hw, E1000_CIAD, ciad);
4215                         ciaa &= 0x7FFFFFFF;
4216                         E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4217                 }
4218         }
4219 dma_timer_reset:
4220         /* Reset the timer */
4221         if (!test_bit(__IGB_DOWN, &adapter->state))
4222                 mod_timer(&adapter->dma_err_timer,
4223                           round_jiffies(jiffies + HZ / 10));
4224 }
4225
4226 /**
4227  * igb_dma_err_timer - Timer Call-back
4228  * @data: pointer to adapter cast into an unsigned long
4229  **/
4230 static void igb_dma_err_timer(unsigned long data)
4231 {
4232         struct igb_adapter *adapter = (struct igb_adapter *)data;
4233         /* Do the rest outside of interrupt context */
4234         schedule_work(&adapter->dma_err_task);
4235 }
4236
4237 enum latency_range {
4238         lowest_latency = 0,
4239         low_latency = 1,
4240         bulk_latency = 2,
4241         latency_invalid = 255
4242 };
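/*
 * These ranges classify per-interrupt traffic in igb_update_itr(); the
 * switch in igb_set_itr() then maps lowest_latency to IGB_70K_ITR
 * (~70,000 ints/s), low_latency to IGB_20K_ITR (~20,000 ints/s) and
 * bulk_latency to IGB_4K_ITR (~4,000 ints/s).
 */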
4243
4244 /**
4245  * igb_update_ring_itr - update the dynamic ITR value based on packet size
4246  *
4247  *      Stores a new ITR value based strictly on packet size.  This
4248  *      algorithm is less sophisticated than that used in igb_update_itr,
4249  *      due to the difficulty of synchronizing statistics across multiple
4250  *      receive rings.  The divisors and thresholds used by this function
4251  *      were determined based on theoretical maximum wire speed and testing
4252  *      data, in order to minimize response time while increasing bulk
4253  *      throughput.
4254  *      This functionality is controlled by the InterruptThrottleRate module
4255  *      parameter (see igb_param.c)
4256  *      NOTE:  This function is called only when operating in a multiqueue
4257  *             receive environment.
4258  * @q_vector: pointer to q_vector
4259  **/
4260 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
4261 {
4262         int new_val = q_vector->itr_val;
4263         int avg_wire_size = 0;
4264         struct igb_adapter *adapter = q_vector->adapter;
4265         unsigned int packets;
4266
4267         /* For non-gigabit speeds, just fix the interrupt rate at 4000
4268          * ints/sec - ITR timer value of 120 ticks.
4269          */
4270         if (adapter->link_speed != SPEED_1000) {
4271                 new_val = IGB_4K_ITR;
4272                 goto set_itr_val;
4273         }
4274
4275         packets = q_vector->rx.total_packets;
4276         if (packets)
4277                 avg_wire_size = q_vector->rx.total_bytes / packets;
4278
4279         packets = q_vector->tx.total_packets;
4280         if (packets)
4281                 avg_wire_size = max_t(u32, avg_wire_size,
4282                                       q_vector->tx.total_bytes / packets);
4283
4284         /* if avg_wire_size isn't set no work was done */
4285         if (!avg_wire_size)
4286                 goto clear_counts;
4287
4288         /* Add 24 bytes to size to account for CRC, preamble, and gap */
4289         avg_wire_size += 24;
4290
4291         /* Don't starve jumbo frames */
4292         avg_wire_size = min(avg_wire_size, 3000);
4293
4294         /* Give a little boost to mid-size frames */
4295         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
4296                 new_val = avg_wire_size / 3;
4297         else
4298                 new_val = avg_wire_size / 2;
4299
4300         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4301         if (new_val < IGB_20K_ITR &&
4302             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4303              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4304                 new_val = IGB_20K_ITR;
4305
4306 set_itr_val:
4307         if (new_val != q_vector->itr_val) {
4308                 q_vector->itr_val = new_val;
4309                 q_vector->set_itr = 1;
4310         }
4311 clear_counts:
4312         q_vector->rx.total_bytes = 0;
4313         q_vector->rx.total_packets = 0;
4314         q_vector->tx.total_bytes = 0;
4315         q_vector->tx.total_packets = 0;
4316 }
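/*
 * Worked example of the sizing arithmetic above (the numbers follow
 * from the code, not from hardware documentation): standard 1500-byte
 * frames give avg_wire_size = 1500 + 24 = 1524, which is outside the
 * 300-1200 "mid-size" window, so new_val = 1524 / 2 = 762.  A 9000-byte
 * jumbo workload averages 9024, is capped at 3000 and ends up with
 * new_val = 1500, i.e. jumbo traffic is throttled harder but never
 * starved.
 */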
4317
4318 /**
4319  * igb_update_itr - update the dynamic ITR value based on statistics
4320  *      Stores a new ITR value based on packets and byte
4321  *      counts during the last interrupt.  The advantage of per interrupt
4322  *      computation is faster updates and more accurate ITR for the current
4323  *      traffic pattern.  Constants in this function were computed
4324  *      based on theoretical maximum wire speed and thresholds were set based
4325  *      on testing data as well as attempting to minimize response time
4326  *      while increasing bulk throughput.
4327  *      This functionality is controlled by the InterruptThrottleRate module
4328  *      parameter (see igb_param.c)
4329  *      NOTE:  These calculations are only valid when operating in a single-
4330  *             queue environment.
4331  * @q_vector: pointer to q_vector
4332  * @ring_container: ring info to update the itr for
4333  **/
4334 static void igb_update_itr(struct igb_q_vector *q_vector,
4335                            struct igb_ring_container *ring_container)
4336 {
4337         unsigned int packets = ring_container->total_packets;
4338         unsigned int bytes = ring_container->total_bytes;
4339         u8 itrval = ring_container->itr;
4340
4341         /* no packets, exit with status unchanged */
4342         if (packets == 0)
4343                 return;
4344
4345         switch (itrval) {
4346         case lowest_latency:
4347                 /* handle TSO and jumbo frames */
4348                 if (bytes/packets > 8000)
4349                         itrval = bulk_latency;
4350                 else if ((packets < 5) && (bytes > 512))
4351                         itrval = low_latency;
4352                 break;
4353         case low_latency:  /* 50 usec aka 20000 ints/s */
4354                 if (bytes > 10000) {
4355                         /* this if handles the TSO accounting */
4356                         if (bytes/packets > 8000) {
4357                                 itrval = bulk_latency;
4358                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
4359                                 itrval = bulk_latency;
4360                         } else if (packets > 35) {
4361                                 itrval = lowest_latency;
4362                         }
4363                 } else if (bytes/packets > 2000) {
4364                         itrval = bulk_latency;
4365                 } else if (packets <= 2 && bytes < 512) {
4366                         itrval = lowest_latency;
4367                 }
4368                 break;
4369         case bulk_latency: /* 250 usec aka 4000 ints/s */
4370                 if (bytes > 25000) {
4371                         if (packets > 35)
4372                                 itrval = low_latency;
4373                 } else if (bytes < 1500) {
4374                         itrval = low_latency;
4375                 }
4376                 break;
4377         }
4378
4379         /* clear work counters since we have the values we need */
4380         ring_container->total_bytes = 0;
4381         ring_container->total_packets = 0;
4382
4383         /* write updated itr to ring container */
4384         ring_container->itr = itrval;
4385 }
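/*
 * Example transitions through the heuristic above: a ring sitting in
 * low_latency that saw bytes = 20000 and packets = 12 last interrupt
 * has bytes/packets ~1666 (> 1200) and moves to bulk_latency; in
 * bulk_latency, an interrupt with bytes = 1200 (< 1500) drops back to
 * low_latency.  lowest_latency is reached from low_latency either via
 * the "packets > 35" path or when traffic is very light (packets <= 2
 * and bytes < 512).
 */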
4386
4387 static void igb_set_itr(struct igb_q_vector *q_vector)
4388 {
4389         struct igb_adapter *adapter = q_vector->adapter;
4390         u32 new_itr = q_vector->itr_val;
4391         u8 current_itr = 0;
4392
4393         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4394         if (adapter->link_speed != SPEED_1000) {
4395                 current_itr = 0;
4396                 new_itr = IGB_4K_ITR;
4397                 goto set_itr_now;
4398         }
4399
4400         igb_update_itr(q_vector, &q_vector->tx);
4401         igb_update_itr(q_vector, &q_vector->rx);
4402
4403         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4404
4405         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4406         if (current_itr == lowest_latency &&
4407             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4408              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4409                 current_itr = low_latency;
4410
4411         switch (current_itr) {
4412         /* counts and packets in update_itr are dependent on these numbers */
4413         case lowest_latency:
4414                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4415                 break;
4416         case low_latency:
4417                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4418                 break;
4419         case bulk_latency:
4420                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4421                 break;
4422         default:
4423                 break;
4424         }
4425
4426 set_itr_now:
4427         if (new_itr != q_vector->itr_val) {
4428                 /* this attempts to bias the interrupt rate towards Bulk
4429                  * by adding intermediate steps when interrupt rate is
4430                  * increasing */
4431                 new_itr = new_itr > q_vector->itr_val ?
4432                              max((new_itr * q_vector->itr_val) /
4433                                  (new_itr + (q_vector->itr_val >> 2)),
4434                                  new_itr) :
4435                              new_itr;
4436                 /* Don't write the value here; it resets the adapter's
4437                  * internal timer, and causes us to delay far longer than
4438                  * we should between interrupts.  Instead, we write the ITR
4439                  * value at the beginning of the next interrupt so the timing
4440                  * ends up being correct.
4441                  */
4442                 q_vector->itr_val = new_itr;
4443                 q_vector->set_itr = 1;
4444         }
4445 }
4446
4447 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4448                      u32 type_tucmd, u32 mss_l4len_idx)
4449 {
4450         struct e1000_adv_tx_context_desc *context_desc;
4451         u16 i = tx_ring->next_to_use;
4452
4453         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4454
4455         i++;
4456         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4457
4458         /* set bits to identify this as an advanced context descriptor */
4459         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4460
4461         /* For 82575, context index must be unique per ring. */
4462         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4463                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4464
4465         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4466         context_desc->seqnum_seed       = 0;
4467         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4468         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4469 }
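/*
 * The context descriptor written above occupies an ordinary slot in the
 * Tx ring (hence the next_to_use advance) but carries only offload
 * parameters, no buffer address.  The data descriptors built afterwards
 * in igb_tx_map() carry the DMA addresses and, on 82575, refer back to
 * this context through the same per-ring index that
 * igb_tx_olinfo_status() writes.
 */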
4470
4471 static int igb_tso(struct igb_ring *tx_ring,
4472                    struct igb_tx_buffer *first,
4473                    u8 *hdr_len)
4474 {
4475 #ifdef NETIF_F_TSO
4476         struct sk_buff *skb = first->skb;
4477         u32 vlan_macip_lens, type_tucmd;
4478         u32 mss_l4len_idx, l4len;
4479
4480         if (!skb_is_gso(skb))
4481 #endif /* NETIF_F_TSO */
4482                 return 0;
4483 #ifdef NETIF_F_TSO
4484
4485         if (skb_header_cloned(skb)) {
4486                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4487                 if (err)
4488                         return err;
4489         }
4490
4491         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4492         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4493
4494         if (first->protocol == __constant_htons(ETH_P_IP)) {
4495                 struct iphdr *iph = ip_hdr(skb);
4496                 iph->tot_len = 0;
4497                 iph->check = 0;
4498                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4499                                                          iph->daddr, 0,
4500                                                          IPPROTO_TCP,
4501                                                          0);
4502                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4503                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4504                                    IGB_TX_FLAGS_CSUM |
4505                                    IGB_TX_FLAGS_IPV4;
4506 #ifdef NETIF_F_TSO6
4507         } else if (skb_is_gso_v6(skb)) {
4508                 ipv6_hdr(skb)->payload_len = 0;
4509                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4510                                                        &ipv6_hdr(skb)->daddr,
4511                                                        0, IPPROTO_TCP, 0);
4512                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4513                                    IGB_TX_FLAGS_CSUM;
4514 #endif
4515         }
4516
4517         /* compute header lengths */
4518         l4len = tcp_hdrlen(skb);
4519         *hdr_len = skb_transport_offset(skb) + l4len;
4520
4521         /* update gso size and bytecount with header size */
4522         first->gso_segs = skb_shinfo(skb)->gso_segs;
4523         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4524
4525         /* MSS L4LEN IDX */
4526         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4527         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4528
4529         /* VLAN MACLEN IPLEN */
4530         vlan_macip_lens = skb_network_header_len(skb);
4531         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4532         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4533
4534         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4535
4536         return 1;
4537 #endif  /* NETIF_F_TSO */
4538 }
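/*
 * Field-packing example for the TSO path above, using only the shifts
 * visible in the code: a TSO skb with a 20-byte TCP header and
 * gso_size 1448 yields
 *   mss_l4len_idx = (20 << E1000_ADVTXD_L4LEN_SHIFT) |
 *                   (1448 << E1000_ADVTXD_MSS_SHIFT)
 * and *hdr_len = transport offset + 20; first->bytecount then grows by
 * hdr_len for every segment after the first, so the byte counters
 * reflect what actually goes on the wire.
 */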
4539
4540 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4541 {
4542         struct sk_buff *skb = first->skb;
4543         u32 vlan_macip_lens = 0;
4544         u32 mss_l4len_idx = 0;
4545         u32 type_tucmd = 0;
4546
4547         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4548                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4549                         return;
4550         } else {
4551                 u8 l4_hdr = 0;
4552                 switch (first->protocol) {
4553                 case __constant_htons(ETH_P_IP):
4554                         vlan_macip_lens |= skb_network_header_len(skb);
4555                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4556                         l4_hdr = ip_hdr(skb)->protocol;
4557                         break;
4558 #ifdef NETIF_F_IPV6_CSUM
4559                 case __constant_htons(ETH_P_IPV6):
4560                         vlan_macip_lens |= skb_network_header_len(skb);
4561                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4562                         break;
4563 #endif
4564                 default:
4565                         if (unlikely(net_ratelimit())) {
4566                                 dev_warn(tx_ring->dev,
4567                                  "partial checksum but proto=%x!\n",
4568                                  first->protocol);
4569                         }
4570                         break;
4571                 }
4572
4573                 switch (l4_hdr) {
4574                 case IPPROTO_TCP:
4575                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4576                         mss_l4len_idx = tcp_hdrlen(skb) <<
4577                                         E1000_ADVTXD_L4LEN_SHIFT;
4578                         break;
4579 #ifdef HAVE_SCTP
4580                 case IPPROTO_SCTP:
4581                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4582                         mss_l4len_idx = sizeof(struct sctphdr) <<
4583                                         E1000_ADVTXD_L4LEN_SHIFT;
4584                         break;
4585 #endif
4586                 case IPPROTO_UDP:
4587                         mss_l4len_idx = sizeof(struct udphdr) <<
4588                                         E1000_ADVTXD_L4LEN_SHIFT;
4589                         break;
4590                 default:
4591                         if (unlikely(net_ratelimit())) {
4592                                 dev_warn(tx_ring->dev,
4593                                  "partial checksum but l4 proto=%x!\n",
4594                                  l4_hdr);
4595                         }
4596                         break;
4597                 }
4598
4599                 /* update TX checksum flag */
4600                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4601         }
4602
4603         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4604         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4605
4606         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4607 }
4608
4609 static __le32 igb_tx_cmd_type(u32 tx_flags)
4610 {
4611         /* set type for advanced descriptor with frame checksum insertion */
4612         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4613                                       E1000_ADVTXD_DCMD_IFCS |
4614                                       E1000_ADVTXD_DCMD_DEXT);
4615
4616         /* set HW vlan bit if vlan is present */
4617         if (tx_flags & IGB_TX_FLAGS_VLAN)
4618                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4619
4620         /* set timestamp bit if present */
4621         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4622                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4623
4624         /* set segmentation bits for TSO */
4625         if (tx_flags & IGB_TX_FLAGS_TSO)
4626                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4627
4628         return cmd_type;
4629 }
4630
4631 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4632                                  union e1000_adv_tx_desc *tx_desc,
4633                                  u32 tx_flags, unsigned int paylen)
4634 {
4635         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4636
4637         /* 82575 requires a unique index per ring if any offload is enabled */
4638         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4639             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4640                 olinfo_status |= tx_ring->reg_idx << 4;
4641
4642         /* insert L4 checksum */
4643         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4644                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4645
4646                 /* insert IPv4 checksum */
4647                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4648                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4649         }
4650
4651         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4652 }
4653
4654 /*
4655  * The largest size we can write to the descriptor is 65535.  In order to
4656  * maintain a power of two alignment we have to limit ourselves to 32K.
4657  */
4658 #define IGB_MAX_TXD_PWR 15
4659 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
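/*
 * 1 << 15 = 32768 bytes per data descriptor.  A buffer larger than that
 * (e.g. a hypothetical 40 KB chunk) is simply split across two
 * descriptors by the "while (size > IGB_MAX_DATA_PER_TXD)" loop in
 * igb_tx_map() below.
 */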
4660
4661 static void igb_tx_map(struct igb_ring *tx_ring,
4662                        struct igb_tx_buffer *first,
4663                        const u8 hdr_len)
4664 {
4665         struct sk_buff *skb = first->skb;
4666         struct igb_tx_buffer *tx_buffer;
4667         union e1000_adv_tx_desc *tx_desc;
4668         dma_addr_t dma;
4669 #ifdef MAX_SKB_FRAGS
4670         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4671         unsigned int data_len = skb->data_len;
4672 #endif
4673         unsigned int size = skb_headlen(skb);
4674         unsigned int paylen = skb->len - hdr_len;
4675         __le32 cmd_type;
4676         u32 tx_flags = first->tx_flags;
4677         u16 i = tx_ring->next_to_use;
4678
4679         tx_desc = IGB_TX_DESC(tx_ring, i);
4680
4681         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4682         cmd_type = igb_tx_cmd_type(tx_flags);
4683
4684         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4685         if (dma_mapping_error(tx_ring->dev, dma))
4686                 goto dma_error;
4687
4688         /* record length, and DMA address */
4689         dma_unmap_len_set(first, len, size);
4690         dma_unmap_addr_set(first, dma, dma);
4691         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4692
4693 #ifdef MAX_SKB_FRAGS
4694         for (;;) {
4695 #endif
4696                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4697                         tx_desc->read.cmd_type_len =
4698                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4699
4700                         i++;
4701                         tx_desc++;
4702                         if (i == tx_ring->count) {
4703                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4704                                 i = 0;
4705                         }
4706
4707                         dma += IGB_MAX_DATA_PER_TXD;
4708                         size -= IGB_MAX_DATA_PER_TXD;
4709
4710                         tx_desc->read.olinfo_status = 0;
4711                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4712                 }
4713
4714 #ifdef MAX_SKB_FRAGS
4715                 if (likely(!data_len))
4716                         break;
4717
4718                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4719
4720                 i++;
4721                 tx_desc++;
4722                 if (i == tx_ring->count) {
4723                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4724                         i = 0;
4725                 }
4726
4727                 size = skb_frag_size(frag);
4728                 data_len -= size;
4729
4730                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
4731                                 DMA_TO_DEVICE);
4732                 if (dma_mapping_error(tx_ring->dev, dma))
4733                         goto dma_error;
4734
4735                 tx_buffer = &tx_ring->tx_buffer_info[i];
4736                 dma_unmap_len_set(tx_buffer, len, size);
4737                 dma_unmap_addr_set(tx_buffer, dma, dma);
4738
4739                 tx_desc->read.olinfo_status = 0;
4740                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4741
4742                 frag++;
4743         }
4744
4745 #endif /* MAX_SKB_FRAGS */
4746 #ifdef CONFIG_BQL
4747         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4748 #endif /* CONFIG_BQL */
4749
4750         /* write last descriptor with RS and EOP bits */
4751         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4752         tx_desc->read.cmd_type_len = cmd_type;
4753
4754         /* set the timestamp */
4755         first->time_stamp = jiffies;
4756
4757         /*
4758          * Force memory writes to complete before letting h/w know there
4759          * are new descriptors to fetch.  (Only applicable for weak-ordered
4760          * memory model archs, such as IA-64).
4761          *
4762          * We also need this memory barrier to make certain all of the
4763          * status bits have been updated before next_to_watch is written.
4764          */
4765         wmb();
4766
4767         /* set next_to_watch value indicating a packet is present */
4768         first->next_to_watch = tx_desc;
4769
4770         i++;
4771         if (i == tx_ring->count)
4772                 i = 0;
4773
4774         tx_ring->next_to_use = i;
4775
4776         writel(i, tx_ring->tail);
4777
4778         /* we need this if more than one processor can write to our tail
4779          * at a time, it synchronizes IO on IA64/Altix systems */
4780         mmiowb();
4781
4782         return;
4783
4784 dma_error:
4785         dev_err(tx_ring->dev, "TX DMA map failed\n");
4786
4787         /* clear dma mappings for failed tx_buffer_info map */
4788         for (;;) {
4789                 tx_buffer = &tx_ring->tx_buffer_info[i];
4790                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
4791                 if (tx_buffer == first)
4792                         break;
4793                 if (i == 0)
4794                         i = tx_ring->count;
4795                 i--;
4796         }
4797
4798         tx_ring->next_to_use = i;
4799 }
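/*
 * Descriptor accounting for the mapping above: an skb with a linear
 * area plus three page fragments, none larger than 32 KB, consumes four
 * data descriptors, plus one context descriptor whenever igb_tso() or
 * igb_tx_csum() programmed an offload.  That is why igb_xmit_frame_ring()
 * below reserves nr_frags + 4 free descriptors (data + context + the
 * two-descriptor gap) before building the frame.
 */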
4800
4801 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4802 {
4803         struct net_device *netdev = netdev_ring(tx_ring);
4804
4805         if (netif_is_multiqueue(netdev))
4806                 netif_stop_subqueue(netdev, ring_queue_index(tx_ring));
4807         else
4808                 netif_stop_queue(netdev);
4809
4810         /* Herbert's original patch had:
4811          *  smp_mb__after_netif_stop_queue();
4812          * but since that doesn't exist yet, just open code it. */
4813         smp_mb();
4814
4815         /* We need to check again in case another CPU has just
4816          * made room available. */
4817         if (igb_desc_unused(tx_ring) < size)
4818                 return -EBUSY;
4819
4820         /* A reprieve! */
4821         if (netif_is_multiqueue(netdev))
4822                 netif_wake_subqueue(netdev, ring_queue_index(tx_ring));
4823         else
4824                 netif_wake_queue(netdev);
4825
4826         tx_ring->tx_stats.restart_queue++;
4827
4828         return 0;
4829 }
4830
4831 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4832 {
4833         if (igb_desc_unused(tx_ring) >= size)
4834                 return 0;
4835         return __igb_maybe_stop_tx(tx_ring, size);
4836 }
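/*
 * The inline fast path above returns before the queue is ever stopped;
 * only the slow path in __igb_maybe_stop_tx() pays for the smp_mb()
 * needed to close the race between stopping the queue and a concurrent
 * Tx completion freeing descriptors.
 */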
4837
4838 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4839                                 struct igb_ring *tx_ring)
4840 {
4841         struct igb_tx_buffer *first;
4842         int tso;
4843         u32 tx_flags = 0;
4844         __be16 protocol = vlan_get_protocol(skb);
4845         u8 hdr_len = 0;
4846
4847         /* need: 1 descriptor per page,
4848          *       + 2 desc gap to keep tail from touching head,
4849          *       + 1 desc for skb->data,
4850          *       + 1 desc for context descriptor,
4851          * otherwise try next time */
4852         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4853                 /* this is a hard error */
4854                 return NETDEV_TX_BUSY;
4855         }
4856
4857         /* record the location of the first descriptor for this packet */
4858         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4859         first->skb = skb;
4860         first->bytecount = skb->len;
4861         first->gso_segs = 1;
4862
4863 #ifdef HAVE_HW_TIME_STAMP
4864 #ifdef SKB_SHARED_TX_IS_UNION
4865         if (unlikely(skb_shinfo(skb)->tx_flags.flags & SKBTX_HW_TSTAMP)) {
4866                 skb_shinfo(skb)->tx_flags.flags |= SKBTX_IN_PROGRESS;
4867                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4868         }
4869 #else
4870         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4871                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4872                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4873         }
4874 #endif
4875
4876 #endif
4877         if (vlan_tx_tag_present(skb)) {
4878                 tx_flags |= IGB_TX_FLAGS_VLAN;
4879                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4880         }
4881
4882         /* record initial flags and protocol */
4883         first->tx_flags = tx_flags;
4884         first->protocol = protocol;
4885
4886         tso = igb_tso(tx_ring, first, &hdr_len);
4887         if (tso < 0)
4888                 goto out_drop;
4889         else if (!tso)
4890                 igb_tx_csum(tx_ring, first);
4891
4892         igb_tx_map(tx_ring, first, hdr_len);
4893
4894 #ifndef HAVE_TRANS_START_IN_QUEUE
4895         netdev_ring(tx_ring)->trans_start = jiffies;
4896
4897 #endif
4898         /* Make sure there is space in the ring for the next send. */
4899         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4900
4901         return NETDEV_TX_OK;
4902
4903 out_drop:
4904         igb_unmap_and_free_tx_resource(tx_ring, first);
4905
4906         return NETDEV_TX_OK;
4907 }
4908
4909 #ifdef HAVE_TX_MQ
4910 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4911                                                     struct sk_buff *skb)
4912 {
4913         unsigned int r_idx = skb->queue_mapping;
4914
4915         if (r_idx >= adapter->num_tx_queues)
4916                 r_idx = r_idx % adapter->num_tx_queues;
4917
4918         return adapter->tx_ring[r_idx];
4919 }
4920 #else
4921 #define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0]
4922 #endif
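/*
 * Example: queue_mapping 5 on an adapter with 4 Tx queues selects
 * tx_ring[1] (5 % 4); with HAVE_TX_MQ unset everything is sent on
 * tx_ring[0].
 */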
4923
4924 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4925                                   struct net_device *netdev)
4926 {
4927         struct igb_adapter *adapter = netdev_priv(netdev);
4928
4929         if (test_bit(__IGB_DOWN, &adapter->state)) {
4930                 dev_kfree_skb_any(skb);
4931                 return NETDEV_TX_OK;
4932         }
4933
4934         if (skb->len <= 0) {
4935                 dev_kfree_skb_any(skb);
4936                 return NETDEV_TX_OK;
4937         }
4938
4939         /*
4940          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4941          * in order to meet this minimum size requirement.
4942          */
4943         if (skb->len < 17) {
4944                 if (skb_padto(skb, 17))
4945                         return NETDEV_TX_OK;
4946                 skb->len = 17;
4947         }
4948
4949         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4950 }
4951
4952 /**
4953  * igb_tx_timeout - Respond to a Tx Hang
4954  * @netdev: network interface device structure
4955  **/
4956 static void igb_tx_timeout(struct net_device *netdev)
4957 {
4958         struct igb_adapter *adapter = netdev_priv(netdev);
4959         struct e1000_hw *hw = &adapter->hw;
4960
4961         /* Do the reset outside of interrupt context */
4962         adapter->tx_timeout_count++;
4963
4964         if (hw->mac.type >= e1000_82580)
4965                 hw->dev_spec._82575.global_device_reset = true;
4966
4967         schedule_work(&adapter->reset_task);
4968         E1000_WRITE_REG(hw, E1000_EICS,
4969                         (adapter->eims_enable_mask & ~adapter->eims_other));
4970 }
4971
4972 static void igb_reset_task(struct work_struct *work)
4973 {
4974         struct igb_adapter *adapter;
4975         adapter = container_of(work, struct igb_adapter, reset_task);
4976
4977         igb_reinit_locked(adapter);
4978 }
4979
4980 /**
4981  * igb_get_stats - Get System Network Statistics
4982  * @netdev: network interface device structure
4983  *
4984  * Returns the address of the device statistics structure.
4985  * The statistics are updated here and also from the timer callback.
4986  **/
4987 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4988 {
4989         struct igb_adapter *adapter = netdev_priv(netdev);
4990
4991         if (!test_bit(__IGB_RESETTING, &adapter->state))
4992                 igb_update_stats(adapter);
4993
4994 #ifdef HAVE_NETDEV_STATS_IN_NETDEV
4995         /* only return the current stats */
4996         return &netdev->stats;
4997 #else
4998         /* only return the current stats */
4999         return &adapter->net_stats;
5000 #endif /* HAVE_NETDEV_STATS_IN_NETDEV */
5001 }
5002
5003 /**
5004  * igb_change_mtu - Change the Maximum Transfer Unit
5005  * @netdev: network interface device structure
5006  * @new_mtu: new value for maximum frame size
5007  *
5008  * Returns 0 on success, negative on failure
5009  **/
5010 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
5011 {
5012         struct igb_adapter *adapter = netdev_priv(netdev);
5013         struct pci_dev *pdev = adapter->pdev;
5014         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
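        /* e.g. the standard Ethernet MTU of 1500 gives max_frame =
         * 1500 + 14 (ETH_HLEN) + 4 (ETH_FCS_LEN) + 4 (VLAN_HLEN) = 1522 */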
5015 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5016         u32 rx_buffer_len, i;
5017 #endif
5018
5019         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
5020                 dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n");
5021                 return -EINVAL;
5022         }
5023
5024 #define MAX_STD_JUMBO_FRAME_SIZE 9238
5025         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
5026                 dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n");
5027                 return -EINVAL;
5028         }
5029
5030         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
5031                 usleep_range(1000, 2000);
5032
5033         /* igb_down has a dependency on max_frame_size */
5034         adapter->max_frame_size = max_frame;
5035
5036 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5037 #ifdef IGB_PER_PKT_TIMESTAMP
5038         if (adapter->hw.mac.type >= e1000_82580)
5039                 max_frame += IGB_TS_HDR_LEN;
5040
5041 #endif
5042         /*
5043          * RLPML prevents us from receiving a frame larger than max_frame so
5044          * it is safe to just set the rx_buffer_len to max_frame without the
5045          * risk of an skb_over_panic.
5046          */
5047         if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
5048                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
5049         else
5050                 rx_buffer_len = max_frame;
5051
5052 #endif
5053         if (netif_running(netdev))
5054                 igb_down(adapter);
5055
5056         dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n",
5057                 netdev->mtu, new_mtu);
5058         netdev->mtu = new_mtu;
5059
5060 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5061         for (i = 0; i < adapter->num_rx_queues; i++)
5062                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
5063
5064 #endif
5065         if (netif_running(netdev))
5066                 igb_up(adapter);
5067         else
5068                 igb_reset(adapter);
5069
5070         clear_bit(__IGB_RESETTING, &adapter->state);
5071
5072         return 0;
5073 }
5074
5075 /**
5076  * igb_update_stats - Update the board statistics counters
5077  * @adapter: board private structure
5078  **/
5079
5080 void igb_update_stats(struct igb_adapter *adapter)
5081 {
5082 #ifdef HAVE_NETDEV_STATS_IN_NETDEV
5083         struct net_device_stats *net_stats = &adapter->netdev->stats;
5084 #else
5085         struct net_device_stats *net_stats = &adapter->net_stats;
5086 #endif /* HAVE_NETDEV_STATS_IN_NETDEV */
5087         struct e1000_hw *hw = &adapter->hw;
5088 #ifdef HAVE_PCI_ERS
5089         struct pci_dev *pdev = adapter->pdev;
5090 #endif
5091         u32 reg, mpc;
5092         u16 phy_tmp;
5093         int i;
5094         u64 bytes, packets;
5095 #ifndef IGB_NO_LRO
5096         u32 flushed = 0, coal = 0, recycled = 0;
5097         struct igb_q_vector *q_vector;
5098 #endif
5099
5100 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
5101
5102         /*
5103          * Prevent stats update while adapter is being reset, or if the pci
5104          * connection is down.
5105          */
5106         if (adapter->link_speed == 0)
5107                 return;
5108 #ifdef HAVE_PCI_ERS
5109         if (pci_channel_offline(pdev))
5110                 return;
5111
5112 #endif
5113 #ifndef IGB_NO_LRO
5114         for (i = 0; i < adapter->num_q_vectors; i++) {
5115                 q_vector = adapter->q_vector[i];
5116                 if (!q_vector || !q_vector->lrolist)
5117                         continue;
5118                 flushed += q_vector->lrolist->stats.flushed;
5119                 coal += q_vector->lrolist->stats.coal;
5120                 recycled += q_vector->lrolist->stats.recycled;
5121         }
5122         adapter->lro_stats.flushed = flushed;
5123         adapter->lro_stats.coal = coal;
5124         adapter->lro_stats.recycled = recycled;
5125
5126 #endif
5127         bytes = 0;
5128         packets = 0;
5129         for (i = 0; i < adapter->num_rx_queues; i++) {
5130                 u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF;
5131                 struct igb_ring *ring = adapter->rx_ring[i];
5132                 ring->rx_stats.drops += rqdpc_tmp;
5133                 net_stats->rx_fifo_errors += rqdpc_tmp;
5134 #ifdef CONFIG_IGB_VMDQ_NETDEV
5135                 if (!ring->vmdq_netdev) {
5136                         bytes += ring->rx_stats.bytes;
5137                         packets += ring->rx_stats.packets;
5138                 }
5139 #else
5140                 bytes += ring->rx_stats.bytes;
5141                 packets += ring->rx_stats.packets;
5142 #endif
5143         }
5144
5145         net_stats->rx_bytes = bytes;
5146         net_stats->rx_packets = packets;
5147
5148         bytes = 0;
5149         packets = 0;
5150         for (i = 0; i < adapter->num_tx_queues; i++) {
5151                 struct igb_ring *ring = adapter->tx_ring[i];
5152 #ifdef CONFIG_IGB_VMDQ_NETDEV
5153                 if (!ring->vmdq_netdev) {
5154                         bytes += ring->tx_stats.bytes;
5155                         packets += ring->tx_stats.packets;
5156                 }
5157 #else
5158                 bytes += ring->tx_stats.bytes;
5159                 packets += ring->tx_stats.packets;
5160 #endif
5161         }
5162         net_stats->tx_bytes = bytes;
5163         net_stats->tx_packets = packets;
5164
5165         /* read stats registers */
5166         adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5167         adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC);
5168         adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL);
5169         E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */
5170         adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC);
5171         adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC);
5172         adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC);
5173
5174         adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64);
5175         adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127);
5176         adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255);
5177         adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511);
5178         adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5179         adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5180         adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
5181         adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC);
5182
5183         mpc = E1000_READ_REG(hw, E1000_MPC);
5184         adapter->stats.mpc += mpc;
5185         net_stats->rx_fifo_errors += mpc;
5186         adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC);
5187         adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL);
5188         adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC);
5189         adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL);
5190         adapter->stats.dc += E1000_READ_REG(hw, E1000_DC);
5191         adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC);
5192         adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5193         adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5194         adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
5195         adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5196         adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5197         adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC);
5198         adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL);
5199         E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */
5200         adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC);
5201         adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC);
5202         adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC);
5203         adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC);
5204         adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH);
5205         adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH);
5206         adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR);
5207
5208         adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5209         adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5210         adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5211         adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5212         adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5213         adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5214
5215         adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC);
5216         adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC);
5217
5218         adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT);
5219         adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC);
5220
5221         adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5222         /* read internal phy specific stats */
5223         reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
5224         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
5225                 adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5226                 adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5227         }
5228
5229         adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5230         adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5231
5232         adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC);
5233         adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5234         adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5235         adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5236         adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5237         adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5238         adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5239         adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5240         adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5241
5242         /* Fill out the OS statistics structure */
5243         net_stats->multicast = adapter->stats.mprc;
5244         net_stats->collisions = adapter->stats.colc;
5245
5246         /* Rx Errors */
5247
5248         /* RLEC on some newer hardware can be incorrect so build
5249          * our own version based on RUC and ROC */
5250         net_stats->rx_errors = adapter->stats.rxerrc +
5251                 adapter->stats.crcerrs + adapter->stats.algnerrc +
5252                 adapter->stats.ruc + adapter->stats.roc +
5253                 adapter->stats.cexterr;
5254         net_stats->rx_length_errors = adapter->stats.ruc +
5255                                       adapter->stats.roc;
5256         net_stats->rx_crc_errors = adapter->stats.crcerrs;
5257         net_stats->rx_frame_errors = adapter->stats.algnerrc;
5258         net_stats->rx_missed_errors = adapter->stats.mpc;
5259
5260         /* Tx Errors */
5261         net_stats->tx_errors = adapter->stats.ecol +
5262                                adapter->stats.latecol;
5263         net_stats->tx_aborted_errors = adapter->stats.ecol;
5264         net_stats->tx_window_errors = adapter->stats.latecol;
5265         net_stats->tx_carrier_errors = adapter->stats.tncrs;
5266
5267         /* Tx Dropped needs to be maintained elsewhere */
5268
5269         /* Phy Stats */
5270         if (hw->phy.media_type == e1000_media_type_copper) {
5271                 if ((adapter->link_speed == SPEED_1000) &&
5272                    (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
5273                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
5274                         adapter->phy_stats.idle_errors += phy_tmp;
5275                 }
5276         }
5277
5278         /* Management Stats */
5279         adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5280         adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5281         if (hw->mac.type > e1000_82580) {
5282                 adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC);
5283                 adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC);
5284                 adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC);
5285                 adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC);
5286         }
5287 }
5288
5289 static irqreturn_t igb_msix_other(int irq, void *data)
5290 {
5291         struct igb_adapter *adapter = data;
5292         struct e1000_hw *hw = &adapter->hw;
5293         u32 icr = E1000_READ_REG(hw, E1000_ICR);
5294         /* reading ICR causes bit 31 of EICR to be cleared */
5295
5296         if (icr & E1000_ICR_DRSTA)
5297                 schedule_work(&adapter->reset_task);
5298
5299         if (icr & E1000_ICR_DOUTSYNC) {
5300                 /* HW is reporting DMA is out of sync */
5301                 adapter->stats.doosync++;
5302                 /* The DMA Out of Sync is also an indication of a spoof event
5303                  * in IOV mode. Check the Wrong VM Behavior register to
5304                  * see if it is really a spoof event. */
5305                 igb_check_wvbr(adapter);
5306         }
5307
5308         /* Check for a mailbox event */
5309         if (icr & E1000_ICR_VMMB)
5310                 igb_msg_task(adapter);
5311
5312         if (icr & E1000_ICR_LSC) {
5313                 hw->mac.get_link_status = 1;
5314                 /* guard against interrupt when we're going down */
5315                 if (!test_bit(__IGB_DOWN, &adapter->state))
5316                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5317         }
5318
5319         /* Check for MDD event */
5320         if (icr & E1000_ICR_MDDET)
5321                 igb_process_mdd_event(adapter);
5322
5323         E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other);
5324
5325         return IRQ_HANDLED;
5326 }
5327
5328 static void igb_write_itr(struct igb_q_vector *q_vector)
5329 {
5330         struct igb_adapter *adapter = q_vector->adapter;
5331         u32 itr_val = q_vector->itr_val & 0x7FFC;
5332
5333         if (!q_vector->set_itr)
5334                 return;
5335
5336         if (!itr_val)
5337                 itr_val = 0x4;
5338
5339         if (adapter->hw.mac.type == e1000_82575)
5340                 itr_val |= itr_val << 16;
5341         else
5342                 itr_val |= E1000_EITR_CNT_IGNR;
5343
5344         writel(itr_val, q_vector->itr_register);
5345         q_vector->set_itr = 0;
5346 }
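/*
 * The masking above keeps bits 14:2 of the ITR value (0x7FFC), bumps a
 * zero value up to the minimum of 0x4, and on 82575 mirrors the value
 * into the upper 16 bits of EITR, where later MACs instead set
 * E1000_EITR_CNT_IGNR.
 */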
5347
5348 static irqreturn_t igb_msix_ring(int irq, void *data)
5349 {
5350         struct igb_q_vector *q_vector = data;
5351
5352         /* Write the ITR value calculated from the previous interrupt. */
5353         igb_write_itr(q_vector);
5354
5355         napi_schedule(&q_vector->napi);
5356
5357         return IRQ_HANDLED;
5358 }
5359
5360 #ifdef IGB_DCA
5361 static void igb_update_dca(struct igb_q_vector *q_vector)
5362 {
5363         struct igb_adapter *adapter = q_vector->adapter;
5364         struct e1000_hw *hw = &adapter->hw;
5365         int cpu = get_cpu();
5366
5367         if (q_vector->cpu == cpu)
5368                 goto out_no_update;
5369
5370         if (q_vector->tx.ring) {
5371                 int q = q_vector->tx.ring->reg_idx;
5372                 u32 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(q));
5373                 if (hw->mac.type == e1000_82575) {
5374                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
5375                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
5376                 } else {
5377                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
5378                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
5379                                       E1000_DCA_TXCTRL_CPUID_SHIFT_82576;
5380                 }
5381                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
5382                 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(q), dca_txctrl);
5383         }
5384         if (q_vector->rx.ring) {
5385                 int q = q_vector->rx.ring->reg_idx;
5386                 u32 dca_rxctrl = E1000_READ_REG(hw, E1000_DCA_RXCTRL(q));
5387                 if (hw->mac.type == e1000_82575) {
5388                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
5389                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
5390                 } else {
5391                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
5392                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
5393                                       E1000_DCA_RXCTRL_CPUID_SHIFT_82576;
5394                 }
5395                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
5396                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
5397                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
5398                 E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(q), dca_rxctrl);
5399         }
5400         q_vector->cpu = cpu;
5401 out_no_update:
5402         put_cpu();
5403 }
5404
5405 static void igb_setup_dca(struct igb_adapter *adapter)
5406 {
5407         struct e1000_hw *hw = &adapter->hw;
5408         int i;
5409
5410         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
5411                 return;
5412
5413         /* Always use CB2 mode, difference is masked in the CB driver. */
5414         E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
5415
5416         for (i = 0; i < adapter->num_q_vectors; i++) {
5417                 adapter->q_vector[i]->cpu = -1;
5418                 igb_update_dca(adapter->q_vector[i]);
5419         }
5420 }
5421
5422 static int __igb_notify_dca(struct device *dev, void *data)
5423 {
5424         struct net_device *netdev = dev_get_drvdata(dev);
5425         struct igb_adapter *adapter = netdev_priv(netdev);
5426         struct pci_dev *pdev = adapter->pdev;
5427         struct e1000_hw *hw = &adapter->hw;
5428         unsigned long event = *(unsigned long *)data;
5429
5430         switch (event) {
5431         case DCA_PROVIDER_ADD:
5432                 /* if already enabled, don't do it again */
5433                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5434                         break;
5435                 if (dca_add_requester(dev) == E1000_SUCCESS) {
5436                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
5437                         dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
5438                         igb_setup_dca(adapter);
5439                         break;
5440                 }
5441                 /* Fall Through since DCA is disabled. */
5442         case DCA_PROVIDER_REMOVE:
5443                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5444                         /* without this a class_device is left
5445                          * hanging around in the sysfs model */
5446                         dca_remove_requester(dev);
5447                         dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
5448                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5449                         E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
5450                 }
5451                 break;
5452         }
5453
5454         return E1000_SUCCESS;
5455 }
5456
5457 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5458                           void *p)
5459 {
5460         int ret_val;
5461
5462         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5463                                          __igb_notify_dca);
5464
5465         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5466 }
5467 #endif /* IGB_DCA */
5468
5469 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5470 {
5471         unsigned char mac_addr[ETH_ALEN];
5472 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5473         struct pci_dev *pdev = adapter->pdev;
5474         struct e1000_hw *hw = &adapter->hw;
5475         struct pci_dev *pvfdev;
5476         unsigned int device_id;
5477         u16 thisvf_devfn;
5478 #endif
5479
5480         random_ether_addr(mac_addr);
5481         igb_set_vf_mac(adapter, vf, mac_addr);
5482
5483 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5484         switch (adapter->hw.mac.type) {
5485         case e1000_82576:
5486                 device_id = IGB_82576_VF_DEV_ID;
5487                 /* VF Stride for 82576 is 2 */
5488                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5489                         (pdev->devfn & 1);
5490                 break;
5491         case e1000_i350:
5492                 device_id = IGB_I350_VF_DEV_ID;
5493                 /* VF Stride for I350 is 4 */
5494                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5495                                 (pdev->devfn & 3);
5496                 break;
5497         default:
5498                 device_id = 0;
5499                 thisvf_devfn = 0;
5500                 break;
5501         }
5502
5503         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5504         while (pvfdev) {
5505                 if (pvfdev->devfn == thisvf_devfn)
5506                         break;
5507                 pvfdev = pci_get_device(hw->vendor_id,
5508                                         device_id, pvfdev);
5509         }
5510
5511         if (pvfdev)
5512                 adapter->vf_data[vf].vfdev = pvfdev;
5513         else
5514                 dev_err(&pdev->dev,
5515                         "Couldn't find pci dev ptr for VF %4.4x\n",
5516                         thisvf_devfn);
5517         return pvfdev != NULL;
5518 #else
5519         return true;
5520 #endif
5521 }
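/*
 * Devfn example for the stride arithmetic above, assuming the PF sits
 * at devfn 0: on 82576 (stride 2) VF 3 is expected at
 * 0x80 + (3 << 1) = 0x86, while on I350 (stride 4) it would be
 * 0x80 + (3 << 2) = 0x8c.  The pci_get_device() walk then resolves that
 * devfn to a struct pci_dev for later VF-assignment checks.
 */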
5522
5523 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5524 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5525 {
5526         struct e1000_hw *hw = &adapter->hw;
5527         struct pci_dev *pdev = adapter->pdev;
5528         struct pci_dev *pvfdev;
5529         u16 vf_devfn = 0;
5530         u16 vf_stride;
5531         unsigned int device_id;
5532         int vfs_found = 0;
5533
5534         switch (adapter->hw.mac.type) {
5535         case e1000_82576:
5536                 device_id = IGB_82576_VF_DEV_ID;
5537                 /* VF Stride for 82576 is 2 */
5538                 vf_stride = 2;
5539                 break;
5540         case e1000_i350:
5541                 device_id = IGB_I350_VF_DEV_ID;
5542                 /* VF Stride for I350 is 4 */
5543                 vf_stride = 4;
5544                 break;
5545         default:
5546                 device_id = 0;
5547                 vf_stride = 0;
5548                 break;
5549         }
5550
5551         vf_devfn = pdev->devfn + 0x80;
5552         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5553         while (pvfdev) {
5554                 if (pvfdev->devfn == vf_devfn)
5555                         vfs_found++;
5556                 vf_devfn += vf_stride;
5557                 pvfdev = pci_get_device(hw->vendor_id,
5558                                         device_id, pvfdev);
5559         }
5560
5561         return vfs_found;
5562 }
5563 #endif
5564
5565 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5566 {
5567 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5568         int i;
5569         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5570                 if (adapter->vf_data[i].vfdev) {
5571                         if (adapter->vf_data[i].vfdev->dev_flags &
5572                             PCI_DEV_FLAGS_ASSIGNED)
5573                                 return true;
5574                 }
5575         }
5576 #endif
5577         return false;
5578 }
5579
5580 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5581 {
5582         struct e1000_hw *hw = &adapter->hw;
5583         u32 ping;
5584         int i;
5585
5586         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5587                 ping = E1000_PF_CONTROL_MSG;
5588                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5589                         ping |= E1000_VT_MSGTYPE_CTS;
5590                 e1000_write_mbx(hw, &ping, 1, i);
5591         }
5592 }
5593
5594 /**
5595  *  igb_mta_set - Set multicast filter table address
5596  *  @adapter: pointer to the adapter structure
5597  *  @hash_value: determines the MTA register and bit to set
5598  *
5599  *  The multicast table address is a register array of 32-bit registers.
5600  *  The hash_value is used to determine what register the bit is in, the
5601  *  current value is read, the new bit is OR'd in and the new value is
5602  *  written back into the register.
5603  **/
5604 void igb_mta_set(struct igb_adapter *adapter, u32 hash_value)
5605 {
5606         struct e1000_hw *hw = &adapter->hw;
5607         u32 hash_bit, hash_reg, mta;
5608
5609         /*
5610          * The MTA is a register array of 32-bit registers. It is
5611          * treated like an array of (32*mta_reg_count) bits.  We want to
5612          * set bit BitArray[hash_value]. So we figure out what register
5613          * the bit is in, read it, OR in the new bit, then write
5614          * back the new value.  The (hw->mac.mta_reg_count - 1) serves as a
5615          * mask to bits 31:5 of the hash value which gives us the
5616          * register we're modifying.  The hash bit within that register
5617          * is determined by the lower 5 bits of the hash value.
5618          */
5619         hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
5620         hash_bit = hash_value & 0x1F;
5621
5622         mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg);
5623
5624         mta |= (1 << hash_bit);
5625
5626         E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta);
5627         E1000_WRITE_FLUSH(hw);
5628 }
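/*
 * Editor's note (worked example, not part of the driver): assuming
 * hw->mac.mta_reg_count == 128 (mask 0x7F) and hash_value == 0x0653,
 * igb_mta_set() above resolves to:
 *
 *   hash_reg = (0x0653 >> 5) & 0x7F = 0x32   ->  MTA register 50
 *   hash_bit =  0x0653 & 0x1F       = 0x13   ->  bit 19 of that register
 *
 * so the read-modify-write ORs (1 << 19) into MTA[50].  The register count
 * is only an assumption for the example; the real value comes from
 * hw->mac.mta_reg_count.
 */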
5629
5630 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5631 {
5632
5633         struct e1000_hw *hw = &adapter->hw;
5634         u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf));
5635         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5636
5637         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5638                             IGB_VF_FLAG_MULTI_PROMISC);
5639         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5640
5641 #ifdef IGB_ENABLE_VF_PROMISC
5642         if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) {
5643                 vmolr |= E1000_VMOLR_ROPE;
5644                 vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC;
5645                 *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST;
5646         }
5647 #endif
5648         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5649                 vmolr |= E1000_VMOLR_MPME;
5650                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5651                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5652         } else {
5653                 /*
5654                  * if we have hashes and we are clearing a multicast promisc
5655                  * flag we need to write the hashes to the MTA as this step
5656                  * was previously skipped
5657                  */
5658                 if (vf_data->num_vf_mc_hashes > 30) {
5659                         vmolr |= E1000_VMOLR_MPME;
5660                 } else if (vf_data->num_vf_mc_hashes) {
5661                         int j;
5662                         vmolr |= E1000_VMOLR_ROMPE;
5663                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5664                                 igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
5665                 }
5666         }
5667
5668         E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr);
5669
5670         /* there are flags left unprocessed, likely not supported */
5671         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5672                 return -EINVAL;
5673
5674         return 0;
5675
5676 }
5677
5678 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5679                                   u32 *msgbuf, u32 vf)
5680 {
5681         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5682         u16 *hash_list = (u16 *)&msgbuf[1];
5683         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5684         int i;
5685
5686         /* salt away the number of multicast addresses assigned
5687          * to this VF for later use to restore when the PF multicast
5688          * list changes
5689          */
5690         vf_data->num_vf_mc_hashes = n;
5691
5692         /* only up to 30 hash values supported */
5693         if (n > 30)
5694                 n = 30;
5695
5696         /* store the hashes for later use */
5697         for (i = 0; i < n; i++)
5698                 vf_data->vf_mc_hashes[i] = hash_list[i];
5699
5700         /* Flush and reset the mta with the new values */
5701         igb_set_rx_mode(adapter->netdev);
5702
5703         return 0;
5704 }
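/*
 * Editor's note (illustrative sketch, not part of the driver): the mailbox
 * message parsed by igb_set_vf_multicasts() is laid out as the code above
 * implies.  For a VF requesting three multicast hashes it would look like:
 *
 *   msgbuf[0] = E1000_VF_SET_MULTICAST | (3 << E1000_VT_MSGINFO_SHIFT);
 *   msgbuf[1]..  three 16-bit hash values packed back to back, which is
 *                why hash_list is read as a (u16 *) at &msgbuf[1]
 *
 * Only the first 30 hashes are stored; a VF asking for more is handled by
 * putting it into multicast promiscuous mode (see igb_set_vf_promisc()
 * and igb_restore_vf_multicasts()).
 */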
5705
5706 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5707 {
5708         struct e1000_hw *hw = &adapter->hw;
5709         struct vf_data_storage *vf_data;
5710         int i, j;
5711
5712         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5713                 u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
5714                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5715
5716                 vf_data = &adapter->vf_data[i];
5717
5718                 if ((vf_data->num_vf_mc_hashes > 30) ||
5719                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5720                         vmolr |= E1000_VMOLR_MPME;
5721                 } else if (vf_data->num_vf_mc_hashes) {
5722                         vmolr |= E1000_VMOLR_ROMPE;
5723                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5724                                 igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
5725                 }
5726                 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
5727         }
5728 }
5729
5730 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5731 {
5732         struct e1000_hw *hw = &adapter->hw;
5733         u32 pool_mask, reg, vid;
5734         u16 vlan_default;
5735         int i;
5736
5737         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5738
5739         /* Find the vlan filter for this id */
5740         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5741                 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5742
5743                 /* remove the vf from the pool */
5744                 reg &= ~pool_mask;
5745
5746                 /* if pool is empty then remove entry from vfta */
5747                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5748                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5749                         vid = reg & E1000_VLVF_VLANID_MASK;
5750                         reg = 0;
5751                         igb_vfta_set(adapter, vid, FALSE);
5752                 }
5753
5754                 E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5755         }
5756
5757         adapter->vf_data[vf].vlans_enabled = 0;
5758
5759         vlan_default = adapter->vf_data[vf].default_vf_vlan_id;
5760         if (vlan_default)
5761                 igb_vlvf_set(adapter, vlan_default, true, vf);
5762 }
5763
5764 s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5765 {
5766         struct e1000_hw *hw = &adapter->hw;
5767         u32 reg, i;
5768
5769         /* The vlvf table only exists on 82576 hardware and newer */
5770         if (hw->mac.type < e1000_82576)
5771                 return -1;
5772
5773         /* we only need to do this if VMDq is enabled */
5774         if (!adapter->vmdq_pools)
5775                 return -1;
5776
5777         /* Find the vlan filter for this id */
5778         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5779                 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5780                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5781                     vid == (reg & E1000_VLVF_VLANID_MASK))
5782                         break;
5783         }
5784
5785         if (add) {
5786                 if (i == E1000_VLVF_ARRAY_SIZE) {
5787                         /* Did not find a matching VLAN ID entry that was
5788                          * enabled.  Search for a free filter entry, i.e.
5789                          * one without the enable bit set
5790                          */
5791                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5792                                 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5793                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5794                                         break;
5795                         }
5796                 }
5797                 if (i < E1000_VLVF_ARRAY_SIZE) {
5798                         /* Found an enabled/available entry */
5799                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5800
5801                         /* if !enabled we need to set this up in vfta */
5802                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5803                                 /* add VID to filter table */
5804                                 igb_vfta_set(adapter, vid, TRUE);
5805                                 reg |= E1000_VLVF_VLANID_ENABLE;
5806                         }
5807                         reg &= ~E1000_VLVF_VLANID_MASK;
5808                         reg |= vid;
5809                         E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5810
5811                         /* do not modify RLPML for PF devices */
5812                         if (vf >= adapter->vfs_allocated_count)
5813                                 return E1000_SUCCESS;
5814
5815                         if (!adapter->vf_data[vf].vlans_enabled) {
5816                                 u32 size;
5817                                 reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
5818                                 size = reg & E1000_VMOLR_RLPML_MASK;
5819                                 size += 4;
5820                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5821                                 reg |= size;
5822                                 E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
5823                         }
5824
5825                         adapter->vf_data[vf].vlans_enabled++;
5826                 }
5827         } else {
5828                 if (i < E1000_VLVF_ARRAY_SIZE) {
5829                         /* remove vf from the pool */
5830                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5831                         /* if pool is empty then remove entry from vfta */
5832                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5833                                 reg = 0;
5834                                 igb_vfta_set(adapter, vid, FALSE);
5835                         }
5836                         E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5837
5838                         /* do not modify RLPML for PF devices */
5839                         if (vf >= adapter->vfs_allocated_count)
5840                                 return E1000_SUCCESS;
5841
5842                         adapter->vf_data[vf].vlans_enabled--;
5843                         if (!adapter->vf_data[vf].vlans_enabled) {
5844                                 u32 size;
5845                                 reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
5846                                 size = reg & E1000_VMOLR_RLPML_MASK;
5847                                 size -= 4;
5848                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5849                                 reg |= size;
5850                                 E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
5851                         }
5852                 }
5853         }
5854         return E1000_SUCCESS;
5855 }
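/*
 * Editor's note (illustrative, not part of the driver): two details of
 * igb_vlvf_set() worth calling out.  Each VLVF entry carries a pool bitmap,
 * so adding a VF sets that VF's bit in the POOLSEL field
 * (reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf)) and the entry is only torn
 * down once the whole POOLSEL field is empty.  Separately, the per-VF
 * receive length limit (VMOLR.RLPML) is grown by 4 bytes when the first
 * VLAN is enabled for a VF, making room for the VLAN tag, and shrunk by 4
 * again once its last VLAN is removed.
 */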
5856
5857 #ifdef IFLA_VF_MAX
5858 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5859 {
5860         struct e1000_hw *hw = &adapter->hw;
5861
5862         if (vid)
5863                 E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5864         else
5865                 E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0);
5866 }
5867
5868 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5869                                int vf, u16 vlan, u8 qos)
5870 {
5871         int err = 0;
5872         struct igb_adapter *adapter = netdev_priv(netdev);
5873
5874         /* VLAN IDs accepted range 0-4094 */
5875         if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
5876                 return -EINVAL;
5877         if (vlan || qos) {
5878                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5879                 if (err)
5880                         goto out;
5881                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5882                 igb_set_vmolr(adapter, vf, !vlan);
5883                 adapter->vf_data[vf].pf_vlan = vlan;
5884                 adapter->vf_data[vf].pf_qos = qos;
5885                 igb_set_vf_vlan_strip(adapter, vf, true); 
5886                 dev_info(&adapter->pdev->dev,
5887                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5888                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5889                         dev_warn(&adapter->pdev->dev,
5890                                  "The VF VLAN has been set,"
5891                                  " but the PF device is not up.\n");
5892                         dev_warn(&adapter->pdev->dev,
5893                                  "Bring the PF device up before"
5894                                  " attempting to use the VF device.\n");
5895                 }
5896         } else {
5897                 if (adapter->vf_data[vf].pf_vlan)
5898                         dev_info(&adapter->pdev->dev,
5899                                  "Clearing VLAN on VF %d\n", vf);
5900                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5901                                    false, vf);
5902                 igb_set_vmvir(adapter, vlan, vf);
5903                 igb_set_vmolr(adapter, vf, true);
5904                 igb_set_vf_vlan_strip(adapter, vf, false); 
5905                 adapter->vf_data[vf].pf_vlan = 0;
5906                 adapter->vf_data[vf].pf_qos = 0;
5907         }
5908 out:
5909         return err;
5910 }
5911 #endif
5912
5913 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5914 {
5915         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5916         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5917
5918         if (vid)
5919                 igb_set_vf_vlan_strip(adapter, vf, true);
5920         else
5921                 igb_set_vf_vlan_strip(adapter, vf, false);
5922
5923         return igb_vlvf_set(adapter, vid, add, vf);
5924 }
5925
5926 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5927 {
5928         struct e1000_hw *hw = &adapter->hw;
5929
5930         /* clear flags except flag that the PF has set the MAC */
5931         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5932         adapter->vf_data[vf].last_nack = jiffies;
5933
5934         /* reset offloads to defaults */
5935         igb_set_vmolr(adapter, vf, true);
5936
5937         /* reset vlans for device */
5938         igb_clear_vf_vfta(adapter, vf);
5939 #ifdef IFLA_VF_MAX
5940         if (adapter->vf_data[vf].pf_vlan)
5941                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5942                                     adapter->vf_data[vf].pf_vlan,
5943                                     adapter->vf_data[vf].pf_qos);
5944         else
5945                 igb_clear_vf_vfta(adapter, vf);
5946 #endif
5947
5948         /* reset multicast table array for vf */
5949         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5950
5951         /* Flush and reset the mta with the new values */
5952         igb_set_rx_mode(adapter->netdev);
5953
5954         /*
5955          * Reset the VF's TDWBAL and TDWBAH registers, which are not
5956          * cleared by a VFLR
5957          */
5958         E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0);
5959         E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0);
5960         if (hw->mac.type == e1000_82576) {
5961                 E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0);
5962                 E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0);
5963         }
5964 }
5965
5966 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5967 {
5968         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5969
5970         /* generate a new mac address as we were hotplug removed/added */
5971         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5972                 random_ether_addr(vf_mac);
5973
5974         /* process remaining reset events */
5975         igb_vf_reset(adapter, vf);
5976 }
5977
5978 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5979 {
5980         struct e1000_hw *hw = &adapter->hw;
5981         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5982         u32 reg, msgbuf[3];
5983         u8 *addr = (u8 *)(&msgbuf[1]);
5984
5985         /* process all the same items cleared in a function level reset */
5986         igb_vf_reset(adapter, vf);
5987
5988         /* set vf mac address */
5989         igb_del_mac_filter(adapter, vf_mac, vf);
5990         igb_add_mac_filter(adapter, vf_mac, vf);
5991
5992         /* enable transmit and receive for vf */
5993         reg = E1000_READ_REG(hw, E1000_VFTE);
5994         E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf));
5995         reg = E1000_READ_REG(hw, E1000_VFRE);
5996         E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf));
5997
5998         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5999
6000         /* reply to reset with ack and vf mac address */
6001         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
6002         memcpy(addr, vf_mac, 6);
6003         e1000_write_mbx(hw, msgbuf, 3, vf);
6004 }
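/*
 * Editor's note (illustrative sketch, not part of the driver): the reset
 * reply assembled above is a three-word mailbox message:
 *
 *   msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;  command + ACK
 *   msgbuf[1], msgbuf[2]                                 6 MAC bytes,
 *                                                        filled via addr
 *
 * so a single e1000_write_mbx() call tells the VF both that its reset
 * completed and which MAC address the PF assigned to it.
 */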
6005
6006 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
6007 {
6008         /*
6009          * The VF MAC Address is stored in a packed array of bytes
6010          * starting at the second 32 bit word of the msg array
6011          */
6012         unsigned char *addr = (unsigned char *)&msg[1];
6013         int err = -1;
6014
6015         if (is_valid_ether_addr(addr))
6016                 err = igb_set_vf_mac(adapter, vf, addr);
6017
6018         return err;
6019 }
6020
6021 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
6022 {
6023         struct e1000_hw *hw = &adapter->hw;
6024         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6025         u32 msg = E1000_VT_MSGTYPE_NACK;
6026
6027         /* if device isn't clear to send it shouldn't be reading either */
6028         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
6029             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6030                 e1000_write_mbx(hw, &msg, 1, vf);
6031                 vf_data->last_nack = jiffies;
6032         }
6033 }
6034
6035 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
6036 {
6037         struct pci_dev *pdev = adapter->pdev;
6038         u32 msgbuf[E1000_VFMAILBOX_SIZE];
6039         struct e1000_hw *hw = &adapter->hw;
6040         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6041         s32 retval;
6042
6043         retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
6044
6045         if (retval) {
6046                 dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n");
6047                 return;
6048         }
6049
6050         /* this is a message we already processed, do nothing */
6051         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
6052                 return;
6053
6054         /*
6055          * until the vf completes a reset it should not be
6056          * allowed to start any configuration.
6057          */
6058
6059         if (msgbuf[0] == E1000_VF_RESET) {
6060                 igb_vf_reset_msg(adapter, vf);
6061                 return;
6062         }
6063
6064         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
6065                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
6066                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6067                         e1000_write_mbx(hw, msgbuf, 1, vf);
6068                         vf_data->last_nack = jiffies;
6069                 }
6070                 return;
6071         }
6072
6073         switch ((msgbuf[0] & 0xFFFF)) {
6074         case E1000_VF_SET_MAC_ADDR:
6075                 retval = -EINVAL;
6076 #ifndef IGB_DISABLE_VF_MAC_SET
6077                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
6078                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
6079                 else
6080                         DPRINTK(DRV, INFO,
6081                                 "VF %d attempted to override administratively "
6082                                 "set MAC address\nReload the VF driver to "
6083                                 "resume operations\n", vf);
6084 #endif
6085                 break;
6086         case E1000_VF_SET_PROMISC:
6087                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
6088                 break;
6089         case E1000_VF_SET_MULTICAST:
6090                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
6091                 break;
6092         case E1000_VF_SET_LPE:
6093                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
6094                 break;
6095         case E1000_VF_SET_VLAN:
6096                 retval = -1;
6097 #ifdef IFLA_VF_MAX
6098                 if (vf_data->pf_vlan)
6099                         DPRINTK(DRV, INFO,
6100                                 "VF %d attempted to override administratively "
6101                                 "set VLAN tag\nReload the VF driver to "
6102                                 "resume operations\n", vf);
6103                 else
6104 #endif
6105                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
6106                 break;
6107         default:
6108                 dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]);
6109                 retval = -E1000_ERR_MBX;
6110                 break;
6111         }
6112
6113         /* notify the VF of the results of what it sent us */
6114         if (retval)
6115                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
6116         else
6117                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
6118
6119         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
6120
6121         e1000_write_mbx(hw, msgbuf, 1, vf);
6122 }
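/*
 * Editor's note (illustrative sketch, not part of the driver): the reply
 * written above packs three things into msgbuf[0]: the original command
 * bits, ACK or NACK depending on retval, and CTS once the VF has finished
 * its reset.  A VF-side consumer might decode it roughly like:
 *
 *   if (msgbuf[0] & E1000_VT_MSGTYPE_NACK)
 *           the request was rejected (unsupported or invalid);
 *   else if (msgbuf[0] & E1000_VT_MSGTYPE_ACK)
 *           the request was accepted;
 *
 * The real igbvf handling is not shown in this file; the fragment only
 * illustrates how the flags composed here are meant to be consumed.
 */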
6123
6124 static void igb_msg_task(struct igb_adapter *adapter)
6125 {
6126         struct e1000_hw *hw = &adapter->hw;
6127         u32 vf;
6128
6129         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
6130                 /* process any reset requests */
6131                 if (!e1000_check_for_rst(hw, vf))
6132                         igb_vf_reset_event(adapter, vf);
6133
6134                 /* process any messages pending */
6135                 if (!e1000_check_for_msg(hw, vf))
6136                         igb_rcv_msg_from_vf(adapter, vf);
6137
6138                 /* process any acks */
6139                 if (!e1000_check_for_ack(hw, vf))
6140                         igb_rcv_ack_from_vf(adapter, vf);
6141         }
6142 }
6143
6144 /**
6145  *  igb_set_uta - Set unicast filter table address
6146  *  @adapter: board private structure
6147  *
6148  *  The unicast table address is a register array of 32-bit registers.
6149  *  The table is meant to be used in a way similar to how the MTA is used
6150  *  The table is meant to be used in a way similar to how the MTA is used;
6151  *  however, due to certain limitations in the hardware it is necessary to
6152  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
6153  **/
6154 static void igb_set_uta(struct igb_adapter *adapter)
6155 {
6156         struct e1000_hw *hw = &adapter->hw;
6157         int i;
6158
6159         /* The UTA table only exists on 82576 hardware and newer */
6160         if (hw->mac.type < e1000_82576)
6161                 return;
6162
6163         /* we only need to do this if VMDq is enabled */
6164         if (!adapter->vmdq_pools)
6165                 return;
6166
6167         for (i = 0; i < hw->mac.uta_reg_count; i++)
6168                 E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0);
6169 }
6170
6171 /**
6172  * igb_intr_msi - Interrupt Handler
6173  * @irq: interrupt number
6174  * @data: pointer to the adapter private structure
6175  **/
6176 static irqreturn_t igb_intr_msi(int irq, void *data)
6177 {
6178         struct igb_adapter *adapter = data;
6179         struct igb_q_vector *q_vector = adapter->q_vector[0];
6180         struct e1000_hw *hw = &adapter->hw;
6181         /* read ICR disables interrupts using IAM */
6182         u32 icr = E1000_READ_REG(hw, E1000_ICR);
6183
6184         igb_write_itr(q_vector);
6185
6186         if (icr & E1000_ICR_DRSTA)
6187                 schedule_work(&adapter->reset_task);
6188
6189         if (icr & E1000_ICR_DOUTSYNC) {
6190                 /* HW is reporting DMA is out of sync */
6191                 adapter->stats.doosync++;
6192         }
6193
6194         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6195                 hw->mac.get_link_status = 1;
6196                 if (!test_bit(__IGB_DOWN, &adapter->state))
6197                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
6198         }
6199
6200         napi_schedule(&q_vector->napi);
6201
6202         return IRQ_HANDLED;
6203 }
6204
6205 /**
6206  * igb_intr - Legacy Interrupt Handler
6207  * @irq: interrupt number
6208  * @data: pointer to the adapter private structure
6209  **/
6210 static irqreturn_t igb_intr(int irq, void *data)
6211 {
6212         struct igb_adapter *adapter = data;
6213         struct igb_q_vector *q_vector = adapter->q_vector[0];
6214         struct e1000_hw *hw = &adapter->hw;
6215         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
6216          * need for the IMC write */
6217         u32 icr = E1000_READ_REG(hw, E1000_ICR);
6218
6219         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
6220          * not set, then the adapter didn't send an interrupt */
6221         if (!(icr & E1000_ICR_INT_ASSERTED))
6222                 return IRQ_NONE;
6223
6224         igb_write_itr(q_vector);
6225
6226         if (icr & E1000_ICR_DRSTA)
6227                 schedule_work(&adapter->reset_task);
6228
6229         if (icr & E1000_ICR_DOUTSYNC) {
6230                 /* HW is reporting DMA is out of sync */
6231                 adapter->stats.doosync++;
6232         }
6233
6234         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6235                 hw->mac.get_link_status = 1;
6236                 /* guard against interrupt when we're going down */
6237                 if (!test_bit(__IGB_DOWN, &adapter->state))
6238                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
6239         }
6240
6241         napi_schedule(&q_vector->napi);
6242
6243         return IRQ_HANDLED;
6244 }
6245
6246 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
6247 {
6248         struct igb_adapter *adapter = q_vector->adapter;
6249         struct e1000_hw *hw = &adapter->hw;
6250
6251         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
6252             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
6253                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
6254                         igb_set_itr(q_vector);
6255                 else
6256                         igb_update_ring_itr(q_vector);
6257         }
6258
6259         if (!test_bit(__IGB_DOWN, &adapter->state)) {
6260                 if (adapter->msix_entries)
6261                         E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value);
6262                 else
6263                         igb_irq_enable(adapter);
6264         }
6265 }
6266
6267 /**
6268  * igb_poll - NAPI Rx polling callback
6269  * @napi: napi polling structure
6270  * @budget: count of how many packets we should handle
6271  **/
6272 static int igb_poll(struct napi_struct *napi, int budget)
6273 {
6274         struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi);
6275         bool clean_complete = true;
6276
6277 #ifdef IGB_DCA
6278         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
6279                 igb_update_dca(q_vector);
6280 #endif
6281         if (q_vector->tx.ring)
6282                 clean_complete = igb_clean_tx_irq(q_vector);
6283
6284         if (q_vector->rx.ring)
6285                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
6286
6287 #ifndef HAVE_NETDEV_NAPI_LIST
6288         /* if netdev is disabled we need to stop polling */
6289         if (!netif_running(q_vector->adapter->netdev))
6290                 clean_complete = true;
6291
6292 #endif
6293         /* If all work not completed, return budget and keep polling */
6294         if (!clean_complete)
6295                 return budget;
6296
6297         /* If not enough Rx work done, exit the polling mode */
6298         napi_complete(napi);
6299         igb_ring_irq_enable(q_vector);
6300
6301         return 0;
6302 }
6303
6304 #ifdef HAVE_HW_TIME_STAMP
6305 /**
6306  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
6307  * @adapter: board private structure
6308  * @shhwtstamps: timestamp structure to update
6309  * @regval: unsigned 64bit system time value.
6310  *
6311  * We need to convert the system time value stored in the RX/TXSTMP registers
6312  * into a hwtstamp which can be used by the upper level timestamping functions
6313  */
6314 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
6315                                    struct skb_shared_hwtstamps *shhwtstamps,
6316                                    u64 regval)
6317 {
6318         u64 ns;
6319
6320         /*
6321          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
6322          * 24 to match clock shift we setup earlier.
6323          * 24 to match the clock shift we set up earlier.
6324         if (adapter->hw.mac.type >= e1000_82580)
6325                 regval <<= IGB_82580_TSYNC_SHIFT;
6326
6327         ns = timecounter_cyc2time(&adapter->clock, regval);
6328
6329         /*
6330          * force a timecompare_update here (even if less than a second
6331          * has passed) in order to prevent the case when ptpd or other
6332          * software jumps the clock offset. Otherwise there is a small
6333          * window when the timestamp would be based on previous skew
6334          * and invalid results would be pushed to the network stack.
6335          */
6336         timecompare_update(&adapter->compare, 0);
6337         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
6338         shhwtstamps->hwtstamp = ns_to_ktime(ns);
6339         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
6340 }
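/*
 * Editor's note (illustrative sketch, not part of the driver): on 82580
 * and later the latched RX/TXSTMP value counts nanoseconds from bit 0, so
 * it is shifted left by IGB_82580_TSYNC_SHIFT above to match the
 * cyclecounter shift chosen when the timecounter was set up.  With a
 * made-up register value the conversion amounts to:
 *
 *   regval = 0x1000;                      4096 ns as latched by the MAC
 *   regval <<= IGB_82580_TSYNC_SHIFT;     into cyclecounter units
 *   ns = timecounter_cyc2time(&adapter->clock, regval);
 *   shhwtstamps->hwtstamp = ns_to_ktime(ns);
 *
 * The example value is arbitrary; the point is that the hardware sample
 * and the software timecounter must agree on the shift.
 */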
6341
6342 /**
6343  * igb_tx_hwtstamp - utility function which checks for TX time stamp
6344  * @q_vector: pointer to q_vector containing needed info
6345  * @buffer_info: pointer to igb_tx_buffer structure
6346  *
6347  * If we were asked to do hardware stamping and such a time stamp is
6348  * available, then it must have been for this skb here because we
6349  * allow only one such packet into the queue.
6350  */
6351 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
6352                             struct igb_tx_buffer *buffer_info)
6353 {
6354         struct igb_adapter *adapter = q_vector->adapter;
6355         struct e1000_hw *hw = &adapter->hw;
6356         struct skb_shared_hwtstamps shhwtstamps;
6357         u64 regval;
6358
6359         /* if skb does not support hw timestamp or TX stamp not valid exit */
6360         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
6361             !(E1000_READ_REG(hw, E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
6362                 return;
6363
6364         regval = E1000_READ_REG(hw, E1000_TXSTMPL);
6365         regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
6366
6367         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
6368         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
6369 }
6370
6371 #endif
6372 /**
6373  * igb_clean_tx_irq - Reclaim resources after transmit completes
6374  * @q_vector: pointer to q_vector containing needed info
6375  * returns TRUE if ring is completely cleaned
6376  **/
6377 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
6378 {
6379         struct igb_adapter *adapter = q_vector->adapter;
6380         struct igb_ring *tx_ring = q_vector->tx.ring;
6381         struct igb_tx_buffer *tx_buffer;
6382         union e1000_adv_tx_desc *tx_desc, *eop_desc;
6383         unsigned int total_bytes = 0, total_packets = 0;
6384         unsigned int budget = q_vector->tx.work_limit;
6385         unsigned int i = tx_ring->next_to_clean;
6386
6387         if (test_bit(__IGB_DOWN, &adapter->state))
6388                 return true;
6389
6390         tx_buffer = &tx_ring->tx_buffer_info[i];
6391         tx_desc = IGB_TX_DESC(tx_ring, i);
6392         i -= tx_ring->count;
6393
6394         for (; budget; budget--) {
6395                 eop_desc = tx_buffer->next_to_watch;
6396
6397                 /* prevent any other reads prior to eop_desc */
6398                 rmb();
6399
6400                 /* if next_to_watch is not set then there is no work pending */
6401                 if (!eop_desc)
6402                         break;
6403
6404                 /* if DD is not set pending work has not been completed */
6405                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
6406                         break;
6407
6408                 /* clear next_to_watch to prevent false hangs */
6409                 tx_buffer->next_to_watch = NULL;
6410
6411                 /* update the statistics for this packet */
6412                 total_bytes += tx_buffer->bytecount;
6413                 total_packets += tx_buffer->gso_segs;
6414
6415 #ifdef HAVE_HW_TIME_STAMP
6416                 /* retrieve hardware timestamp */
6417                 igb_tx_hwtstamp(q_vector, tx_buffer);
6418
6419 #endif
6420                 /* free the skb */
6421                 dev_kfree_skb_any(tx_buffer->skb);
6422
6423                 /* unmap skb header data */
6424                 dma_unmap_single(tx_ring->dev,
6425                                  dma_unmap_addr(tx_buffer, dma),
6426                                  dma_unmap_len(tx_buffer, len),
6427                                  DMA_TO_DEVICE);
6428
6429                 /* clear tx_buffer data */
6430                 tx_buffer->skb = NULL;
6431                 dma_unmap_len_set(tx_buffer, len, 0);
6432
6433                 /* clear last DMA location and unmap remaining buffers */
6434                 while (tx_desc != eop_desc) {
6435                         tx_buffer++;
6436                         tx_desc++;
6437                         i++;
6438                         if (unlikely(!i)) {
6439                                 i -= tx_ring->count;
6440                                 tx_buffer = tx_ring->tx_buffer_info;
6441                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
6442                         }
6443
6444                         /* unmap any remaining paged data */
6445                         if (dma_unmap_len(tx_buffer, len)) {
6446                                 dma_unmap_page(tx_ring->dev,
6447                                                dma_unmap_addr(tx_buffer, dma),
6448                                                dma_unmap_len(tx_buffer, len),
6449                                                DMA_TO_DEVICE);
6450                                 dma_unmap_len_set(tx_buffer, len, 0);
6451                         }
6452                 }
6453
6454                 /* move us one more past the eop_desc for start of next pkt */
6455                 tx_buffer++;
6456                 tx_desc++;
6457                 i++;
6458                 if (unlikely(!i)) {
6459                         i -= tx_ring->count;
6460                         tx_buffer = tx_ring->tx_buffer_info;
6461                         tx_desc = IGB_TX_DESC(tx_ring, 0);
6462                 }
6463         }
6464
6465 #ifdef CONFIG_BQL
6466         netdev_tx_completed_queue(txring_txq(tx_ring),
6467                                   total_packets, total_bytes);
6468 #endif /* CONFIG_BQL */
6469
6470         i += tx_ring->count;
6471         tx_ring->next_to_clean = i;
6472         tx_ring->tx_stats.bytes += total_bytes;
6473         tx_ring->tx_stats.packets += total_packets;
6474         q_vector->tx.total_bytes += total_bytes;
6475         q_vector->tx.total_packets += total_packets;
6476
6477         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
6478                 struct e1000_hw *hw = &adapter->hw;
6479
6480                 eop_desc = tx_buffer->next_to_watch;
6481
6482                 /* Detect a transmit hang in hardware, this serializes the
6483                  * check with the clearing of time_stamp and movement of i */
6484                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
6485                 if (eop_desc &&
6486                     time_after(jiffies, tx_buffer->time_stamp +
6487                                (adapter->tx_timeout_factor * HZ))
6488                     && !(E1000_READ_REG(hw, E1000_STATUS) &
6489                          E1000_STATUS_TXOFF)) {
6490
6491                         /* detected Tx unit hang */
6492                         dev_err(tx_ring->dev,
6493                                 "Detected Tx Unit Hang\n"
6494                                 "  Tx Queue             <%d>\n"
6495                                 "  TDH                  <%x>\n"
6496                                 "  TDT                  <%x>\n"
6497                                 "  next_to_use          <%x>\n"
6498                                 "  next_to_clean        <%x>\n"
6499                                 "buffer_info[next_to_clean]\n"
6500                                 "  time_stamp           <%lx>\n"
6501                                 "  next_to_watch        <%p>\n"
6502                                 "  jiffies              <%lx>\n"
6503                                 "  desc.status          <%x>\n",
6504                                 tx_ring->queue_index,
6505                                 E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)),
6506                                 readl(tx_ring->tail),
6507                                 tx_ring->next_to_use,
6508                                 tx_ring->next_to_clean,
6509                                 tx_buffer->time_stamp,
6510                                 eop_desc,
6511                                 jiffies,
6512                                 eop_desc->wb.status);
6513                         if (netif_is_multiqueue(netdev_ring(tx_ring)))
6514                                 netif_stop_subqueue(netdev_ring(tx_ring),
6515                                                     ring_queue_index(tx_ring));
6516                         else
6517                                 netif_stop_queue(netdev_ring(tx_ring));
6518
6519                         /* we are about to reset, no point in enabling stuff */
6520                         return true;
6521                 }
6522         }
6523
6524         if (unlikely(total_packets &&
6525                      netif_carrier_ok(netdev_ring(tx_ring)) &&
6526                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
6527                 /* Make sure that anybody stopping the queue after this
6528                  * sees the new next_to_clean.
6529                  */
6530                 smp_mb();
6531                 if (netif_is_multiqueue(netdev_ring(tx_ring))) {
6532                         if (__netif_subqueue_stopped(netdev_ring(tx_ring),
6533                                                      ring_queue_index(tx_ring)) &&
6534                             !(test_bit(__IGB_DOWN, &adapter->state))) {
6535                                 netif_wake_subqueue(netdev_ring(tx_ring),
6536                                                     ring_queue_index(tx_ring));
6537                                 tx_ring->tx_stats.restart_queue++;
6538                         }
6539                 } else {
6540                         if (netif_queue_stopped(netdev_ring(tx_ring)) &&
6541                             !(test_bit(__IGB_DOWN, &adapter->state))) {
6542                                 netif_wake_queue(netdev_ring(tx_ring));
6543                                 tx_ring->tx_stats.restart_queue++;
6544                         }
6545                 }
6546         }
6547
6548         return !!budget;
6549 }
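/*
 * Editor's note (worked example, not part of the driver): the cleanup loop
 * above biases the ring index by -tx_ring->count so that wrap-around can be
 * detected with a cheap "if (unlikely(!i))" instead of comparing against
 * the ring size.  With a 512-entry ring and next_to_clean == 510:
 *
 *   i = 510; i -= 512;     i == -2  (entry 510)
 *   i++;                   i == -1  (entry 511)
 *   i++;                   i ==  0  -> wrapped, so i -= 512 and the
 *                                      buffer/descriptor pointers rewind
 *
 * and the final "i += tx_ring->count" converts the biased value back to a
 * normal index before it is stored in next_to_clean.
 */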
6550
6551 #ifdef HAVE_VLAN_RX_REGISTER
6552 /**
6553  * igb_receive_skb - helper function to handle rx indications
6554  * @q_vector: structure containing interrupt and ring information
6555  * @skb: packet to send up
6556  **/
6557 static void igb_receive_skb(struct igb_q_vector *q_vector,
6558                             struct sk_buff *skb)
6559 {
6560         struct vlan_group **vlgrp = netdev_priv(skb->dev);
6561
6562         if (IGB_CB(skb)->vid) {
6563                 if (*vlgrp) {
6564                         vlan_gro_receive(&q_vector->napi, *vlgrp,
6565                                          IGB_CB(skb)->vid, skb);
6566                 } else {
6567                         dev_kfree_skb_any(skb);
6568                 }
6569         } else {
6570                 napi_gro_receive(&q_vector->napi, skb);
6571         }
6572 }
6573
6574 #endif /* HAVE_VLAN_RX_REGISTER */
6575 static inline void igb_rx_checksum(struct igb_ring *ring,
6576                                    union e1000_adv_rx_desc *rx_desc,
6577                                    struct sk_buff *skb)
6578 {
6579         skb_checksum_none_assert(skb);
6580
6581         /* Ignore Checksum bit is set */
6582         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
6583                 return;
6584
6585         /* Rx checksum disabled via ethtool */
6586 #ifdef HAVE_NDO_SET_FEATURES
6587         if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM))
6588 #else
6589         if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags))
6590 #endif
6591                 return;
6592
6593         /* TCP/UDP checksum error bit is set */
6594         if (igb_test_staterr(rx_desc,
6595                              E1000_RXDEXT_STATERR_TCPE |
6596                              E1000_RXDEXT_STATERR_IPE)) {
6597                 /*
6598                  * work around errata with sctp packets where the TCPE aka
6599                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
6600                  * packets (i.e. let the stack check the crc32c)
6601                  */
6602                 if (!((skb->len == 60) &&
6603                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags)))
6604                         ring->rx_stats.csum_err++;
6605
6606                 /* let the stack verify checksum errors */
6607                 return;
6608         }
6609         /* It must be a TCP or UDP packet with a valid checksum */
6610         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
6611                                       E1000_RXD_STAT_UDPCS))
6612                 skb->ip_summed = CHECKSUM_UNNECESSARY;
6613 }
6614
6615 #ifdef NETIF_F_RXHASH
6616 static inline void igb_rx_hash(struct igb_ring *ring,
6617                                union e1000_adv_rx_desc *rx_desc,
6618                                struct sk_buff *skb)
6619 {
6620         if (netdev_ring(ring)->features & NETIF_F_RXHASH)
6621                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6622 }
6623
6624 #endif
6625 #ifdef HAVE_HW_TIME_STAMP
6626 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6627                             union e1000_adv_rx_desc *rx_desc,
6628                             struct sk_buff *skb)
6629 {
6630         struct igb_adapter *adapter = q_vector->adapter;
6631         struct e1000_hw *hw = &adapter->hw;
6632         u64 regval;
6633
6634         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6635                                        E1000_RXDADV_STAT_TS))
6636                 return;
6637
6638         /*
6639          * If this bit is set, then the RX registers contain the time stamp. No
6640          * other packet will be time stamped until we read these registers, so
6641          * read the registers to make them available again. Because only one
6642          * packet can be time stamped at a time, we know that the register
6643          * values must belong to this one here and therefore we don't need to
6644          * compare any of the additional attributes stored for it.
6645          *
6646          * If nothing went wrong, then it should have a skb_shared_tx that we
6647          * can turn into a skb_shared_hwtstamps.
6648          */
6649         if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6650                 u32 *stamp = (u32 *)skb->data;
6651                 regval = le32_to_cpu(*(stamp + 2));
6652                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6653                 skb_pull(skb, IGB_TS_HDR_LEN);
6654         } else {
6655                 if(!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6656                         return;
6657
6658                 regval = E1000_READ_REG(hw, E1000_RXSTMPL);
6659                 regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
6660         }
6661
6662         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6663 }
6664 #endif
6665 static void igb_rx_vlan(struct igb_ring *ring,
6666                         union e1000_adv_rx_desc *rx_desc,
6667                         struct sk_buff *skb)
6668 {
6669         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6670                 u16 vid = 0;
6671                 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6672                     test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6673                         vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6674                 else
6675                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6676 #ifdef HAVE_VLAN_RX_REGISTER
6677                 IGB_CB(skb)->vid = vid;
6678         } else {
6679                 IGB_CB(skb)->vid = 0;
6680 #else
6681                 __vlan_hwaccel_put_tag(skb, vid);
6682 #endif
6683         }
6684 }
6685
6686 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
6687 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6688 {
6689         /* HW will not DMA in data larger than the given buffer, even if it
6690          * parses the (NFS, of course) header to be larger.  In that case, it
6691          * fills the header buffer and spills the rest into the page.
6692          */
6693         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) &
6694                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6695         if (hlen > IGB_RX_HDR_LEN)
6696                 hlen = IGB_RX_HDR_LEN;
6697         return hlen;
6698 }
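/*
 * Editor's note (worked example, not part of the driver): with header split
 * enabled the hardware reports how many bytes it placed in the header
 * buffer via the hdr_info field of the writeback descriptor.  Assuming the
 * HDRBUFLEN field occupies bits 14:5 (shift of 5) and hdr_info reads back
 * as 0x05a0, the computation above gives:
 *
 *   hlen = (0x05a0 & E1000_RXDADV_HDRBUFLEN_MASK) >> 5 = 45 bytes
 *
 * clamped to IGB_RX_HDR_LEN, since the header buffer itself is only that
 * large.  The exact bit positions are taken from the
 * E1000_RXDADV_HDRBUFLEN_* macros and are an assumption of this example.
 */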
6699
6700 #endif
6701 #ifndef IGB_NO_LRO
6702 /**
6703  * igb_merge_active_tail - merge active tail into lro skb
6704  * @tail: pointer to active tail in frag_list
6705  *
6706  * This function merges the length and data of an active tail into the
6707  * skb containing the frag_list.  It resets the tail's pointer to the head,
6708  * but it leaves the head's pointer to the tail intact.
6709  **/
6710 static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail)
6711 {
6712         struct sk_buff *head = IGB_CB(tail)->head;
6713
6714         if (!head)
6715                 return tail;
6716
6717         head->len += tail->len;
6718         head->data_len += tail->len;
6719         head->truesize += tail->len;
6720
6721         IGB_CB(tail)->head = NULL;
6722
6723         return head;
6724 }
6725
6726 /**
6727  * igb_add_active_tail - adds an active tail into the skb frag_list
6728  * @head: pointer to the start of the skb
6729  * @tail: pointer to active tail to add to frag_list
6730  *
6731  * This function adds an active tail to the end of the frag list.  This tail
6732  * will still be receiving data so we cannot yet add its stats to the main
6733  * skb.  That is done via igb_merge_active_tail.
6734  **/
6735 static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail)
6736 {
6737         struct sk_buff *old_tail = IGB_CB(head)->tail;
6738
6739         if (old_tail) {
6740                 igb_merge_active_tail(old_tail);
6741                 old_tail->next = tail;
6742         } else {
6743                 skb_shinfo(head)->frag_list = tail;
6744         }
6745
6746         IGB_CB(tail)->head = head;
6747         IGB_CB(head)->tail = tail;
6748
6749         IGB_CB(head)->append_cnt++;
6750 }
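/*
 * Editor's note (illustrative, not part of the driver): the LRO head/tail
 * bookkeeping above lives entirely in the skb control block via IGB_CB().
 * A chain of three merged packets ends up shaped roughly like:
 *
 *   head skb --frag_list--> skb A --next--> skb B   (active tail)
 *   IGB_CB(head)->tail == B,  IGB_CB(B)->head == head
 *
 * Only the active tail keeps receiving data; its length is folded into the
 * head by igb_merge_active_tail(), either when another tail is appended or
 * when igb_close_active_frag_list() closes the chain before flushing.
 */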
6751
6752 /**
6753  * igb_close_active_frag_list - cleanup pointers on a frag_list skb
6754  * @head: pointer to head of an active frag list
6755  *
6756  * This function will clear the frag_tail_tracker pointer on an active
6757  * frag_list and return true if the pointer was actually set
6758  **/
6759 static inline bool igb_close_active_frag_list(struct sk_buff *head)
6760 {
6761         struct sk_buff *tail = IGB_CB(head)->tail;
6762
6763         if (!tail)
6764                 return false;
6765
6766         igb_merge_active_tail(tail);
6767
6768         IGB_CB(head)->tail = NULL;
6769
6770         return true;
6771 }
6772
6773 /**
6774  * igb_can_lro - returns true if packet is TCP/IPv4 and LRO is enabled
6775  * @rx_ring: pointer to the rx ring the packet was received on
6776  * @rx_desc: pointer to the rx descriptor
6777  * @skb: pointer to the skb to be merged
6778  *
6779  **/
6780 static inline bool igb_can_lro(struct igb_ring *rx_ring,
6781                                union e1000_adv_rx_desc *rx_desc,
6782                                struct sk_buff *skb)
6783 {
6784         struct iphdr *iph = (struct iphdr *)skb->data;
6785         __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
6786
6787         /* verify LRO is enabled */
6788         if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO))
6789                 return false;
6790
6791         /* verify hardware indicates this is IPv4/TCP */
6792         if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) ||
6793             !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))))
6794                 return false;
6795
6796         /* verify the header is large enough for us to read IP/TCP fields */
6797         if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr)))
6798                 return false;
6799
6800         /* verify there are no VLANs on packet */
6801         if (skb->protocol != __constant_htons(ETH_P_IP))
6802                 return false;
6803
6804         /* ensure we are version 4 with no options */
6805         if (*(u8 *)iph != 0x45)
6806                 return false;
6807
6808         /* .. and the packet is not fragmented */
6809         if (iph->frag_off & htons(IP_MF | IP_OFFSET))
6810                 return false;
6811
6812         /* .. and that next header is TCP */
6813         if (iph->protocol != IPPROTO_TCP)
6814                 return false;
6815
6816         return true;
6817 }
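/*
 * Editor's note (worked example, not part of the driver): the "0x45" test
 * above folds two IPv4 header checks into a single byte compare.  The first
 * octet of an IPv4 header holds the version in the high nibble and IHL in
 * the low nibble, so:
 *
 *   0x45 == (4 << 4) | 5     version 4, header length 5 * 4 = 20 bytes
 *
 * Anything else means a non-IPv4 packet or IP options, both of which this
 * LRO path refuses to merge.
 */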
6818
6819 static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb)
6820 {
6821         return (struct igb_lrohdr *)skb->data;
6822 }
6823
6824 /**
6825  * igb_lro_flush - Indicate packets to upper layer.
6826  *
6827  * Update IP and TCP header part of head skb if more than one
6828  * skb's chained and indicate packets to upper layer.
6829  **/
6830 static void igb_lro_flush(struct igb_q_vector *q_vector,
6831                           struct sk_buff *skb)
6832 {
6833         struct igb_lro_list *lrolist = q_vector->lrolist;
6834
6835         __skb_unlink(skb, &lrolist->active);
6836
6837         if (IGB_CB(skb)->append_cnt) {
6838                 struct igb_lrohdr *lroh = igb_lro_hdr(skb);
6839
6840                 /* close any active lro contexts */
6841                 igb_close_active_frag_list(skb);
6842
6843                 /* incorporate ip header and re-calculate checksum */
6844                 lroh->iph.tot_len = ntohs(skb->len);
6845                 lroh->iph.check = 0;
6846
6847                 /* header length is 5 since we know no options exist */
6848                 lroh->iph.check = ip_fast_csum((u8 *)lroh, 5);
6849
6850                 /* clear TCP checksum to indicate we are an LRO frame */
6851                 lroh->th.check = 0;
6852
6853                 /* incorporate latest timestamp into the tcp header */
6854                 if (IGB_CB(skb)->tsecr) {
6855                         lroh->ts[2] = IGB_CB(skb)->tsecr;
6856                         lroh->ts[1] = htonl(IGB_CB(skb)->tsval);
6857                 }
6858 #ifdef NETIF_F_TSO
6859
6860                 skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss;
6861 #endif
6862         }
6863
6864 #ifdef HAVE_VLAN_RX_REGISTER
6865         igb_receive_skb(q_vector, skb);
6866 #else
6867         napi_gro_receive(&q_vector->napi, skb);
6868 #endif
6869         lrolist->stats.flushed++;
6870 }
6871
6872 static void igb_lro_flush_all(struct igb_q_vector *q_vector)
6873 {
6874         struct igb_lro_list *lrolist = q_vector->lrolist;
6875         struct sk_buff *skb, *tmp;
6876
6877         skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp)
6878                 igb_lro_flush(q_vector, skb);
6879 }
6880
6881 /**
6882  * igb_lro_header_ok - verify the header is suitable for LRO and record flow state
6883  **/
6884 static void igb_lro_header_ok(struct sk_buff *skb)
6885 {
6886         struct igb_lrohdr *lroh = igb_lro_hdr(skb);
6887         u16 opt_bytes, data_len;
6888
6889         IGB_CB(skb)->tail = NULL;
6890         IGB_CB(skb)->tsecr = 0;
6891         IGB_CB(skb)->append_cnt = 0;
6892         IGB_CB(skb)->mss = 0;
6893
6894         /* ensure that the checksum is valid */
6895         if (skb->ip_summed != CHECKSUM_UNNECESSARY)
6896                 return;
6897
6898         /* If we see CE codepoint in IP header, packet is not mergeable */
6899         if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph)))
6900                 return;
6901
6902         /* ensure no bits set besides ack or psh */
6903         if (lroh->th.fin || lroh->th.syn || lroh->th.rst ||
6904             lroh->th.urg || lroh->th.ece || lroh->th.cwr ||
6905             !lroh->th.ack)
6906                 return;
6907
6908         /* store the total packet length */
6909         data_len = ntohs(lroh->iph.tot_len);
6910
6911         /* remove any padding from the end of the skb */
6912         __pskb_trim(skb, data_len);
6913
6914         /* remove header length from data length */
6915         data_len -= sizeof(struct igb_lrohdr);
6916
6917         /*
6918          * check for timestamps. Since timestamps are the only option we
6919          * handle, we only have to handle the simple case of aligned timestamps
6920          */
6921         opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr);
6922         if (opt_bytes != 0) {
6923                 if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) ||
6924                     !pskb_may_pull(skb, sizeof(struct igb_lrohdr) +
6925                                         TCPOLEN_TSTAMP_ALIGNED) ||
6926                     (lroh->ts[0] != htonl((TCPOPT_NOP << 24) |
6927                                              (TCPOPT_NOP << 16) |
6928                                              (TCPOPT_TIMESTAMP << 8) |
6929                                               TCPOLEN_TIMESTAMP)) ||
6930                     (lroh->ts[2] == 0)) {
6931                         return;
6932                 }
6933                 
6934                 IGB_CB(skb)->tsval = ntohl(lroh->ts[1]);
6935                 IGB_CB(skb)->tsecr = lroh->ts[2];
6936
6937                 data_len -= TCPOLEN_TSTAMP_ALIGNED;
6938         }
6939
6940         /* record data_len as mss for the packet */
6941         IGB_CB(skb)->mss = data_len;
6942         IGB_CB(skb)->next_seq = ntohl(lroh->th.seq);
6943 }
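/*
 * Editor's note (illustrative, not part of the driver): the option check
 * above only accepts the "aligned" TCP timestamp encoding, a 12-byte block
 * laid out as:
 *
 *   NOP, NOP, kind TCPOPT_TIMESTAMP, len TCPOLEN_TIMESTAMP,
 *   TSval (4 bytes), TSecr (4 bytes)
 *
 * which is why the first 32-bit word is compared against
 * htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 *       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)
 * and why ts[1]/ts[2] can then be read directly as TSval/TSecr.  Any other
 * option layout simply leaves the packet out of LRO.
 */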
6944
6945 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
6946 static bool igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb)
6947 {
6948         struct sk_buff *tail;
6949         struct skb_shared_info *tail_info;
6950         struct skb_shared_info *new_skb_info;
6951         u16 data_len;
6952
6953         /* header must be empty to pull frags into current skb */
6954         if (skb_headlen(new_skb))
6955                 return false;
6956
6957         if (IGB_CB(lro_skb)->tail)
6958                 tail = IGB_CB(lro_skb)->tail;
6959         else
6960                 tail = lro_skb;
6961
6962         tail_info = skb_shinfo(tail);
6963         new_skb_info = skb_shinfo(new_skb);
6964
6965         /* make sure we have room in frags list */
6966         if (new_skb_info->nr_frags >= (MAX_SKB_FRAGS - tail_info->nr_frags))
6967                 return false;
6968
6969         /* bump append count */
6970         IGB_CB(lro_skb)->append_cnt++;
6971
6972         /* copy frags into the last skb */
6973         memcpy(tail_info->frags + tail_info->nr_frags,
6974                new_skb_info->frags,
6975                new_skb_info->nr_frags * sizeof(skb_frag_t));
6976
6977         /* copy size data over */
6978         tail_info->nr_frags += new_skb_info->nr_frags;
6979         data_len = IGB_CB(new_skb)->mss;
6980         tail->len += data_len;
6981         tail->data_len += data_len;
6982         tail->truesize += data_len;
6983
6984         /* wipe record of data from new_skb */
6985         new_skb_info->nr_frags = 0;
6986         new_skb->len = new_skb->data_len = 0;
6987         new_skb->truesize -= data_len;
6988         new_skb->data = new_skb->head + NET_SKB_PAD + NET_IP_ALIGN;
6989         skb_reset_tail_pointer(new_skb);
6990         new_skb->protocol = 0;
6991         new_skb->ip_summed = CHECKSUM_NONE;
6992 #ifdef HAVE_VLAN_RX_REGISTER
6993         IGB_CB(new_skb)->vid = 0;
6994 #else
6995         new_skb->vlan_tci = 0;
6996 #endif
6997
6998         return true;
6999 }
7000
7001 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7002 /**
7003  * igb_lro_queue - if able, queue skb into lro chain
7004  * @q_vector: structure containing interrupt and ring information
7005  * @new_skb: pointer to current skb being checked
7006  *
7007  * Checks whether the given skb is eligible for LRO and, if so, chains it
7008  * to the existing lro_skb that matches its flow. If no LRO chain exists
7009  * for the flow yet, a new one is started.
7010  **/
7011 static struct sk_buff *igb_lro_queue(struct igb_q_vector *q_vector,
7012                                        struct sk_buff *new_skb)
7013 {
7014         struct sk_buff *lro_skb;
7015         struct igb_lro_list *lrolist = q_vector->lrolist;
7016         struct igb_lrohdr *lroh = igb_lro_hdr(new_skb);
7017         __be32 saddr = lroh->iph.saddr;
7018         __be32 daddr = lroh->iph.daddr;
7019         __be32 tcp_ports = *(__be32 *)&lroh->th;
7020         u16 data_len;
7021 #ifdef HAVE_VLAN_RX_REGISTER
7022         u16 vid = IGB_CB(new_skb)->vid;
7023 #else
7024         u16 vid = new_skb->vlan_tci;
7025 #endif
7026
7027         igb_lro_header_ok(new_skb);
7028
7029         /*
7030          * we have a packet that might be eligible for LRO,
7031          * so see if it matches anything we might expect
7032          */
7033         skb_queue_walk(&lrolist->active, lro_skb) {
7034                 if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports ||
7035                     igb_lro_hdr(lro_skb)->iph.saddr != saddr ||
7036                     igb_lro_hdr(lro_skb)->iph.daddr != daddr)
7037                         continue;
7038
7039 #ifdef HAVE_VLAN_RX_REGISTER
7040                 if (IGB_CB(lro_skb)->vid != vid)
7041 #else
7042                 if (lro_skb->vlan_tci != vid)
7043 #endif
7044                         continue;
7045
7046                 /* out of order packet */
7047                 if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) {
7048                         igb_lro_flush(q_vector, lro_skb);
7049                         IGB_CB(new_skb)->mss = 0;
7050                         break;
7051                 }
7052
7053                 /* TCP timestamp options have changed */
7054                 if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) {
7055                         igb_lro_flush(q_vector, lro_skb);
7056                         break;
7057                 }
7058
7059                 /* make sure timestamp values are increasing */
7060                 if (IGB_CB(lro_skb)->tsecr &&
7061                     IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) {
7062                         igb_lro_flush(q_vector, lro_skb);
7063                         IGB_CB(new_skb)->mss = 0;
7064                         break;
7065                 }
7066
7067                 data_len = IGB_CB(new_skb)->mss;
7068
7069                 /*
7070                  * flush if the header is malformed or carries no tcp data, if
7071                  * the segment exceeds our mss, or if it won't fit in free space.
7072                  */
7073                 if (data_len == 0 ||
7074                     data_len > IGB_CB(lro_skb)->mss ||
7075                     data_len > IGB_CB(lro_skb)->free) {
7076                         igb_lro_flush(q_vector, lro_skb);
7077                         break;
7078                 }
7079
7080                 /* ack sequence numbers or window size has changed */
7081                 if (igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq ||
7082                     igb_lro_hdr(lro_skb)->th.window != lroh->th.window) {
7083                         igb_lro_flush(q_vector, lro_skb);
7084                         break;
7085                 }
7086
7087                 /* remove the IP and TCP headers */
7088                 skb_pull(new_skb, new_skb->len - data_len);
7089
7090                 /* update timestamp and timestamp echo response */
7091                 IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval;
7092                 IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr;
7093
7094                 /* update sequence and free space */
7095                 IGB_CB(lro_skb)->next_seq += data_len;
7096                 IGB_CB(lro_skb)->free -= data_len;
7097
7098 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7099                 /* if header is empty pull pages into current skb */
7100                 if (igb_merge_frags(lro_skb, new_skb)) {
7101                         lrolist->stats.recycled++;
7102                 } else {
7103 #endif
7104                         /* chain this new skb in frag_list */
7105                         igb_add_active_tail(lro_skb, new_skb);
7106                         new_skb = NULL;
7107 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7108                 }
7109 #endif
7110
7111                 if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh) {
7112                         igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh;
7113                         igb_lro_flush(q_vector, lro_skb);
7114                 }
7115
7116                 lrolist->stats.coal++;
7117                 return new_skb;
7118         }
7119
7120         if (IGB_CB(new_skb)->mss && !lroh->th.psh) {
7121                 /* if we are at capacity flush the tail */
7122                 if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) {
7123                         lro_skb = skb_peek_tail(&lrolist->active);
7124                         if (lro_skb)
7125                                 igb_lro_flush(q_vector, lro_skb);
7126                 }
7127
7128                 /* update sequence and free space */
7129                 IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss;
7130                 IGB_CB(new_skb)->free = 65521 - new_skb->len;
7131
7132                 /* .. and insert at the front of the active list */
7133                 __skb_queue_head(&lrolist->active, new_skb);
7134
7135                 lrolist->stats.coal++;
7136                 return NULL;
7137         }
7138
7139         /* packet not handled by any of the above, pass it to the stack */
7140 #ifdef HAVE_VLAN_RX_REGISTER
7141         igb_receive_skb(q_vector, new_skb);
7142 #else
7143         napi_gro_receive(&q_vector->napi, new_skb);
7144 #endif
7145         return NULL;
7146 }
7147
7148 #endif /* IGB_NO_LRO */
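/**
 * igb_clean_rx_irq - process completed descriptors on the rx ring
 * @q_vector: q_vector whose rx ring is being serviced
 * @budget: maximum number of packets to process in this pass
 *
 * Hands completed packets to LRO, GRO or the stack, returns buffers to
 * hardware in batches, and returns true if the budget was not exhausted.
 */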
7149 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
7150 {
7151         struct igb_ring *rx_ring = q_vector->rx.ring;
7152         union e1000_adv_rx_desc *rx_desc;
7153 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7154         const int current_node = numa_node_id();
7155 #endif
7156         unsigned int total_bytes = 0, total_packets = 0;
7157         u16 cleaned_count = igb_desc_unused(rx_ring);
7158         u16 i = rx_ring->next_to_clean;
7159
7160         rx_desc = IGB_RX_DESC(rx_ring, i);
7161
7162         while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
7163                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
7164                 struct sk_buff *skb = buffer_info->skb;
7165                 union e1000_adv_rx_desc *next_rxd;
7166
7167                 buffer_info->skb = NULL;
7168                 prefetch(skb->data);
7169
7170                 i++;
7171                 if (i == rx_ring->count)
7172                         i = 0;
7173
7174                 next_rxd = IGB_RX_DESC(rx_ring, i);
7175                 prefetch(next_rxd);
7176
7177                 /*
7178                  * This memory barrier is needed to keep us from reading
7179                  * any other fields out of the rx_desc until we know the
7180                  * RXD_STAT_DD bit is set
7181                  */
7182                 rmb();
7183
7184 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7185                 __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
7186                 dma_unmap_single(rx_ring->dev, buffer_info->dma,
7187                                  rx_ring->rx_buffer_len,
7188                                  DMA_FROM_DEVICE);
7189                 buffer_info->dma = 0;
7190
7191 #else
7192                 if (!skb_is_nonlinear(skb)) {
7193                         __skb_put(skb, igb_get_hlen(rx_desc));
7194                         dma_unmap_single(rx_ring->dev, buffer_info->dma,
7195                                          IGB_RX_HDR_LEN,
7196                                          DMA_FROM_DEVICE);
7197                         buffer_info->dma = 0;
7198                 }
7199
7200                 if (rx_desc->wb.upper.length) {
7201                         u16 length = le16_to_cpu(rx_desc->wb.upper.length);
7202
7203                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
7204                                            buffer_info->page,
7205                                            buffer_info->page_offset,
7206                                            length);
7207
7208                         skb->len += length;
7209                         skb->data_len += length;
7210                         skb->truesize += length;
7211
7212                         if ((page_count(buffer_info->page) != 1) ||
7213                             (page_to_nid(buffer_info->page) != current_node))
7214                                 buffer_info->page = NULL;
7215                         else
7216                                 get_page(buffer_info->page);
7217
7218                         dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
7219                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
7220                         buffer_info->page_dma = 0;
7221                 }
7222
7223                 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
7224                         struct igb_rx_buffer *next_buffer;
7225                         next_buffer = &rx_ring->rx_buffer_info[i];
7226                         buffer_info->skb = next_buffer->skb;
7227                         buffer_info->dma = next_buffer->dma;
7228                         next_buffer->skb = skb;
7229                         next_buffer->dma = 0;
7230                         goto next_desc;
7231                 }
7232
7233 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7234                 if (igb_test_staterr(rx_desc,
7235                                      E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
7236                         dev_kfree_skb_any(skb);
7237                         goto next_desc;
7238                 }
7239
7240 #ifdef HAVE_HW_TIME_STAMP
7241                 igb_rx_hwtstamp(q_vector, rx_desc, skb);
7242 #endif
7243 #ifdef NETIF_F_RXHASH
7244                 igb_rx_hash(rx_ring, rx_desc, skb);
7245 #endif
7246                 igb_rx_checksum(rx_ring, rx_desc, skb);
7247                 igb_rx_vlan(rx_ring, rx_desc, skb);
7248
7249                 total_bytes += skb->len;
7250                 total_packets++;
7251
7252                 skb->protocol = eth_type_trans(skb, netdev_ring(rx_ring));
7253
7254 #ifndef IGB_NO_LRO
7255                 if (igb_can_lro(rx_ring, rx_desc, skb))
7256                         buffer_info->skb = igb_lro_queue(q_vector, skb);
7257                 else
7258 #endif
7259 #ifdef HAVE_VLAN_RX_REGISTER
7260                         igb_receive_skb(q_vector, skb);
7261 #else
7262                         napi_gro_receive(&q_vector->napi, skb);
7263 #endif
7264
7265 #ifndef NETIF_F_GRO
7266                 netdev_ring(rx_ring)->last_rx = jiffies;
7267
7268 #endif
7269                 budget--;
7270 next_desc:
7271                 cleaned_count++;
7272
7273                 if (!budget)
7274                         break;
7275
7276                 /* return some buffers to hardware, one at a time is too slow */
7277                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
7278                         igb_alloc_rx_buffers(rx_ring, cleaned_count);
7279                         cleaned_count = 0;
7280                 }
7281
7282                 /* use prefetched values */
7283                 rx_desc = next_rxd;
7284         }
7285
7286         rx_ring->next_to_clean = i;
7287         rx_ring->rx_stats.packets += total_packets;
7288         rx_ring->rx_stats.bytes += total_bytes;
7289         q_vector->rx.total_packets += total_packets;
7290         q_vector->rx.total_bytes += total_bytes;
7291
7292         if (cleaned_count)
7293                 igb_alloc_rx_buffers(rx_ring, cleaned_count);
7294
7295 #ifndef IGB_NO_LRO
7296         if (netdev_ring(rx_ring)->features & NETIF_F_LRO)
7297                 igb_lro_flush_all(q_vector);
7298
7299 #endif /* IGB_NO_LRO */
7300         return !!budget;
7301 }
7302
7303 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
7304                                  struct igb_rx_buffer *bi)
7305 {
7306         struct sk_buff *skb = bi->skb;
7307         dma_addr_t dma = bi->dma;
7308
7309         if (dma)
7310                 return true;
7311
7312         if (likely(!skb)) {
7313 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7314                 skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
7315                                                 rx_ring->rx_buffer_len);
7316 #else
7317                 skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
7318                                                 IGB_RX_HDR_LEN);
7319 #endif
7320                 bi->skb = skb;
7321                 if (!skb) {
7322                         rx_ring->rx_stats.alloc_failed++;
7323                         return false;
7324                 }
7325
7326                 /* initialize skb for ring */
7327                 skb_record_rx_queue(skb, ring_queue_index(rx_ring));
7328         }
7329
7330 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7331         dma = dma_map_single(rx_ring->dev, skb->data,
7332                              rx_ring->rx_buffer_len, DMA_FROM_DEVICE);
7333 #else
7334         dma = dma_map_single(rx_ring->dev, skb->data,
7335                              IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
7336 #endif
7337
7338         if (dma_mapping_error(rx_ring->dev, dma)) {
7339                 rx_ring->rx_stats.alloc_failed++;
7340                 return false;
7341         }
7342
7343         bi->dma = dma;
7344         return true;
7345 }
7346
7347 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7348 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
7349                                   struct igb_rx_buffer *bi)
7350 {
7351         struct page *page = bi->page;
7352         dma_addr_t page_dma = bi->page_dma;
7353         unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
7354
7355         if (page_dma)
7356                 return true;
7357
7358         if (!page) {
7359                 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
7360                 bi->page = page;
7361                 if (unlikely(!page)) {
7362                         rx_ring->rx_stats.alloc_failed++;
7363                         return false;
7364                 }
7365         }
7366
7367         page_dma = dma_map_page(rx_ring->dev, page,
7368                                 page_offset, PAGE_SIZE / 2,
7369                                 DMA_FROM_DEVICE);
7370
7371         if (dma_mapping_error(rx_ring->dev, page_dma)) {
7372                 rx_ring->rx_stats.alloc_failed++;
7373                 return false;
7374         }
7375
7376         bi->page_dma = page_dma;
7377         bi->page_offset = page_offset;
7378         return true;
7379 }
7380
7381 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7382 /**
7383  * igb_alloc_rx_buffers - replace used receive buffers; packet split
7384  * @rx_ring: address of the ring to refill; @cleaned_count: buffers to allocate
7385  **/
7386 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
7387 {
7388         union e1000_adv_rx_desc *rx_desc;
7389         struct igb_rx_buffer *bi;
7390         u16 i = rx_ring->next_to_use;
7391
7392         rx_desc = IGB_RX_DESC(rx_ring, i);
7393         bi = &rx_ring->rx_buffer_info[i];
7394         i -= rx_ring->count;
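        /*
         * i is offset by the ring size so that it reaches zero exactly when
         * the ring wraps (see the !i check below); the real next_to_use index
         * is restored by adding rx_ring->count back after the loop.
         */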
7395
7396         while (cleaned_count--) {
7397                 if (!igb_alloc_mapped_skb(rx_ring, bi))
7398                         break;
7399
7400                 /* Refresh the desc even if buffer_addrs didn't change
7401                  * because each write-back erases this info. */
7402 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7403                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
7404 #else
7405                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
7406
7407                 if (!igb_alloc_mapped_page(rx_ring, bi))
7408                         break;
7409
7410                 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
7411
7412 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7413                 rx_desc++;
7414                 bi++;
7415                 i++;
7416                 if (unlikely(!i)) {
7417                         rx_desc = IGB_RX_DESC(rx_ring, 0);
7418                         bi = rx_ring->rx_buffer_info;
7419                         i -= rx_ring->count;
7420                 }
7421
7422                 /* clear the hdr_addr for the next_to_use descriptor */
7423                 rx_desc->read.hdr_addr = 0;
7424         }
7425
7426         i += rx_ring->count;
7427
7428         if (rx_ring->next_to_use != i) {
7429                 rx_ring->next_to_use = i;
7430
7431                 /* Force memory writes to complete before letting h/w
7432                  * know there are new descriptors to fetch.  (Only
7433                  * applicable for weak-ordered memory model archs,
7434                  * such as IA-64). */
7435                 wmb();
7436                 writel(i, rx_ring->tail);
7437         }
7438 }
7439
7440 #ifdef SIOCGMIIPHY
7441 /**
7442  * igb_mii_ioctl - handle MII register ioctls for copper PHYs
7443  * @netdev: network interface device structure
7444  * @ifr: interface request holding the MII data
7445  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
7446  **/
7447 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7448 {
7449         struct igb_adapter *adapter = netdev_priv(netdev);
7450         struct mii_ioctl_data *data = if_mii(ifr);
7451
7452         if (adapter->hw.phy.media_type != e1000_media_type_copper)
7453                 return -EOPNOTSUPP;
7454
7455         switch (cmd) {
7456         case SIOCGMIIPHY:
7457                 data->phy_id = adapter->hw.phy.addr;
7458                 break;
7459         case SIOCGMIIREG:
7460                 if (!capable(CAP_NET_ADMIN))
7461                         return -EPERM;
7462                 if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
7463                                    &data->val_out))
7464                         return -EIO;
7465                 break;
7466         case SIOCSMIIREG:
7467         default:
7468                 return -EOPNOTSUPP;
7469         }
7470         return E1000_SUCCESS;
7471 }
7472
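/*
 * Illustrative sketch (not part of the driver): reading a PHY register from
 * user space through the ioctls handled above. The interface name "eth0" and
 * the register chosen are arbitrary example values; a real program also needs
 * <linux/mii.h>, <linux/sockios.h>, <net/if.h> and <sys/ioctl.h>.
 *
 *      struct ifreq ifr;
 *      struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      memset(&ifr, 0, sizeof(ifr));
 *      strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *      ioctl(fd, SIOCGMIIPHY, &ifr);      // fills mii->phy_id
 *      mii->reg_num = MII_BMSR;           // basic mode status register
 *      ioctl(fd, SIOCGMIIREG, &ifr);      // needs CAP_NET_ADMIN; result in mii->val_out
 */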
7473 #endif
7474 #ifdef HAVE_HW_TIME_STAMP
7475 /**
7476  * igb_hwtstamp_ioctl - control hardware time stamping
7477  * @netdev: network interface device structure
7478  * @ifr: interface request carrying the hwtstamp_config from user space
7479  * @cmd: ioctl command (SIOCSHWTSTAMP)
7480  *
7481  * Outgoing time stamping can be enabled and disabled. Play nice and
7482  * disable it when requested, although it shouldn't cause any overhead
7483  * when no packet needs it. At most one packet in the queue may be
7484  * marked for time stamping, otherwise it would be impossible to tell
7485  * for sure to which packet the hardware time stamp belongs.
7486  *
7487  * Incoming time stamping has to be configured via the hardware
7488  * filters. Not all combinations are supported, in particular event
7489  * type has to be specified. Matching the kind of event packet is
7490  * not supported, with the exception of "all V2 events regardless of
7491  * layer 2 or 4".
7492  *
7493  **/
7494 static int igb_hwtstamp_ioctl(struct net_device *netdev,
7495                               struct ifreq *ifr, int cmd)
7496 {
7497         struct igb_adapter *adapter = netdev_priv(netdev);
7498         struct e1000_hw *hw = &adapter->hw;
7499         struct hwtstamp_config config;
7500         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
7501         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
7502         u32 tsync_rx_cfg = 0;
7503         bool is_l4 = false;
7504         bool is_l2 = false;
7505         u32 regval;
7506
7507         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
7508                 return -EFAULT;
7509
7510         /* reserved for future extensions */
7511         if (config.flags)
7512                 return -EINVAL;
7513
7514         switch (config.tx_type) {
7515         case HWTSTAMP_TX_OFF:
7516                 tsync_tx_ctl = 0;
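                /* fall through - TX_OFF only clears tsync_tx_ctl, then shares TX_ON's break */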
7517         case HWTSTAMP_TX_ON:
7518                 break;
7519         default:
7520                 return -ERANGE;
7521         }
7522
7523         switch (config.rx_filter) {
7524         case HWTSTAMP_FILTER_NONE:
7525                 tsync_rx_ctl = 0;
7526                 break;
7527         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
7528         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
7529         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
7530         case HWTSTAMP_FILTER_ALL:
7531                 /*
7532                  * register TSYNCRXCFG must be set, therefore it is not
7533                  * possible to time stamp both Sync and Delay_Req messages
7534                  * => fall back to time stamping all packets
7535                  */
7536                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
7537                 config.rx_filter = HWTSTAMP_FILTER_ALL;
7538                 break;
7539         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
7540                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
7541                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
7542                 is_l4 = true;
7543                 break;
7544         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
7545                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
7546                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
7547                 is_l4 = true;
7548                 break;
7549         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
7550         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
7551                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
7552                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
7553                 is_l2 = true;
7554                 is_l4 = true;
7555                 config.rx_filter = HWTSTAMP_FILTER_SOME;
7556                 break;
7557         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
7558         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
7559                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
7560                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
7561                 is_l2 = true;
7562                 is_l4 = true;
7563                 config.rx_filter = HWTSTAMP_FILTER_SOME;
7564                 break;
7565         case HWTSTAMP_FILTER_PTP_V2_EVENT:
7566         case HWTSTAMP_FILTER_PTP_V2_SYNC:
7567         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
7568                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
7569                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
7570                 is_l2 = true;
7571                 is_l4 = true;
7572                 break;
7573         default:
7574                 return -ERANGE;
7575         }
7576
7577         if (hw->mac.type == e1000_82575) {
7578                 if (tsync_rx_ctl | tsync_tx_ctl)
7579                         return -EINVAL;
7580                 return 0;
7581         }
7582
7583 #ifdef IGB_PER_PKT_TIMESTAMP
7584         /*
7585          * Per-packet timestamping only works if all packets are
7586          * timestamped, so enable timestamping in all packets as
7587          * long as one rx filter was configured.
7588          */
7589         if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
7590                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
7591                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
7592         }
7593 #endif
7594
7595         /* enable/disable TX */
7596         regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
7597         regval &= ~E1000_TSYNCTXCTL_ENABLED;
7598         regval |= tsync_tx_ctl;
7599         E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
7600
7601         /* enable/disable RX */
7602         regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
7603         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
7604         regval |= tsync_rx_ctl;
7605         E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
7606
7607         /* define which PTP packets are time stamped */
7608         E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
7609
7610         /* define ethertype filter for timestamped packets */
7611         if (is_l2)
7612                 E1000_WRITE_REG(hw, E1000_ETQF(3),
7613                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
7614                                  E1000_ETQF_1588 | /* enable timestamping */
7615                                  ETH_P_1588));     /* 1588 eth protocol type */
7616         else
7617                 E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
7618
7619 #define PTP_PORT 319
7620         /* L4 Queue Filter[3]: filter by destination port and protocol */
7621         if (is_l4) {
7622                 u32 ftqf = (IPPROTO_UDP /* UDP */
7623                         | E1000_FTQF_VF_BP /* VF not compared */
7624                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
7625                         | E1000_FTQF_MASK); /* mask all inputs */
7626                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
7627
7628                 E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_PORT));
7629                 E1000_WRITE_REG(hw, E1000_IMIREXT(3),
7630                                 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
7631                 if (hw->mac.type == e1000_82576) {
7632                         /* enable source port check */
7633                         E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_PORT));
7634                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
7635                 }
7636                 E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
7637         } else {
7638                 E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
7639         }
7640         E1000_WRITE_FLUSH(hw);
7641
7642         adapter->hwtstamp_config = config;
7643
7644         /* clear TX/RX time stamp registers, just to be sure */
7645         regval = E1000_READ_REG(hw, E1000_TXSTMPH);
7646         regval = E1000_READ_REG(hw, E1000_RXSTMPH);
7647
7648         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
7649                 -EFAULT : 0;
7650 }
7651
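/*
 * Illustrative sketch (not part of the driver): enabling hardware time
 * stamping from user space via the SIOCSHWTSTAMP path handled above. "eth0"
 * is an arbitrary example interface name; a real program also needs
 * <linux/net_tstamp.h>, <linux/sockios.h>, <net/if.h> and <sys/ioctl.h>.
 *
 *      struct hwtstamp_config cfg = {
 *              .tx_type   = HWTSTAMP_TX_ON,
 *              .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *      };
 *      struct ifreq ifr;
 *      int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *      memset(&ifr, 0, sizeof(ifr));
 *      strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *      ifr.ifr_data = (void *)&cfg;
 *      if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
 *              perror("SIOCSHWTSTAMP");
 *      // on success the driver may have adjusted cfg.rx_filter
 */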
7652 #endif
7653 /**
7654  * igb_ioctl - entry point for device-specific ioctls
7655  * @netdev: network interface device structure
7656  * @ifr: interface request passed from user space
7657  * @cmd: ioctl command to execute
7658  **/
7659 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7660 {
7661         switch (cmd) {
7662 #ifdef SIOCGMIIPHY
7663         case SIOCGMIIPHY:
7664         case SIOCGMIIREG:
7665         case SIOCSMIIREG:
7666                 return igb_mii_ioctl(netdev, ifr, cmd);
7667 #endif
7668 #ifdef HAVE_HW_TIME_STAMP
7669         case SIOCSHWTSTAMP:
7670                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
7671 #endif
7672 #ifdef ETHTOOL_OPS_COMPAT
7673         case SIOCETHTOOL:
7674                 return ethtool_ioctl(ifr);
7675 #endif
7676         default:
7677                 return -EOPNOTSUPP;
7678         }
7679 }
7680
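/**
 * e1000_read_pcie_cap_reg - read a word from the PCI Express capability
 * @hw: pointer to the HW structure
 * @reg: offset of the register within the PCI Express capability
 * @value: where the 16-bit register contents are stored
 *
 * Returns E1000_SUCCESS, or -E1000_ERR_CONFIG if the device exposes no
 * PCI Express capability.
 */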
7681 s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7682 {
7683         struct igb_adapter *adapter = hw->back;
7684         u16 cap_offset;
7685
7686         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
7687         if (!cap_offset)
7688                 return -E1000_ERR_CONFIG;
7689
7690         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
7691
7692         return E1000_SUCCESS;
7693 }
7694
7695 s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7696 {
7697         struct igb_adapter *adapter = hw->back;
7698         u16 cap_offset;
7699
7700         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
7701         if (!cap_offset)
7702                 return -E1000_ERR_CONFIG;
7703
7704         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
7705
7706         return E1000_SUCCESS;
7707 }
7708
7709 #ifdef HAVE_VLAN_RX_REGISTER
7710 static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp)
7711 #else
7712 void igb_vlan_mode(struct net_device *netdev, u32 features)
7713 #endif
7714 {
7715         struct igb_adapter *adapter = netdev_priv(netdev);
7716         struct e1000_hw *hw = &adapter->hw;
7717         u32 ctrl, rctl;
7718         int i;
7719 #ifdef HAVE_VLAN_RX_REGISTER
7720         bool enable = !!vlgrp;
7721
7722         igb_irq_disable(adapter);
7723
7724         adapter->vlgrp = vlgrp;
7725
7726         if (!test_bit(__IGB_DOWN, &adapter->state))
7727                 igb_irq_enable(adapter);
7728 #else
7729         bool enable = !!(features & NETIF_F_HW_VLAN_RX);
7730 #endif
7731
7732         if (enable) {
7733                 /* enable VLAN tag insert/strip */
7734                 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7735                 ctrl |= E1000_CTRL_VME;
7736                 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7737
7738                 /* Disable CFI check */
7739                 rctl = E1000_READ_REG(hw, E1000_RCTL);
7740                 rctl &= ~E1000_RCTL_CFIEN;
7741                 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
7742         } else {
7743                 /* disable VLAN tag insert/strip */
7744                 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7745                 ctrl &= ~E1000_CTRL_VME;
7746                 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7747         }
7748
7749 #ifndef CONFIG_IGB_VMDQ_NETDEV
7750         for (i = 0; i < adapter->vmdq_pools; i++) {
7751                 igb_set_vf_vlan_strip(adapter,
7752                                       adapter->vfs_allocated_count + i,
7753                                       enable);
7754         }
7755
7756 #else
7757         igb_set_vf_vlan_strip(adapter,
7758                               adapter->vfs_allocated_count,
7759                               enable);
7760
7761         for (i = 1; i < adapter->vmdq_pools; i++) {
7762 #ifdef HAVE_VLAN_RX_REGISTER
7763                 struct igb_vmdq_adapter *vadapter;
7764                 vadapter = netdev_priv(adapter->vmdq_netdev[i-1]);
7765                 enable = !!vadapter->vlgrp;
7766 #else
7767                 struct net_device *vnetdev;
7768                 vnetdev = adapter->vmdq_netdev[i-1];
7769                 enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX);
7770 #endif
7771                 igb_set_vf_vlan_strip(adapter,
7772                                       adapter->vfs_allocated_count + i,
7773                                       enable);
7774         }
7775
7776 #endif
7777         igb_rlpml_set(adapter);
7778 }
7779
7780 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7781 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
7782 #else
7783 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
7784 #endif
7785 {
7786         struct igb_adapter *adapter = netdev_priv(netdev);
7787         int pf_id = adapter->vfs_allocated_count;
7788
7789         /* attempt to add filter to vlvf array */
7790         igb_vlvf_set(adapter, vid, TRUE, pf_id);
7791
7792         /* add the filter since PF can receive vlans w/o entry in vlvf */
7793         igb_vfta_set(adapter, vid, TRUE);
7794 #ifndef HAVE_NETDEV_VLAN_FEATURES
7795
7796         /* Copy feature flags from netdev to the vlan netdev for this vid.
7797          * This allows things like TSO to bubble down to our vlan device.
7798          * There is no need to update netdev for vlan 0 (DCB), since it
7799          * wouldn't have a v_netdev.
7800          */
7801         if (adapter->vlgrp) {
7802                 struct vlan_group *vlgrp = adapter->vlgrp;
7803                 struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid);
7804                 if (v_netdev) {
7805                         v_netdev->features |= netdev->features;
7806                         vlan_group_set_device(vlgrp, vid, v_netdev);
7807                 }
7808         }
7809 #endif
7810 #ifndef HAVE_VLAN_RX_REGISTER
7811
7812         set_bit(vid, adapter->active_vlans);
7813 #endif
7814 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7815         return 0;
7816 #endif
7817 }
7818
7819 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7820 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
7821 #else
7822 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
7823 #endif
7824 {
7825         struct igb_adapter *adapter = netdev_priv(netdev);
7826         int pf_id = adapter->vfs_allocated_count;
7827         s32 err;
7828
7829 #ifdef HAVE_VLAN_RX_REGISTER
7830         igb_irq_disable(adapter);
7831
7832         vlan_group_set_device(adapter->vlgrp, vid, NULL);
7833
7834         if (!test_bit(__IGB_DOWN, &adapter->state))
7835                 igb_irq_enable(adapter);
7836
7837 #endif /* HAVE_VLAN_RX_REGISTER */
7838         /* remove vlan from VLVF table array */
7839         err = igb_vlvf_set(adapter, vid, FALSE, pf_id);
7840
7841         /* if vid was not present in VLVF just remove it from table */
7842         if (err)
7843                 igb_vfta_set(adapter, vid, FALSE);
7844 #ifndef HAVE_VLAN_RX_REGISTER
7845
7846         clear_bit(vid, adapter->active_vlans);
7847 #endif
7848 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7849         return 0;
7850 #endif
7851 }
7852
7853 static void igb_restore_vlan(struct igb_adapter *adapter)
7854 {
7855 #ifdef HAVE_VLAN_RX_REGISTER
7856         igb_vlan_mode(adapter->netdev, adapter->vlgrp);
7857
7858         if (adapter->vlgrp) {
7859                 u16 vid;
7860                 for (vid = 0; vid < VLAN_N_VID; vid++) {
7861                         if (!vlan_group_get_device(adapter->vlgrp, vid))
7862                                 continue;
7863                         igb_vlan_rx_add_vid(adapter->netdev, vid);
7864                 }
7865         }
7866 #else
7867         u16 vid;
7868
7869         igb_vlan_mode(adapter->netdev, adapter->netdev->features);
7870
7871         for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
7872                 igb_vlan_rx_add_vid(adapter->netdev, vid);
7873 #endif
7874 }
7875
7876 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
7877 {
7878         struct pci_dev *pdev = adapter->pdev;
7879         struct e1000_mac_info *mac = &adapter->hw.mac;
7880
7881         mac->autoneg = 0;
7882
7883         /* Fiber NICs only allow 1000 Mbps full duplex */
7884         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
7885                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
7886                 dev_err(pci_dev_to_dev(pdev),
7887                         "Unsupported Speed/Duplex configuration\n");
7888                 return -EINVAL;
7889         }
7890
7891         switch (spddplx) {
7892         case SPEED_10 + DUPLEX_HALF:
7893                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
7894                 break;
7895         case SPEED_10 + DUPLEX_FULL:
7896                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
7897                 break;
7898         case SPEED_100 + DUPLEX_HALF:
7899                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
7900                 break;
7901         case SPEED_100 + DUPLEX_FULL:
7902                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
7903                 break;
7904         case SPEED_1000 + DUPLEX_FULL:
7905                 mac->autoneg = 1;
7906                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
7907                 break;
7908         case SPEED_1000 + DUPLEX_HALF: /* not supported */
7909         default:
7910                 dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n");
7911                 return -EINVAL;
7912         }
7913         return 0;
7914 }
7915
7916 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
7917                           bool runtime)
7918 {
7919         struct net_device *netdev = pci_get_drvdata(pdev);
7920         struct igb_adapter *adapter = netdev_priv(netdev);
7921         struct e1000_hw *hw = &adapter->hw;
7922         u32 ctrl, rctl, status;
7923         u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
7924 #ifdef CONFIG_PM
7925         int retval = 0;
7926 #endif
7927
7928         netif_device_detach(netdev);
7929
7930         if (netif_running(netdev))
7931                 __igb_close(netdev, true);
7932
7933         igb_clear_interrupt_scheme(adapter);
7934
7935 #ifdef CONFIG_PM
7936         retval = pci_save_state(pdev);
7937         if (retval)
7938                 return retval;
7939 #endif
7940
7941         status = E1000_READ_REG(hw, E1000_STATUS);
7942         if (status & E1000_STATUS_LU)
7943                 wufc &= ~E1000_WUFC_LNKC;
7944
7945         if (wufc) {
7946                 igb_setup_rctl(adapter);
7947                 igb_set_rx_mode(netdev);
7948
7949                 /* turn on all-multi mode if wake on multicast is enabled */
7950                 if (wufc & E1000_WUFC_MC) {
7951                         rctl = E1000_READ_REG(hw, E1000_RCTL);
7952                         rctl |= E1000_RCTL_MPE;
7953                         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
7954                 }
7955
7956                 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7957                 /* phy power management enable */
7958                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
7959                 ctrl |= E1000_CTRL_ADVD3WUC;
7960                 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7961
7962                 /* Allow time for pending master requests to run */
7963                 e1000_disable_pcie_master(hw);
7964
7965                 E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN);
7966                 E1000_WRITE_REG(hw, E1000_WUFC, wufc);
7967         } else {
7968                 E1000_WRITE_REG(hw, E1000_WUC, 0);
7969                 E1000_WRITE_REG(hw, E1000_WUFC, 0);
7970         }
7971
7972         *enable_wake = wufc || adapter->en_mng_pt;
7973         if (!*enable_wake)
7974                 igb_power_down_link(adapter);
7975         else
7976                 igb_power_up_link(adapter);
7977
7978         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
7979          * would have already happened in close and is redundant. */
7980         igb_release_hw_control(adapter);
7981
7982         pci_disable_device(pdev);
7983
7984         return 0;
7985 }
7986
7987 #ifdef CONFIG_PM
7988 #ifdef HAVE_SYSTEM_SLEEP_PM_OPS
7989 static int igb_suspend(struct device *dev)
7990 {
7991         int retval;
7992         bool wake;
7993         struct pci_dev *pdev = to_pci_dev(dev);
7994
7995         retval = __igb_shutdown(pdev, &wake, 0);
7996         if (retval)
7997                 return retval;
7998
7999         if (wake) {
8000                 pci_prepare_to_sleep(pdev);
8001         } else {
8002                 pci_wake_from_d3(pdev, false);
8003                 pci_set_power_state(pdev, PCI_D3hot);
8004         }
8005
8006         return 0;
8007 }
8008
8009 static int igb_resume(struct device *dev)
8010 {
8011         struct pci_dev *pdev = to_pci_dev(dev);
8012         struct net_device *netdev = pci_get_drvdata(pdev);
8013         struct igb_adapter *adapter = netdev_priv(netdev);
8014         struct e1000_hw *hw = &adapter->hw;
8015         u32 err;
8016
8017         pci_set_power_state(pdev, PCI_D0);
8018         pci_restore_state(pdev);
8019         pci_save_state(pdev);
8020
8021         err = pci_enable_device_mem(pdev);
8022         if (err) {
8023                 dev_err(pci_dev_to_dev(pdev),
8024                         "igb: Cannot enable PCI device from suspend\n");
8025                 return err;
8026         }
8027         pci_set_master(pdev);
8028
8029         pci_enable_wake(pdev, PCI_D3hot, 0);
8030         pci_enable_wake(pdev, PCI_D3cold, 0);
8031
8032 #ifdef CONFIG_PM_RUNTIME
8033         if (!rtnl_is_locked()) {
8034                 /*
8035                  * shut up ASSERT_RTNL() warning in
8036                  * netif_set_real_num_tx/rx_queues.
8037                  */
8038                 rtnl_lock();
8039                 err = igb_init_interrupt_scheme(adapter);
8040                 rtnl_unlock();
8041         } else {
8042                 err = igb_init_interrupt_scheme(adapter);
8043         }
8044         if (err) {
8045 #else
8046         if (igb_init_interrupt_scheme(adapter)) {
8047 #endif /* CONFIG_PM_RUNTIME */
8048                 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
8049                 return -ENOMEM;
8050         }
8051
8052         igb_reset(adapter);
8053
8054         /* let the f/w know that the h/w is now under the control of the
8055          * driver. */
8056         igb_get_hw_control(adapter);
8057
8058         E1000_WRITE_REG(hw, E1000_WUS, ~0);
8059
8060         if (netdev->flags & IFF_UP) {
8061                 err = __igb_open(netdev, true);
8062                 if (err)
8063                         return err;
8064         }
8065
8066         netif_device_attach(netdev);
8067
8068         return 0;
8069 }
8070
8071 #ifdef CONFIG_PM_RUNTIME
8072 static int igb_runtime_idle(struct device *dev)
8073 {
8074         struct pci_dev *pdev = to_pci_dev(dev);
8075         struct net_device *netdev = pci_get_drvdata(pdev);
8076         struct igb_adapter *adapter = netdev_priv(netdev);
8077
8078         if (!igb_has_link(adapter))
8079                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
8080
8081         return -EBUSY;
8082 }
8083
8084 static int igb_runtime_suspend(struct device *dev)
8085 {
8086         struct pci_dev *pdev = to_pci_dev(dev);
8087         int retval;
8088         bool wake;
8089
8090         retval = __igb_shutdown(pdev, &wake, 1);
8091         if (retval)
8092                 return retval;
8093
8094         if (wake) {
8095                 pci_prepare_to_sleep(pdev);
8096         } else {
8097                 pci_wake_from_d3(pdev, false);
8098                 pci_set_power_state(pdev, PCI_D3hot);
8099         }
8100
8101         return 0;
8102 }
8103
8104 static int igb_runtime_resume(struct device *dev)
8105 {
8106         return igb_resume(dev);
8107 }
8108 #endif /* CONFIG_PM_RUNTIME */
8109 #endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
8110 #endif /* CONFIG_PM */
8111
8112 #ifdef USE_REBOOT_NOTIFIER
8113 /* only want to do this for 2.4 kernels? */
8114 static int igb_notify_reboot(struct notifier_block *nb, unsigned long event,
8115                              void *p)
8116 {
8117         struct pci_dev *pdev = NULL;
8118         bool wake;
8119
8120         switch (event) {
8121         case SYS_DOWN:
8122         case SYS_HALT:
8123         case SYS_POWER_OFF:
8124                 while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
8125                         if (pci_dev_driver(pdev) == &igb_driver) {
8126                                 __igb_shutdown(pdev, &wake, 0);
8127                                 if (event == SYS_POWER_OFF) {
8128                                         pci_wake_from_d3(pdev, wake);
8129                                         pci_set_power_state(pdev, PCI_D3hot);
8130                                 }
8131                         }
8132                 }
8133         }
8134         return NOTIFY_DONE;
8135 }
8136 #else
8137 static void igb_shutdown(struct pci_dev *pdev)
8138 {
8139         bool wake = false;
8140
8141         __igb_shutdown(pdev, &wake, 0);
8142
8143         if (system_state == SYSTEM_POWER_OFF) {
8144                 pci_wake_from_d3(pdev, wake);
8145                 pci_set_power_state(pdev, PCI_D3hot);
8146         }
8147 }
8148 #endif /* USE_REBOOT_NOTIFIER */
8149
8150 #ifdef CONFIG_NET_POLL_CONTROLLER
8151 /*
8152  * Polling 'interrupt' - used by things like netconsole to send skbs
8153  * without having to re-enable interrupts. It's not called while
8154  * the interrupt routine is executing.
8155  */
8156 static void igb_netpoll(struct net_device *netdev)
8157 {
8158         struct igb_adapter *adapter = netdev_priv(netdev);
8159         struct e1000_hw *hw = &adapter->hw;
8160         struct igb_q_vector *q_vector;
8161         int i;
8162
8163         for (i = 0; i < adapter->num_q_vectors; i++) {
8164                 q_vector = adapter->q_vector[i];
8165                 if (adapter->msix_entries)
8166                         E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value);
8167                 else
8168                         igb_irq_disable(adapter);
8169                 napi_schedule(&q_vector->napi);
8170         }
8171 }
8172 #endif /* CONFIG_NET_POLL_CONTROLLER */
8173
8174 #ifdef HAVE_PCI_ERS
8175 #define E1000_DEV_ID_82576_VF 0x10CA
8176 /**
8177  * igb_io_error_detected - called when PCI error is detected
8178  * @pdev: Pointer to PCI device
8179  * @state: The current pci connection state
8180  *
8181  * This function is called after a PCI bus error affecting
8182  * this device has been detected.
8183  */
8184 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
8185                                               pci_channel_state_t state)
8186 {
8187         struct net_device *netdev = pci_get_drvdata(pdev);
8188         struct igb_adapter *adapter = netdev_priv(netdev);
8189
8190 #ifdef CONFIG_PCI_IOV__UNUSED
8191         struct pci_dev *bdev, *vfdev;
8192         u32 dw0, dw1, dw2, dw3;
8193         int vf, pos;
8194         u16 req_id, pf_func;
8195
8196         if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA))
8197                 goto skip_bad_vf_detection;
8198
8199         bdev = pdev->bus->self;
8200         while (bdev && (bdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
8201                 bdev = bdev->bus->self;
8202
8203         if (!bdev)
8204                 goto skip_bad_vf_detection;
8205
8206         pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
8207         if (!pos)
8208                 goto skip_bad_vf_detection;
8209
8210         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
8211         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
8212         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
8213         pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
8214
8215         req_id = dw1 >> 16;
8216         /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */
8217         if (!(req_id & 0x0080))
8218                 goto skip_bad_vf_detection;
8219
8220         pf_func = req_id & 0x01;
8221         if ((pf_func & 1) == (pdev->devfn & 1)) {
8222
8223                 vf = (req_id & 0x7F) >> 1;
8224                 dev_err(pci_dev_to_dev(pdev),
8225                         "VF %d has caused a PCIe error\n", vf);
8226                 dev_err(pci_dev_to_dev(pdev),
8227                         "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
8228                         "%8.8x\tdw3: %8.8x\n",
8229                         dw0, dw1, dw2, dw3);
8230
8231                 /* Find the pci device of the offending VF */
8232                 vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
8233                                        E1000_DEV_ID_82576_VF, NULL);
8234                 while (vfdev) {
8235                         if (vfdev->devfn == (req_id & 0xFF))
8236                                 break;
8237                         vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
8238                                                E1000_DEV_ID_82576_VF, vfdev);
8239                 }
8240                 /*
8241                  * There's a slim chance the VF could have been hot plugged,
8242                  * so if it is no longer present we don't need to issue the
8243                  * VFLR.  Just clean up the AER in that case.
8244                  */
8245                 if (vfdev) {
8246                         dev_err(pci_dev_to_dev(pdev),
8247                                 "Issuing VFLR to VF %d\n", vf);
8248                         pci_write_config_dword(vfdev, 0xA8, 0x00008000);
8249                 }
8250
8251                 pci_cleanup_aer_uncorrect_error_status(pdev);
8252         }
8253
8254         /*
8255          * Even though the error may have occurred on the other port
8256          * we still need to increment the vf error reference count for
8257          * both ports because the I/O resume function will be called
8258          * for both of them.
8259          */
8260         adapter->vferr_refcount++;
8261
8262         return PCI_ERS_RESULT_RECOVERED;
8263
8264 skip_bad_vf_detection:
8265 #endif /* CONFIG_PCI_IOV__UNUSED */
8266
8267         netif_device_detach(netdev);
8268
8269         if (state == pci_channel_io_perm_failure)
8270                 return PCI_ERS_RESULT_DISCONNECT;
8271
8272         if (netif_running(netdev))
8273                 igb_down(adapter);
8274         pci_disable_device(pdev);
8275
8276         /* Request a slot reset. */
8277         return PCI_ERS_RESULT_NEED_RESET;
8278 }
8279
8280 /**
8281  * igb_io_slot_reset - called after the pci bus has been reset.
8282  * @pdev: Pointer to PCI device
8283  *
8284  * Restart the card from scratch, as if from a cold-boot. Implementation
8285  * resembles the first-half of the igb_resume routine.
8286  */
8287 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
8288 {
8289         struct net_device *netdev = pci_get_drvdata(pdev);
8290         struct igb_adapter *adapter = netdev_priv(netdev);
8291         struct e1000_hw *hw = &adapter->hw;
8292         pci_ers_result_t result;
8293
8294         if (pci_enable_device_mem(pdev)) {
8295                 dev_err(pci_dev_to_dev(pdev),
8296                         "Cannot re-enable PCI device after reset.\n");
8297                 result = PCI_ERS_RESULT_DISCONNECT;
8298         } else {
8299                 pci_set_master(pdev);
8300                 pci_restore_state(pdev);
8301                 pci_save_state(pdev);
8302
8303                 pci_enable_wake(pdev, PCI_D3hot, 0);
8304                 pci_enable_wake(pdev, PCI_D3cold, 0);
8305
8306                 schedule_work(&adapter->reset_task);
8307                 E1000_WRITE_REG(hw, E1000_WUS, ~0);
8308                 result = PCI_ERS_RESULT_RECOVERED;
8309         }
8310
8311         pci_cleanup_aer_uncorrect_error_status(pdev);
8312
8313         return result;
8314 }
8315
8316 /**
8317  * igb_io_resume - called when traffic can start flowing again.
8318  * @pdev: Pointer to PCI device
8319  *
8320  * This callback is called when the error recovery driver tells us that
8321  * its OK to resume normal operation. Implementation resembles the
8322  * second-half of the igb_resume routine.
8323  */
8324 static void igb_io_resume(struct pci_dev *pdev)
8325 {
8326         struct net_device *netdev = pci_get_drvdata(pdev);
8327         struct igb_adapter *adapter = netdev_priv(netdev);
8328
8329         if (adapter->vferr_refcount) {
8330                 dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n");
8331                 adapter->vferr_refcount--;
8332                 return;
8333         }
8334
8335         if (netif_running(netdev)) {
8336                 if (igb_up(adapter)) {
8337                         dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n");
8338                         return;
8339                 }
8340         }
8341
8342         netif_device_attach(netdev);
8343
8344         /* let the f/w know that the h/w is now under the control of the
8345          * driver. */
8346         igb_get_hw_control(adapter);
8347 }
8348
8349 #endif /* HAVE_PCI_ERS */
8350
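/**
 * igb_add_mac_filter - add a MAC address to the driver's filter table
 * @adapter: board private structure
 * @addr: MAC address to program into a free receive address register
 * @queue: queue/pool index the address is associated with
 *
 * Returns 0 if the address was stored (or is all zeros and ignored),
 * or -ENOMEM when every RAR entry is already in use.
 */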
8351 int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue)
8352 {
8353         struct e1000_hw *hw = &adapter->hw;
8354         int i;
8355
8356         if (is_zero_ether_addr(addr))
8357                 return 0;
8358
8359         for (i = 0; i < hw->mac.rar_entry_count; i++) {
8360                 if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
8361                         continue;
8362                 adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED |
8363                                                    IGB_MAC_STATE_IN_USE);
8364                 memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
8365                 adapter->mac_table[i].queue = queue;
8366                 igb_sync_mac_table(adapter);
8367                 return 0;
8368         }
8369         return -ENOMEM;
8370 }
8371 int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue)
8372 {
8373         /* search table for addr, if found, set to 0 and sync */
8374         int i;
8375         struct e1000_hw *hw = &adapter->hw;
8376
8377         if (is_zero_ether_addr(addr))
8378                 return 0;
8379         for (i = 0; i < hw->mac.rar_entry_count; i++) {
8380                 if (!compare_ether_addr(addr, adapter->mac_table[i].addr) &&
8381                     adapter->mac_table[i].queue == queue) {
8382                         adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED;
8383                         memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
8384                         adapter->mac_table[i].queue = 0;
8385                         igb_sync_mac_table(adapter);
8386                         return 0;
8387                 }
8388         }
8389         return -ENOMEM;
8390 }
8391 static int igb_set_vf_mac(struct igb_adapter *adapter,
8392                           int vf, unsigned char *mac_addr)
8393 {
8394         igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf);
8395         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
8396
8397         igb_add_mac_filter(adapter, mac_addr, vf);
8398
8399         return 0;
8400 }
8401
8402 #ifdef IFLA_VF_MAX
8403 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
8404 {
8405         struct igb_adapter *adapter = netdev_priv(netdev);
8406         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
8407                 return -EINVAL;
8408         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
8409         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
8410         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
8411                                       " change effective.\n");
8412         if (test_bit(__IGB_DOWN, &adapter->state)) {
8413                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
8414                          " but the PF device is not up.\n");
8415                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
8416                          " attempting to use the VF device.\n");
8417         }
8418         return igb_set_vf_mac(adapter, vf, mac);
8419 }
8420
8421 static int igb_link_mbps(int internal_link_speed)
8422 {
8423         switch (internal_link_speed) {
8424         case SPEED_100:
8425                 return 100;
8426         case SPEED_1000:
8427                 return 1000;
8428         default:
8429                 return 0;
8430         }
8431 }
8432
8433 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
8434                         int link_speed)
8435 {
8436         int rf_dec, rf_int;
8437         u32 bcnrc_val;
8438
8439         if (tx_rate != 0) {
8440                 /* Calculate the rate factor values to set */
8441                 rf_int = link_speed / tx_rate;
8442                 rf_dec = (link_speed - (rf_int * tx_rate));
8443                 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
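                /*
                 * Worked example (illustrative values only): with a 1000 Mbps
                 * link and tx_rate = 300 Mbps, rf_int = 1000 / 300 = 3 and
                 * rf_dec holds the remainder (1000 - 3 * 300) scaled into the
                 * fractional RF_DEC field, so the programmed rate factor
                 * approximates link_speed / tx_rate = 3.33.
                 */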
8444
8445                 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
8446                 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
8447                                 E1000_RTTBCNRC_RF_INT_MASK);
8448                 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
8449         } else {
8450                 bcnrc_val = 0;
8451         }
8452
8453         E1000_WRITE_REG(hw, E1000_RTTDQSEL, vf); /* vf X uses queue X */
8454         /*
8455          * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
8456          * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
8457          */
8458         E1000_WRITE_REG(hw, E1000_RTTBCNRM(0), 0x14);
8459         E1000_WRITE_REG(hw, E1000_RTTBCNRC, bcnrc_val);
8460 }
8461
8462 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
8463 {
8464         int actual_link_speed, i;
8465         bool reset_rate = false;
8466
8467         /* VF TX rate limit was not set */
8468         if ((adapter->vf_rate_link_speed == 0) ||
8469                 (adapter->hw.mac.type != e1000_82576))
8470                 return;
8471
8472         actual_link_speed = igb_link_mbps(adapter->link_speed);
8473         if (actual_link_speed != adapter->vf_rate_link_speed) {
8474                 reset_rate = true;
8475                 adapter->vf_rate_link_speed = 0;
8476                 dev_info(&adapter->pdev->dev,
8477                 "Link speed has been changed. VF Transmit rate is disabled\n");
8478         }
8479
8480         for (i = 0; i < adapter->vfs_allocated_count; i++) {
8481                 if (reset_rate)
8482                         adapter->vf_data[i].tx_rate = 0;
8483
8484                 igb_set_vf_rate_limit(&adapter->hw, i,
8485                         adapter->vf_data[i].tx_rate, actual_link_speed);
8486         }
8487 }
8488
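     /*
      * igb_ndo_set_vf_bw() services the IFLA_VF_TX_RATE request from
      * rtnetlink; a command along the lines of
      * "ip link set <pf-dev> vf <n> rate <Mbps>" (exact syntax depends on
      * the iproute2 version) ends up here.  Per-VF rate limiting is only
      * supported on 82576, and a rate of 0 removes the limit.
      */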
8489 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
8490 {
8491         struct igb_adapter *adapter = netdev_priv(netdev);
8492         struct e1000_hw *hw = &adapter->hw;
8493         int actual_link_speed;
8494
8495         if (hw->mac.type != e1000_82576)
8496                 return -EOPNOTSUPP;
8497
8498         actual_link_speed = igb_link_mbps(adapter->link_speed);
8499         if ((vf >= adapter->vfs_allocated_count) ||
8500                 (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) ||
8501                 (tx_rate < 0) || (tx_rate > actual_link_speed))
8502                 return -EINVAL;
8503
8504         adapter->vf_rate_link_speed = actual_link_speed;
8505         adapter->vf_data[vf].tx_rate = (u16)tx_rate;
8506         igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
8507
8508         return 0;
8509 }
8510
8511 static int igb_ndo_get_vf_config(struct net_device *netdev,
8512                                  int vf, struct ifla_vf_info *ivi)
8513 {
8514         struct igb_adapter *adapter = netdev_priv(netdev);
8515         if (vf >= adapter->vfs_allocated_count)
8516                 return -EINVAL;
8517         ivi->vf = vf;
8518         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
8519         ivi->tx_rate = adapter->vf_data[vf].tx_rate;
8520         ivi->vlan = adapter->vf_data[vf].pf_vlan;
8521         ivi->qos = adapter->vf_data[vf].pf_qos;
8522         return 0;
8523 }
8524 #endif
8525 static void igb_vmm_control(struct igb_adapter *adapter)
8526 {
8527         struct e1000_hw *hw = &adapter->hw;
8528         u32 reg;
8529
8530         switch (hw->mac.type) {
8531         case e1000_82575:
8532         default:
8533                 /* replication is not supported for 82575 */
8534                 return;
8535         case e1000_82576:
8536                 /* notify HW that the MAC is adding vlan tags */
8537                 reg = E1000_READ_REG(hw, E1000_DTXCTL);
8538                 reg |= (E1000_DTXCTL_VLAN_ADDED |
8539                         E1000_DTXCTL_SPOOF_INT);
8540                 E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
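                     /* fall through - also apply the RPLOLR setting below to 82576 */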
8541         case e1000_82580:
8542                 /* enable replication vlan tag stripping */
8543                 reg = E1000_READ_REG(hw, E1000_RPLOLR);
8544                 reg |= E1000_RPLOLR_STRVLAN;
8545                 E1000_WRITE_REG(hw, E1000_RPLOLR, reg);
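                     /* fall through */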
8546         case e1000_i350:
8547                 /* none of the above registers are supported by i350 */
8548                 break;
8549         }
8550
8551         /* Enable Malicious Driver Detection */
8552         if ((hw->mac.type == e1000_i350) && (adapter->vfs_allocated_count) &&
8553             (adapter->mdd))
8554                 igb_enable_mdd(adapter);
8555
8556         /* enable replication and loopback support */
8557         e1000_vmdq_set_loopback_pf(hw, adapter->vfs_allocated_count ||
8558                                    adapter->vmdq_pools);
8559
8560         e1000_vmdq_set_anti_spoofing_pf(hw, adapter->vfs_allocated_count ||
8561                                         adapter->vmdq_pools,
8562                                         adapter->vfs_allocated_count);
8563         e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count ||
8564                                       adapter->vmdq_pools);
8565 }
8566
8567 static void igb_init_fw(struct igb_adapter *adapter)
8568 {
8569         struct e1000_fw_drv_info fw_cmd;
8570         struct e1000_hw *hw = &adapter->hw;
8571         int i;
8572         u16 mask;
8573
8574         mask = E1000_SWFW_PHY0_SM;
8575
8576         if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) {
8577                 for (i = 0; i <= FW_MAX_RETRIES; i++) {
8578                         E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI);
8579                         fw_cmd.hdr.cmd = FW_CMD_DRV_INFO;
8580                         fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN;
8581                         fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED;
8582                         fw_cmd.port_num = hw->bus.func;
8583                         fw_cmd.drv_version = FW_FAMILY_DRV_VER;
8584                         fw_cmd.hdr.checksum = 0;
8585                         fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd,
8586                                                                    (FW_HDR_LEN +
8587                                                                     fw_cmd.hdr.buf_len));
8588                         e1000_host_interface_command(hw, (u8 *)&fw_cmd,
8589                                                      sizeof(fw_cmd));
8590                         if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS)
8591                                 break;
8592                 }
8593         } else
8594                 dev_warn(pci_dev_to_dev(adapter->pdev),
8595                          "Unable to get semaphore, firmware init failed.\n");
8596         hw->mac.ops.release_swfw_sync(hw, mask);
8597 }
8598
8599 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
8600 {
8601         struct e1000_hw *hw = &adapter->hw;
8602         u32 dmac_thr;
8603         u16 hwm;
8604
8605         if (hw->mac.type > e1000_82580) {
8606                 if (adapter->dmac != IGB_DMAC_DISABLE) {
8607                         u32 reg;
8608
8609                         /* force threshold to 0.  */
8610                         E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
8611
8612                         /*
8613                          * DMA Coalescing high water mark needs to be greater
8614                          * than the Rx threshold. Set hwm to PBA - max frame
8615                          * size in 16B units, capping it at PBA - 6KB.
8616                          */
8617                         hwm = 64 * pba - adapter->max_frame_size / 16;
8618                         if (hwm < 64 * (pba - 6))
8619                                 hwm = 64 * (pba - 6);
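                             /*
                              * Illustrative check: with pba = 34 (KB) and a
                              * 1522 byte max frame, hwm = 64 * 34 - 1522 / 16
                              * = 2081 (16-byte units), above the
                              * 64 * (34 - 6) = 1792 floor.
                              */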
8620                         reg = E1000_READ_REG(hw, E1000_FCRTC);
8621                         reg &= ~E1000_FCRTC_RTH_COAL_MASK;
8622                         reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
8623                                 & E1000_FCRTC_RTH_COAL_MASK);
8624                         E1000_WRITE_REG(hw, E1000_FCRTC, reg);
8625
8626                         /*
8627                          * Set the DMA Coalescing Rx threshold to PBA - 2 * max
8628                          * frame size, capping it at PBA - 10KB.
8629                          */
8630                         dmac_thr = pba - adapter->max_frame_size / 512;
8631                         if (dmac_thr < pba - 10)
8632                                 dmac_thr = pba - 10;
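                             /*
                              * Same illustrative numbers: dmac_thr =
                              * 34 - 1522 / 512 = 32 KB, above the
                              * pba - 10 = 24 KB floor.
                              */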
8633                         reg = E1000_READ_REG(hw, E1000_DMACR);
8634                         reg &= ~E1000_DMACR_DMACTHR_MASK;
8635                         reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
8636                                 & E1000_DMACR_DMACTHR_MASK);
8637
8638                         /* transition to L0x or L1 if available */
8639                         reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
8640
8641                         /* watchdog timer = msec values in 32usec intervals */
8642                         reg |= ((adapter->dmac) >> 5);
8643                         E1000_WRITE_REG(hw, E1000_DMACR, reg);
8644
8645                         /* no lower threshold to disable coalescing (smart fifo) - UTRESH=0 */
8646                         E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
8647
8648                         /*
8649                          * This sets the time to wait before requesting transition to
8650                          * low power state to number of usecs needed to receive 1 512
8651                          * byte frame at gigabit line rate
8652                          */
8653                         reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
8654
8655                         E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
8656
8657                         /* free space in tx packet buffer to wake from DMA coal */
8658                         E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
8659                                 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
8660
8661                         /* make low power state decision controlled by DMA coal */
8662                         reg = E1000_READ_REG(hw, E1000_PCIEMISC);
8663                         reg &= ~E1000_PCIEMISC_LX_DECISION;
8664                         E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
8665                 } /* endif adapter->dmac is not disabled */
8666         } else if (hw->mac.type == e1000_82580) {
8667                 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
8668                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
8669                                 reg & ~E1000_PCIEMISC_LX_DECISION);
8670                 E1000_WRITE_REG(hw, E1000_DMACR, 0);
8671         }
8672 }
8673
8674 /* igb_main.c */
8675
8676
8677 /**
8678  * igb_kni_probe - Device Initialization Routine
8679  * @pdev: PCI device information struct
8680  * @lad_dev: on success, receives the newly allocated net_device
8681  *
8682  * Returns 0 on success, negative on failure
8683  *
8684  * igb_kni_probe initializes an adapter identified by a pci_dev structure
8685  * and configures the adapter private structure.  Steps guarded by NO_KNI
8686  * (hardware reset, netdev registration) are compiled out for the KNI build.
8687  **/
8688 int igb_kni_probe(struct pci_dev *pdev,
8689                                struct net_device **lad_dev)
8690 {
8691         struct net_device *netdev;
8692         struct igb_adapter *adapter;
8693         struct e1000_hw *hw;
8694         u16 eeprom_data = 0;
8695         u8 pba_str[E1000_PBANUM_LENGTH];
8696         s32 ret_val;
8697         static int global_quad_port_a; /* global quad port a indication */
8698         int i, err, pci_using_dac = 0;
8699         static int cards_found;
8700
8701         err = pci_enable_device_mem(pdev);
8702         if (err)
8703                 return err;
8704
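     /*
      * The NO_KNI-guarded blocks below are only built for a standalone igb
      * driver; in the KNI ethtool case the device is presumed to be driven
      * from user space, so DMA mask setup, PCIe error reporting, netdev
      * registration and the timers/work items are skipped here.
      */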
8705 #ifdef NO_KNI
8706         pci_using_dac = 0;
8707         err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
8708         if (!err) {
8709                 err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
8710                 if (!err)
8711                         pci_using_dac = 1;
8712         } else {
8713                 err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
8714                 if (err) {
8715                         err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
8716                         if (err) {
8717                                 IGB_ERR("No usable DMA configuration, "
8718                                         "aborting\n");
8719                                 goto err_dma;
8720                         }
8721                 }
8722         }
8723
8724 #ifndef HAVE_ASPM_QUIRKS
8725         /* 82575 requires that the pci-e link partner disable the L0s state */
8726         switch (pdev->device) {
8727         case E1000_DEV_ID_82575EB_COPPER:
8728         case E1000_DEV_ID_82575EB_FIBER_SERDES:
8729         case E1000_DEV_ID_82575GB_QUAD_COPPER:
8730                 pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
8731         default:
8732                 break;
8733         }
8734
8735 #endif /* HAVE_ASPM_QUIRKS */
8736         err = pci_request_selected_regions(pdev,
8737                                            pci_select_bars(pdev,
8738                                                            IORESOURCE_MEM),
8739                                            igb_driver_name);
8740         if (err)
8741                 goto err_pci_reg;
8742
8743         pci_enable_pcie_error_reporting(pdev);
8744
8745         pci_set_master(pdev);
8746 #endif /* NO_KNI */
8747         err = -ENOMEM;
8748 #ifdef HAVE_TX_MQ
8749         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
8750                                    IGB_MAX_TX_QUEUES);
8751 #else
8752         netdev = alloc_etherdev(sizeof(struct igb_adapter));
8753 #endif /* HAVE_TX_MQ */
8754         if (!netdev)
8755                 goto err_alloc_etherdev;
8756
8757
8758         SET_MODULE_OWNER(netdev);
8759         SET_NETDEV_DEV(netdev, &pdev->dev);
8760
8761         //pci_set_drvdata(pdev, netdev);
8762
8763         adapter = netdev_priv(netdev);
8764         adapter->netdev = netdev;
8765         adapter->pdev = pdev;
8766         hw = &adapter->hw;
8767         hw->back = adapter;
8768         adapter->port_num = hw->bus.func;
8769         adapter->msg_enable = (1 << debug) - 1;
8770
8771 #ifdef HAVE_PCI_ERS
8772         err = pci_save_state(pdev);
8773         if (err)
8774                 goto err_ioremap;
8775 #endif
8776         err = -EIO;
8777         hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
8778                               pci_resource_len(pdev, 0));
8779         if (!hw->hw_addr)
8780                 goto err_ioremap;
8781
8782 #ifdef HAVE_NET_DEVICE_OPS
8783         netdev->netdev_ops = &igb_netdev_ops;
8784 #else /* HAVE_NET_DEVICE_OPS */
8785         netdev->open = &igb_open;
8786         netdev->stop = &igb_close;
8787         netdev->get_stats = &igb_get_stats;
8788 #ifdef HAVE_SET_RX_MODE
8789         netdev->set_rx_mode = &igb_set_rx_mode;
8790 #endif
8791         netdev->set_multicast_list = &igb_set_rx_mode;
8792         netdev->set_mac_address = &igb_set_mac;
8793         netdev->change_mtu = &igb_change_mtu;
8794         netdev->do_ioctl = &igb_ioctl;
8795 #ifdef HAVE_TX_TIMEOUT
8796         netdev->tx_timeout = &igb_tx_timeout;
8797 #endif
8798         netdev->vlan_rx_register = igb_vlan_mode;
8799         netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
8800         netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
8801 #ifdef CONFIG_NET_POLL_CONTROLLER
8802         netdev->poll_controller = igb_netpoll;
8803 #endif
8804         netdev->hard_start_xmit = &igb_xmit_frame;
8805 #endif /* HAVE_NET_DEVICE_OPS */
8806         igb_set_ethtool_ops(netdev);
8807 #ifdef HAVE_TX_TIMEOUT
8808         netdev->watchdog_timeo = 5 * HZ;
8809 #endif
8810
8811         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
8812
8813         adapter->bd_number = cards_found;
8814
8815         /* setup the private structure */
8816         err = igb_sw_init(adapter);
8817         if (err)
8818                 goto err_sw_init;
8819
8820         e1000_get_bus_info(hw);
8821
8822         hw->phy.autoneg_wait_to_complete = FALSE;
8823         hw->mac.adaptive_ifs = FALSE;
8824
8825         /* Copper options */
8826         if (hw->phy.media_type == e1000_media_type_copper) {
8827 #ifdef ETH_TP_MDI_X
8828                 hw->phy.mdix = ETH_TP_MDI_INVALID;
8829 #else
8830                 hw->phy.mdix = AUTO_ALL_MODES;
8831 #endif /* ETH_TP_MDI_X */
8832                 hw->phy.disable_polarity_correction = FALSE;
8833                 hw->phy.ms_type = e1000_ms_hw_default;
8834         }
8835
8836         if (e1000_check_reset_block(hw))
8837                 dev_info(pci_dev_to_dev(pdev),
8838                         "PHY reset is blocked due to SOL/IDER session.\n");
8839
8840         /*
8841          * features is initialized to 0 in allocation, it might have bits
8842          * set by igb_sw_init so we should use an or instead of an
8843          * assignment.
8844          */
8845         netdev->features |= NETIF_F_SG |
8846                             NETIF_F_IP_CSUM |
8847 #ifdef NETIF_F_IPV6_CSUM
8848                             NETIF_F_IPV6_CSUM |
8849 #endif
8850 #ifdef NETIF_F_TSO
8851                             NETIF_F_TSO |
8852 #ifdef NETIF_F_TSO6
8853                             NETIF_F_TSO6 |
8854 #endif
8855 #endif /* NETIF_F_TSO */
8856 #ifdef NETIF_F_RXHASH
8857                             NETIF_F_RXHASH |
8858 #endif
8859 #ifdef HAVE_NDO_SET_FEATURES
8860                             NETIF_F_RXCSUM |
8861 #endif
8862                             NETIF_F_HW_VLAN_RX |
8863                             NETIF_F_HW_VLAN_TX;
8864
8865 #ifdef HAVE_NDO_SET_FEATURES
8866         /* copy netdev features into list of user selectable features */
8867         netdev->hw_features |= netdev->features;
8868 #ifndef IGB_NO_LRO
8869
8870         /* give us the option of enabling LRO later */
8871         netdev->hw_features |= NETIF_F_LRO;
8872 #endif
8873 #else
8874 #ifdef NETIF_F_GRO
8875
8876         /* this is only needed on kernels prior to 2.6.39 */
8877         netdev->features |= NETIF_F_GRO;
8878 #endif
8879 #endif
8880
8881         /* set this bit last since it cannot be part of hw_features */
8882         netdev->features |= NETIF_F_HW_VLAN_FILTER;
8883
8884 #ifdef HAVE_NETDEV_VLAN_FEATURES
8885         netdev->vlan_features |= NETIF_F_TSO |
8886                                  NETIF_F_TSO6 |
8887                                  NETIF_F_IP_CSUM |
8888                                  NETIF_F_IPV6_CSUM |
8889                                  NETIF_F_SG;
8890
8891 #endif
8892         if (pci_using_dac)
8893                 netdev->features |= NETIF_F_HIGHDMA;
8894
8895         if (hw->mac.type >= e1000_82576)
8896                 netdev->features |= NETIF_F_SCTP_CSUM;
8897
8898 #ifdef NO_KNI
8899         adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
8900
8901         /* before reading the NVM, reset the controller to put the device in a
8902          * known good starting state */
8903         e1000_reset_hw(hw);
8904 #endif
8905
8906         /* make sure the NVM is good */
8907         if (e1000_validate_nvm_checksum(hw) < 0) {
8908                 dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
8909                         " Valid\n");
8910                 err = -EIO;
8911                 goto err_eeprom;
8912         }
8913
8914         /* copy the MAC address out of the NVM */
8915         if (e1000_read_mac_addr(hw))
8916                 dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
8917         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
8918 #ifdef ETHTOOL_GPERMADDR
8919         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
8920
8921         if (!is_valid_ether_addr(netdev->perm_addr)) {
8922 #else
8923         if (!is_valid_ether_addr(netdev->dev_addr)) {
8924 #endif
8925                 dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
8926                 err = -EIO;
8927                 goto err_eeprom;
8928         }
8929
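             /*
              * Seed software MAC table entry 0 with the permanent address;
              * the queue index vfs_allocated_count selects the PF pool
              * (VF pools use indices 0..vfs_allocated_count - 1).
              */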
8930         memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
8931         adapter->mac_table[0].queue = adapter->vfs_allocated_count;
8932         adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE);
8933         igb_rar_set(adapter, 0);
8934
8935         /* get firmware version for ethtool -i */
8936         e1000_read_nvm(&adapter->hw, 5, 1, &adapter->fw_version);
8937 #ifdef NO_KNI
8938         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
8939                     (unsigned long) adapter);
8940         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
8941                 setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
8942                             (unsigned long) adapter);
8943         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
8944                     (unsigned long) adapter);
8945
8946         INIT_WORK(&adapter->reset_task, igb_reset_task);
8947         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
8948         if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
8949                 INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
8950 #endif
8951
8952         /* Initialize link properties that are user-changeable */
8953         adapter->fc_autoneg = true;
8954         hw->mac.autoneg = true;
8955         hw->phy.autoneg_advertised = 0x2f;
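             /* 0x2f = 10/100 half + full duplex and 1000 full (no 1000 half) */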
8956
8957         hw->fc.requested_mode = e1000_fc_default;
8958         hw->fc.current_mode = e1000_fc_default;
8959
8960         e1000_validate_mdi_setting(hw);
8961
8962         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
8963          * enable the ACPI Magic Packet filter
8964          */
8965
8966         if (hw->bus.func == 0)
8967                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
8968         else if (hw->mac.type >= e1000_82580)
8969                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
8970                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
8971                                  &eeprom_data);
8972         else if (hw->bus.func == 1)
8973                 e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
8974
8975         if (eeprom_data & IGB_EEPROM_APME)
8976                 adapter->eeprom_wol |= E1000_WUFC_MAG;
8977
8978         /* now that we have the eeprom settings, apply the special cases where
8979          * the eeprom may be wrong or the board simply won't support wake on
8980          * lan on a particular port */
8981         switch (pdev->device) {
8982         case E1000_DEV_ID_82575GB_QUAD_COPPER:
8983                 adapter->eeprom_wol = 0;
8984                 break;
8985         case E1000_DEV_ID_82575EB_FIBER_SERDES:
8986         case E1000_DEV_ID_82576_FIBER:
8987         case E1000_DEV_ID_82576_SERDES:
8988                 /* Wake events only supported on port A for dual fiber
8989                  * regardless of eeprom setting */
8990                 if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
8991                         adapter->eeprom_wol = 0;
8992                 break;
8993         case E1000_DEV_ID_82576_QUAD_COPPER:
8994         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
8995                 /* if quad port adapter, disable WoL on all but port A */
8996                 if (global_quad_port_a != 0)
8997                         adapter->eeprom_wol = 0;
8998                 else
8999                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
9000                 /* Reset for multiple quad port adapters */
9001                 if (++global_quad_port_a == 4)
9002                         global_quad_port_a = 0;
9003                 break;
9004         }
9005
9006         /* initialize the wol settings based on the eeprom settings */
9007         adapter->wol = adapter->eeprom_wol;
9008 #ifdef NO_KNI
9009         device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);
9010
9011         /* reset the hardware with the new settings */
9012         igb_reset(adapter);
9013
9014         /* let the f/w know that the h/w is now under the control of the
9015          * driver. */
9016         igb_get_hw_control(adapter);
9017
9018         strncpy(netdev->name, "eth%d", IFNAMSIZ);
9019         err = register_netdev(netdev);
9020         if (err)
9021                 goto err_register;
9022
9023 #ifdef CONFIG_IGB_VMDQ_NETDEV
9024         err = igb_init_vmdq_netdevs(adapter);
9025         if (err)
9026                 goto err_register;
9027 #endif
9028         /* carrier off reporting is important to ethtool even BEFORE open */
9029         netif_carrier_off(netdev);
9030
9031 #ifdef IGB_DCA
9032         if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
9033                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
9034                 dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
9035                 igb_setup_dca(adapter);
9036         }
9037
9038 #endif
9039 #ifdef HAVE_HW_TIME_STAMP
9040         /* do hw tstamp init after resetting */
9041         igb_init_hw_timer(adapter);
9042
9043 #endif
9044
9045 #endif /* NO_KNI */
9046         dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
9047         /* print bus type/speed/width info */
9048         dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
9049                  netdev->name,
9050                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
9051                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
9052                                                             "unknown"),
9053                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4\n" :
9054                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2\n" :
9055                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1\n" :
9056                    "unknown"));
9057         dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
9058         for (i = 0; i < 6; i++)
9059                 printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');
9060
9061         ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
9062         if (ret_val)
9063                 strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
9064         dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
9065                  pba_str);
9066
9067         /* Initialize the thermal sensor on i350 devices. */
9068         if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
9069                 u16 ets_word;
9070
9071                 /*
9072                  * Read the NVM to determine if this i350 device supports an
9073                  * external thermal sensor.
9074                  */
9075                 e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
9076                 if (ets_word != 0x0000 && ets_word != 0xFFFF)
9077                         adapter->ets = true;
9078                 else
9079                         adapter->ets = false;
9080 #ifdef NO_KNI
9081 #ifdef IGB_SYSFS
9082                 igb_sysfs_init(adapter);
9083 #else
9084 #ifdef IGB_PROCFS
9085                 igb_procfs_init(adapter);
9086 #endif /* IGB_PROCFS */
9087 #endif /* IGB_SYSFS */
9088 #endif /* NO_KNI */
9089         } else {
9090                 adapter->ets = false;
9091         }
9092
9093         switch (hw->mac.type) {
9094         case e1000_i350:
9095                 /* Enable EEE for internal copper PHY devices */
9096                 if (hw->phy.media_type == e1000_media_type_copper)
9097                         e1000_set_eee_i350(hw);
9098
9099                 /* send driver version info to firmware */
9100                 igb_init_fw(adapter);
9101                 break;
9102         default:
9103                 break;
9104         }
9105 #ifndef IGB_NO_LRO
9106         if (netdev->features & NETIF_F_LRO)
9107                 dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled\n");
9108         else
9109                 dev_info(pci_dev_to_dev(pdev), "LRO is disabled\n");
9110 #endif
9111         dev_info(pci_dev_to_dev(pdev),
9112                  "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
9113                  adapter->msix_entries ? "MSI-X" :
9114                  (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
9115                  adapter->num_rx_queues, adapter->num_tx_queues);
9116
9117         cards_found++;
9118         *lad_dev = netdev;
9119
9120         pm_runtime_put_noidle(&pdev->dev);
9121         return 0;
9122
9123 //err_register:
9124         //igb_release_hw_control(adapter);
9125 err_eeprom:
9126         //if (!e1000_check_reset_block(hw))
9127         //      e1000_phy_hw_reset(hw);
9128
9129         if (hw->flash_address)
9130                 iounmap(hw->flash_address);
9131 err_sw_init:
9132         //igb_clear_interrupt_scheme(adapter);
9133         //igb_reset_sriov_capability(adapter);
9134         iounmap(hw->hw_addr);
9135 err_ioremap:
9136         free_netdev(netdev);
9137 err_alloc_etherdev:
9138         //pci_release_selected_regions(pdev,
9139         //                             pci_select_bars(pdev, IORESOURCE_MEM));
9140 //err_pci_reg:
9141 //err_dma:
9142         pci_disable_device(pdev);
9143         return err;
9144 }
9145
9146
9147 void igb_kni_remove(struct pci_dev *pdev)
9148 {
9149         struct net_device *netdev = pci_get_drvdata(pdev);
9150         struct igb_adapter *adapter = netdev_priv(netdev);
9151         struct e1000_hw *hw = &adapter->hw;
9152
9153         iounmap(hw->hw_addr);
9154
9155         if (hw->flash_address)
9156                 iounmap(hw->flash_address);
9157
9158         pci_disable_device(pdev);
9159 }
9160