/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <net/checksum.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#ifdef CONFIG_PM_RUNTIME
#include <linux/pm_runtime.h>
#endif /* CONFIG_PM_RUNTIME */
#include <linux/uio_driver.h>
#define VERSION_SUFFIX

#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF
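/* With hypothetical version numbers MAJ 3, MIN 4 and BUILD 7, DRV_VERSION
 * expands to the string literal "3.4.7" followed by whatever VERSION_SUFFIX,
 * DRV_DEBUG and DRV_HW_PERF expand to; adjacent string literals are
 * concatenated at compile time.
 */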
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                          "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) },
        /* required last entry */
        { 0, }
};

//MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
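/* The all-zero { 0, } sentinel is what tells the PCI core where the ID table
 * ends. MODULE_DEVICE_TABLE (commented out in this build) would normally
 * export that table so udev/modprobe can autoload the module when a matching
 * device appears; with it disabled the module has to be loaded manually.
 */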
static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__));
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
#ifdef HAVE_HW_TIME_STAMP
static void igb_init_hw_timer(struct igb_adapter *adapter);
#endif /* HAVE_HW_TIME_STAMP */
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static void igb_dma_err_task(struct work_struct *);
static void igb_dma_err_timer(unsigned long data);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
void igb_full_sync_mac_table(struct igb_adapter *adapter);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
#ifdef HAVE_VLAN_RX_REGISTER
static void igb_vlan_mode(struct net_device *, struct vlan_group *);
#endif
#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
#else
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
#endif
static void igb_restore_vlan(struct igb_adapter *);
void igb_rar_set(struct igb_adapter *adapter, u32 index);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static void igb_process_mdd_event(struct igb_adapter *);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */
#ifdef CONFIG_PM
#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
static int igb_suspend(struct device *dev);
static int igb_resume(struct device *dev);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif /* CONFIG_PM_RUNTIME */
static const struct dev_pm_ops igb_pm_ops = {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34)
        .suspend = igb_suspend,
        .resume = igb_resume,
        .freeze = igb_suspend,
        .poweroff = igb_suspend,
        .restore = igb_resume,
#ifdef CONFIG_PM_RUNTIME
        .runtime_suspend = igb_runtime_suspend,
        .runtime_resume = igb_runtime_resume,
        .runtime_idle = igb_runtime_idle,
#endif /* CONFIG_PM_RUNTIME */
#else /* Linux >= 2.6.34 */
        SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
#ifdef CONFIG_PM_RUNTIME
        SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
                           igb_runtime_idle)
#endif /* CONFIG_PM_RUNTIME */
#endif /* Linux version */
};
#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
#endif /* CONFIG_PM */
#ifndef USE_REBOOT_NOTIFIER
static void igb_shutdown(struct pci_dev *);
#else
static int igb_notify_reboot(struct notifier_block *, unsigned long, void *);
static struct notifier_block igb_notifier_reboot = {
        .notifier_call = igb_notify_reboot,
};
#endif

static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call = igb_notify_dca,
};
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                                              pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static void igb_init_fw(struct igb_adapter *adapter);
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
static struct pci_driver igb_driver = {
        .name = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe = igb_probe,
        .remove = __devexit_p(igb_remove),
#ifdef CONFIG_PM
#ifdef HAVE_SYSTEM_SLEEP_PM_OPS
        .driver.pm = &igb_pm_ops,
#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
#endif /* CONFIG_PM */
#ifndef USE_REBOOT_NOTIFIER
        .shutdown = igb_shutdown,
#endif
        .err_handler = &igb_err_handler
};
//MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
//MODULE_LICENSE("GPL");
//MODULE_VERSION(DRV_VERSION);
static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add)
{
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie;
        u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
        u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
        u32 vfta;

        /*
         * if this is the management vlan the only option is to add it in so
         * that the management pass through will continue to work
         */
        if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
            (vid == mng_cookie->vlan_id))
                add = TRUE;

        vfta = adapter->shadow_vfta[index];
        if (add)
                vfta |= mask;
        else
                vfta &= ~mask;

        e1000_write_vfta(hw, index, vfta);
        adapter->shadow_vfta[index] = vfta;
}
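/* Worked example of the VFTA indexing above, assuming the usual 5-bit split:
 * the 4096 possible VLAN IDs are spread across 128 32-bit registers, so
 * vid 100 lands in register index 100 >> 5 = 3 as bit 100 & 0x1f = 4.
 */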
#ifdef HAVE_HW_TIME_STAMP
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on lowest register read. For the 82580
         * the lowest register is SYSTIMR instead of SYSTIML. However we never
         * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
         */
        if (hw->mac.type >= e1000_82580) {
                stamp = E1000_READ_REG(hw, E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIML) << shift;
        stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << (shift + 32);
        return stamp;
}
#endif /* HAVE_HW_TIME_STAMP */
static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE;
//module_param(debug, int, 0);
//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)");
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;

        printk(KERN_INFO "%s - version %s\n",
               igb_driver_string, igb_driver_version);

        printk(KERN_INFO "%s\n", igb_copyright);

#ifdef IGB_SYSFS
/* only use IGB_PROCFS if IGB_SYSFS is not defined */
#else
#ifdef IGB_PROCFS
        if (igb_procfs_topdir_init())
                printk(KERN_INFO "Procfs failed to initialize topdir\n");
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */

#ifdef IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
#ifdef USE_REBOOT_NOTIFIER
        if (ret >= 0)
                register_reboot_notifier(&igb_notifier_reboot);
#endif
        return ret;
}

#define module_init(x) static int x(void) __attribute__((__unused__));
module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
#ifdef USE_REBOOT_NOTIFIER
        unregister_reboot_notifier(&igb_notifier_reboot);
#endif
        pci_unregister_driver(&igb_driver);

#ifdef IGB_SYSFS
/* only compile IGB_PROCFS if IGB_SYSFS is not defined */
#else
#ifdef IGB_PROCFS
        igb_procfs_topdir_exit();
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */
}

#define module_exit(x) static void x(void) __attribute__((__unused__));
module_exit(igb_exit_module);
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
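/* Q_IDX_82576 interleaves queue indices into the 82576 register layout:
 * i = 0, 1, 2, 3, 4, 5 maps to 0, 8, 1, 9, 2, 10, i.e. even logical queues
 * occupy registers 0-7 and odd logical queues registers 8-15.
 */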
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if ((adapter->rss_queues > 1) && adapter->vmdq_pools) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                }
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}
static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;
#ifdef HAVE_DEVICE_NUMA_NODE
        int orig_node = adapter->node;
#endif /* HAVE_DEVICE_NUMA_NODE */

        for (i = 0; i < adapter->num_tx_queues; i++) {
#ifdef HAVE_DEVICE_NUMA_NODE
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
#endif /* HAVE_DEVICE_NUMA_NODE */
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->dev = pci_dev_to_dev(adapter->pdev);
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
                adapter->tx_ring[i] = ring;
        }
#ifdef HAVE_DEVICE_NUMA_NODE
        /* Restore the adapter's original node */
        adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */

        for (i = 0; i < adapter->num_rx_queues; i++) {
#ifdef HAVE_DEVICE_NUMA_NODE
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
#endif /* HAVE_DEVICE_NUMA_NODE */
                ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                                    adapter->node);
                if (!ring)
                        ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->dev = pci_dev_to_dev(adapter->pdev);
                ring->netdev = adapter->netdev;
                ring->numa_node = adapter->node;
#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
                ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
#endif
#ifndef HAVE_NDO_SET_FEATURES
                /* enable rx checksum */
                set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags);
#endif
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

                /* On i350, loopback VLAN packets have the tag byte-swapped. */
                if (adapter->hw.mac.type == e1000_i350)
                        set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

                adapter->rx_ring[i] = ring;
        }
#ifdef HAVE_DEVICE_NUMA_NODE
        /* Restore the adapter's original node */
        adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */

        igb_cache_ring_register(adapter);

        return E1000_SUCCESS;

err:
#ifdef HAVE_DEVICE_NUMA_NODE
        /* Restore the adapter's original node */
        adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */
        igb_free_queues(adapter);

        return -ENOMEM;
}
static void igb_configure_lli(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u16 port;

        /* LLI should only be enabled for MSI-X or MSI interrupts */
        if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI))
                return;

        if (adapter->lli_port) {
                /* use filter 0 for port */
                port = htons((u16)adapter->lli_port);
                E1000_WRITE_REG(hw, E1000_IMIR(0),
                                (port | E1000_IMIR_PORT_IM_EN));
                E1000_WRITE_REG(hw, E1000_IMIREXT(0),
                                (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
        }

        if (adapter->flags & IGB_FLAG_LLI_PUSH) {
                /* use filter 1 for push flag */
                E1000_WRITE_REG(hw, E1000_IMIR(1),
                                (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
                E1000_WRITE_REG(hw, E1000_IMIREXT(1),
                                (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH));
        }

        if (adapter->lli_size) {
                /* use filter 2 for size */
                E1000_WRITE_REG(hw, E1000_IMIR(2),
                                (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN));
                E1000_WRITE_REG(hw, E1000_IMIREXT(2),
                                (adapter->lli_size | E1000_IMIREXT_CTRL_BP));
        }
}
/**
 * igb_write_ivar - configure ivar for given MSI-X vector
 * @hw: pointer to the HW structure
 * @msix_vector: vector number we are allocating to a given ring
 * @index: row index of IVAR register to write within IVAR table
 * @offset: column offset in IVAR, should be multiple of 8
 *
 * This function is intended to handle the writing of the IVAR register
 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
 * each containing a cause allocation for an Rx and Tx ring, and a
 * variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
        u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);

        /* clear any bits that are currently set */
        ivar &= ~((u32)0xFF << offset);

        /* write vector and valid bit */
        ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

        E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
}
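/* Each 32-bit IVAR entry holds four 8-bit cause-allocation fields; "offset"
 * selects the byte lane (0, 8, 16 or 24) and the read-modify-write above
 * replaces only that byte. For example, writing vector 5 at offset 16 into a
 * zeroed entry yields 0x00850000 (0x80 being E1000_IVAR_VALID).
 */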
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;
        u32 msixbm = 0;

        if (q_vector->rx.ring)
                rx_queue = q_vector->rx.ring->reg_idx;
        if (q_vector->tx.ring)
                tx_queue = q_vector->tx.ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers. To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /*
                 * 82576 uses a table that essentially consists of 2 columns
                 * with 8 rows. The ordering is column-major so we use the
                 * lower 3 bits as the row index, and the 4th bit as the
                 * column offset.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue & 0x7,
                                       (rx_queue & 0x8) << 1);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue & 0x7,
                                       ((tx_queue & 0x8) << 1) + 8);
                q_vector->eims_value = 1 << msix_vector;
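                /* e.g. on 82576, rx_queue 9 lands in IVAR row 9 & 0x7 = 1 at
                 * byte offset (9 & 0x8) << 1 = 16, and the matching Tx entry
                 * sits 8 bits higher at offset 24.
                 */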
                break;
        case e1000_82580:
        case e1000_i350:
                /*
                 * On 82580 and newer adapters the scheme is similar to 82576
                 * however instead of ordering column-major we have things
                 * ordered row-major. So we traverse the table by using
                 * bit 0 as the column offset, and the remaining bits as the
                 * row index.
                 */
                if (rx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       rx_queue >> 1,
                                       (rx_queue & 0x1) << 4);
                if (tx_queue > IGB_N0_QUEUE)
                        igb_write_ivar(hw, msix_vector,
                                       tx_queue >> 1,
                                       ((tx_queue & 0x1) << 4) + 8);
                q_vector->eims_value = 1 << msix_vector;
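                /* e.g. on 82580, rx_queue 3 maps to IVAR row 3 >> 1 = 1 at
                 * byte offset (3 & 0x1) << 4 = 16; its Tx twin again sits
                 * 8 bits higher at offset 24.
                 */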
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
                /* enable MSI-X PBA support*/
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;
        case e1000_82576:
        case e1000_82580:
        case e1000_i350:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick. And it will take days to debug. */
                E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        E1000_WRITE_FLUSH(hw);
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          &igb_msix_other, 0, netdev->name, adapter);
        if (err)
                return err;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx.ring && q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else if (q_vector->tx.ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx.ring->queue_index);
                else if (q_vector->rx.ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx.ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        return err;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors. In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
#ifndef IGB_NO_LRO
                if (q_vector->lrolist) {
                        __skb_queue_purge(&q_vector->lrolist->active);
                        vfree(q_vector->lrolist);
                        q_vector->lrolist = NULL;
                }
#endif
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}
/**
 * igb_process_mdd_event
 * @adapter - board private structure
 *
 * Identify a malicious VF, disable the VF TX/RX queues and log a message.
 */
static void igb_process_mdd_event(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 lvmmc, vfte, vfre, mdfb;
        u8 vf_queue;

        lvmmc = E1000_READ_REG(hw, E1000_LVMMC);
        vf_queue = lvmmc >> 29;
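        /* LVMMC layout as used here: the top three bits (lvmmc >> 29) give
         * the offending VF queue index, while the full register value logged
         * below carries the hardware's misbehavior code.
         */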
        /* VF index cannot be bigger or equal to VFs allocated */
        if (vf_queue >= adapter->vfs_allocated_count)
                return;

        netdev_info(adapter->netdev,
                    "VF %d misbehaved. VF queues are disabled. "
                    "VM misbehavior code is 0x%x\n", vf_queue, lvmmc);

        /* Disable VFTE and VFRE related bits */
        vfte = E1000_READ_REG(hw, E1000_VFTE);
        vfte &= ~(1 << vf_queue);
        E1000_WRITE_REG(hw, E1000_VFTE, vfte);

        vfre = E1000_READ_REG(hw, E1000_VFRE);
        vfre &= ~(1 << vf_queue);
        E1000_WRITE_REG(hw, E1000_VFRE, vfre);

        /* Disable MDFB related bit */
        mdfb = E1000_READ_REG(hw, E1000_MDFB);
        mdfb &= ~(1 << vf_queue);
        E1000_WRITE_REG(hw, E1000_MDFB, mdfb);

        /* Reset the specific VF */
        E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST);
}
/**
 * igb_disable_mdd
 * @adapter - board private structure
 *
 * Disable MDD behavior in the HW
 **/
static void igb_disable_mdd(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 reg;

        if (hw->mac.type != e1000_i350)
                return;

        reg = E1000_READ_REG(hw, E1000_DTXCTL);
        reg &= (~E1000_DTXCTL_MDP_EN);
        E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
}
/**
 * igb_enable_mdd
 * @adapter - board private structure
 *
 * Enable the HW to detect a malicious driver and send an interrupt to
 * the driver.
 *
 * Only available on i350 device
 **/
static void igb_enable_mdd(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 reg;

        if (hw->mac.type != e1000_i350)
                return;

        reg = E1000_READ_REG(hw, E1000_DTXCTL);
        reg |= E1000_DTXCTL_MDP_EN;
        E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
}
/**
 * igb_reset_sriov_capability - disable SR-IOV if enabled
 *
 * Attempt to disable single root IO virtualization capabilities present in the
 * kernel.
 **/
static void igb_reset_sriov_capability(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;

        /* reclaim resources allocated to VFs */
        if (adapter->vf_data) {
                if (!igb_check_vf_assignment(adapter)) {
                        /*
                         * disable iov and allow time for transactions to
                         * clear
                         */
                        pci_disable_sriov(pdev);
                        msleep(500);

                        dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n");
                } else {
                        dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled\n"
                                 "VF(s) are assigned to guests!\n");
                }
                /* Disable Malicious Driver Detection */
                igb_disable_mdd(adapter);

                /* free vf data storage */
                kfree(adapter->vf_data);
                adapter->vf_data = NULL;

                /* switch rings back to PF ownership */
                E1000_WRITE_REG(hw, E1000_IOVCTL,
                                E1000_IOVCTL_REUSE_VFQ);
                E1000_WRITE_FLUSH(hw);
                msleep(100);
        }

        adapter->vfs_allocated_count = 0;
}
/**
 * igb_set_sriov_capability - setup SR-IOV if supported
 *
 * Attempt to enable single root IO virtualization capabilities present in the
 * kernel.
 **/
static void igb_set_sriov_capability(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int old_vfs = 0;
        int i;

#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
        old_vfs = igb_find_enabled_vfs(adapter);
#endif
        if (old_vfs) {
                dev_info(pci_dev_to_dev(pdev),
                         "%d pre-allocated VFs found - override "
                         "max_vfs setting of %d\n", old_vfs,
                         adapter->vfs_allocated_count);
                adapter->vfs_allocated_count = old_vfs;
        }
        /* no VFs requested, do nothing */
        if (!adapter->vfs_allocated_count)
                return;

        /* allocate vf data storage */
        adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
                                   sizeof(struct vf_data_storage),
                                   GFP_KERNEL);

        if (adapter->vf_data) {
                if (pci_enable_sriov(pdev,
                                     adapter->vfs_allocated_count))
                        goto err_out;

                for (i = 0; i < adapter->vfs_allocated_count; i++)
                        igb_vf_configure(adapter, i);

                /* DMA Coalescing is not supported in IOV mode. */
                if (adapter->hw.mac.type >= e1000_i350)
                        adapter->dmac = IGB_DMAC_DISABLE;
                if (adapter->hw.mac.type < e1000_i350)
                        adapter->flags |= IGB_FLAG_DETECT_BAD_DMA;
                return;
        }

err_out:
        kfree(adapter->vf_data);
        adapter->vf_data = NULL;
        adapter->vfs_allocated_count = 0;
        dev_warn(pci_dev_to_dev(pdev),
                 "Failed to initialize SR-IOV virtualization\n");
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;

        if (adapter->vmdq_pools > 1)
                adapter->num_rx_queues += adapter->vmdq_pools - 1;

#ifdef HAVE_TX_MQ
        if (adapter->vmdq_pools)
                adapter->num_tx_queues = adapter->vmdq_pools;
        else
                adapter->num_tx_queues = adapter->num_rx_queues;
#else
        adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools);
#endif

        switch (adapter->int_mode) {
        case IGB_INT_MODE_MSIX:
                /* start with one vector for every rx queue */
                numvecs = adapter->num_rx_queues;

                /* if tx handler is separate add 1 for every tx queue */
                if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                        numvecs += adapter->num_tx_queues;

                /* store the number of vectors reserved for queues */
                adapter->num_q_vectors = numvecs;

                /* add 1 vector for link status interrupts */
                numvecs++;
                adapter->msix_entries = kcalloc(numvecs,
                                                sizeof(struct msix_entry),
                                                GFP_KERNEL);
                if (adapter->msix_entries) {
                        for (i = 0; i < numvecs; i++)
                                adapter->msix_entries[i].entry = i;

                        err = pci_enable_msix(pdev,
                                              adapter->msix_entries, numvecs);
                        if (!err)
                                break;
                }
                /* MSI-X failed, so fall through and try MSI */
                dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. "
                         "Falling back to MSI interrupts.\n");
                igb_reset_interrupt_capability(adapter);
        case IGB_INT_MODE_MSI:
                if (!pci_enable_msi(pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                else
                        dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI "
                                 "interrupts. Falling back to legacy "
                                 "interrupts.\n");
        case IGB_INT_MODE_LEGACY:
                /* disable advanced features and set number of queues to 1 */
                igb_reset_sriov_capability(adapter);
                adapter->vmdq_pools = 0;
                adapter->rss_queues = 1;
                adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
                adapter->num_rx_queues = 1;
                adapter->num_tx_queues = 1;
                adapter->num_q_vectors = 1;
                /* Don't do anything; this is system default */
                break;
        }

#ifdef HAVE_TX_MQ
        /* Notify the stack of the (possibly) reduced Tx Queue count. */
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
        adapter->netdev->egress_subqueue_count = adapter->num_tx_queues;
#else
        adapter->netdev->real_num_tx_queues =
                (adapter->vmdq_pools ? 1 : adapter->num_tx_queues);
#endif /* CONFIG_NETDEVICES_MULTIQUEUE */
#endif /* HAVE_TX_MQ */
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt. If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;
#ifdef HAVE_DEVICE_NUMA_NODE
        int orig_node = adapter->node;
#endif /* HAVE_DEVICE_NUMA_NODE */

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
#ifdef HAVE_DEVICE_NUMA_NODE
                if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                                adapter->num_tx_queues)) &&
                    (adapter->num_rx_queues == v_idx))
                        adapter->node = orig_node;
                if (orig_node == -1) {
                        int cur_node = next_online_node(adapter->node);
                        if (cur_node == MAX_NUMNODES)
                                cur_node = first_online_node;
                        adapter->node = cur_node;
                }
#endif /* HAVE_DEVICE_NUMA_NODE */
                q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                        adapter->node);
                if (!q_vector)
                        q_vector = kzalloc(sizeof(struct igb_q_vector),
                                           GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
#ifndef IGB_NO_LRO
                if (v_idx < adapter->num_rx_queues) {
                        int size = sizeof(struct igb_lro_list);
                        q_vector->lrolist = vzalloc_node(size, q_vector->numa_node);
                        if (!q_vector->lrolist)
                                q_vector->lrolist = vzalloc(size);
                        if (!q_vector->lrolist)
                                goto err_out;
                        __skb_queue_head_init(&q_vector->lrolist->active);
                }
#endif /* IGB_NO_LRO */
        }
#ifdef HAVE_DEVICE_NUMA_NODE
        /* Restore the adapter's original node */
        adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */

        return 0;

err_out:
#ifdef HAVE_DEVICE_NUMA_NODE
        /* Restore the adapter's original node */
        adapter->node = orig_node;
#endif /* HAVE_DEVICE_NUMA_NODE */
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx.ring = adapter->rx_ring[ring_idx];
        q_vector->rx.ring->q_vector = q_vector;
        q_vector->rx.count++;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}
static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx.ring = adapter->tx_ring[ring_idx];
        q_vector->tx.ring->q_vector = q_vector;
        q_vector->tx.count++;
        q_vector->itr_val = adapter->tx_itr_setting;
        q_vector->tx.work_limit = adapter->tx_work_limit;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        igb_set_interrupt_capability(adapter);

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(pci_dev_to_dev(pdev), "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;

err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                igb_reset_sriov_capability(adapter);
                if (!pci_enable_msi(pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(pci_dev_to_dev(pdev),
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(pci_dev_to_dev(pdev),
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        }

        igb_assign_vector(adapter->q_vector[0], 0);

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(pdev->irq, &igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                int vector = 0, i;

                free_irq(adapter->msix_entries[vector++].vector, adapter);

                for (i = 0; i < adapter->num_q_vectors; i++)
                        free_irq(adapter->msix_entries[vector++].vector,
                                 adapter->q_vector[i]);
        } else {
                free_irq(adapter->pdev->irq, adapter);
        }
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /*
         * we need to be careful when disabling interrupts. The VFs are also
         * mapped into these registers and so clearing the bits can cause
         * issues on the VF drivers so we only need to clear what we set
         */
        if (adapter->msix_entries) {
                u32 regval = E1000_READ_REG(hw, E1000_EIAM);
                E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask);
                E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask);
                regval = E1000_READ_REG(hw, E1000_EIAC);
                E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask);
        }

        E1000_WRITE_REG(hw, E1000_IAM, 0);
        E1000_WRITE_REG(hw, E1000_IMC, ~0);
        E1000_WRITE_FLUSH(hw);

        if (adapter->msix_entries) {
                int vector = 0, i;

                synchronize_irq(adapter->msix_entries[vector++].vector);

                for (i = 0; i < adapter->num_q_vectors; i++)
                        synchronize_irq(adapter->msix_entries[vector++].vector);
        } else {
                synchronize_irq(adapter->pdev->irq);
        }
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        if (adapter->msix_entries) {
                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
                u32 regval = E1000_READ_REG(hw, E1000_EIAC);
                E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask);
                regval = E1000_READ_REG(hw, E1000_EIAM);
                E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask);
                E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask);
                if (adapter->vfs_allocated_count) {
                        E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF);
                        ims |= E1000_IMS_VMMB;
                        /* For I350 device only enable MDD interrupts*/
                        if ((adapter->mdd) &&
                            (adapter->hw.mac.type == e1000_i350))
                                ims |= E1000_IMS_MDDET;
                }
                E1000_WRITE_REG(hw, E1000_IMS, ims);
        } else {
                E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
                E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
        }
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u16 vid = adapter->hw.mng_cookie.vlan_id;
        u16 old_vid = adapter->mng_vlan_id;

        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
                /* add VID to filter table */
                igb_vfta_set(adapter, vid, TRUE);
                adapter->mng_vlan_id = vid;
        } else {
                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
        }

        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
            (vid != old_vid) &&
#ifdef HAVE_VLAN_RX_REGISTER
            !vlan_group_get_device(adapter->vlgrp, old_vid)) {
#else
            !test_bit(old_vid, adapter->active_vlans)) {
#endif
                /* remove VID from filter table */
                igb_vfta_set(adapter, old_vid, FALSE);
        }
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
        E1000_WRITE_REG(hw, E1000_CTRL_EXT,
                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
        E1000_WRITE_REG(hw, E1000_CTRL_EXT,
                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        int i;

        igb_get_hw_control(adapter);
        igb_set_rx_mode(netdev);

        igb_restore_vlan(adapter);

        igb_setup_tctl(adapter);
        igb_setup_mrqc(adapter);
        igb_setup_rctl(adapter);

        igb_configure_tx(adapter);
        igb_configure_rx(adapter);

        e1000_rx_fifo_flush_82575(&adapter->hw);
#ifdef CONFIG_NETDEVICES_MULTIQUEUE
        if (adapter->num_tx_queues > 1)
                netdev->features |= NETIF_F_MULTI_QUEUE;
        else
                netdev->features &= ~NETIF_F_MULTI_QUEUE;
#endif

        /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = adapter->rx_ring[i];
                igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
        }
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_power_up_phy(&adapter->hw);
        else
                e1000_power_up_fiber_serdes_link(&adapter->hw);

        e1000_phy_hw_reset(&adapter->hw);
}
/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 **/
static void igb_power_down_link(struct igb_adapter *adapter)
{
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                e1000_power_down_phy(&adapter->hw);
        else
                e1000_shutdown_fiber_serdes_link(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        int i;

        /* hardware has been reset, we need to reload some things */
        igb_configure(adapter);

        clear_bit(__IGB_DOWN, &adapter->state);

        for (i = 0; i < adapter->num_q_vectors; i++)
                napi_enable(&(adapter->q_vector[i]->napi));

        if (adapter->msix_entries)
                igb_configure_msix(adapter);
        else
                igb_assign_vector(adapter->q_vector[0], 0);

        igb_configure_lli(adapter);

        /* Clear any pending interrupts. */
        E1000_READ_REG(hw, E1000_ICR);
        igb_irq_enable(adapter);

        /* notify VFs that reset has been completed */
        if (adapter->vfs_allocated_count) {
                u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);
                reg_data |= E1000_CTRL_EXT_PFRSTD;
                E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
        }

        netif_tx_start_all_queues(adapter->netdev);

        if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
                schedule_work(&adapter->dma_err_task);
        /* start the watchdog. */
        hw->mac.get_link_status = 1;
        schedule_work(&adapter->watchdog_task);

        return 0;
}
void igb_down(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        u32 tctl, rctl;
        int i;

        /* signal that we're down so the interrupt handler does not
         * reschedule our watchdog timer */
        set_bit(__IGB_DOWN, &adapter->state);

        /* disable receives in the hardware */
        rctl = E1000_READ_REG(hw, E1000_RCTL);
        E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
        /* flush and sleep below */

        netif_tx_stop_all_queues(netdev);

        /* disable transmits in the hardware */
        tctl = E1000_READ_REG(hw, E1000_TCTL);
        tctl &= ~E1000_TCTL_EN;
        E1000_WRITE_REG(hw, E1000_TCTL, tctl);
        /* flush both disables and wait for them to finish */
        E1000_WRITE_FLUSH(hw);
        usleep_range(10000, 20000);

        for (i = 0; i < adapter->num_q_vectors; i++)
                napi_disable(&(adapter->q_vector[i]->napi));

        igb_irq_disable(adapter);

        del_timer_sync(&adapter->watchdog_timer);
        if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
                del_timer_sync(&adapter->dma_err_timer);
        del_timer_sync(&adapter->phy_info_timer);

        netif_carrier_off(netdev);

        /* record the stats before reset*/
        igb_update_stats(adapter);

        adapter->link_speed = 0;
        adapter->link_duplex = 0;

        if (!pci_channel_offline(adapter->pdev))
                igb_reset(adapter);
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
#ifdef IGB_DCA
        /* since we reset the hardware DCA settings were cleared */
        igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
        WARN_ON(in_interrupt());
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                usleep_range(1000, 2000);
        igb_down(adapter);
        igb_up(adapter);
        clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct e1000_fc_info *fc = &hw->fc;
        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
        u16 hwm;

        /* Repartition PBA for MTUs greater than 9k.
         * CTRL.RST is required for the change to take effect.
         */
        switch (mac->type) {
        case e1000_i350:
        case e1000_82580:
                pba = E1000_READ_REG(hw, E1000_RXPBS);
                pba = e1000_rxpbs_adjust_82580(pba);
                break;
        case e1000_82576:
                pba = E1000_READ_REG(hw, E1000_RXPBS);
                pba &= E1000_RXPBS_SIZE_MASK_82576;
                break;
        case e1000_82575:
        default:
                pba = E1000_PBA_34K;
                break;
        }

        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
            (mac->type < e1000_82576)) {
                /* adjust PBA for jumbo frames */
                E1000_WRITE_REG(hw, E1000_PBA, pba);

                /* To maintain wire speed transmits, the Tx FIFO should be
                 * large enough to accommodate two full transmit packets,
                 * rounded up to the next 1KB and expressed in KB. Likewise,
                 * the Rx FIFO should be large enough to accommodate at least
                 * one full receive packet and is similarly rounded up and
                 * expressed in KB. */
                pba = E1000_READ_REG(hw, E1000_PBA);
                /* upper 16 bits has Tx packet buffer allocation size in KB */
                tx_space = pba >> 16;
                /* lower 16 bits has Rx packet buffer allocation size in KB */
                pba &= 0xffff;
                /* the tx fifo also stores 16 bytes of information about the tx
                 * but don't include ethernet FCS because hardware appends it */
                min_tx_space = (adapter->max_frame_size +
                                sizeof(union e1000_adv_tx_desc) -
                                ETH_FCS_LEN) * 2;
                min_tx_space = ALIGN(min_tx_space, 1024);
                min_tx_space >>= 10;
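                /* ALIGN rounds the byte count up to the next 1 KB boundary
                 * and the shift by 10 converts bytes to KB, the unit the PBA
                 * register works in. E.g. with a 1522-byte max frame,
                 * (1522 + 16 - 4) * 2 = 3068 bytes rounds up to 4096 = 4 KB.
                 */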
                /* software strips receive CRC, so leave room for it */
                min_rx_space = adapter->max_frame_size;
                min_rx_space = ALIGN(min_rx_space, 1024);
                min_rx_space >>= 10;

                /* If current Tx allocation is less than the min Tx FIFO size,
                 * and the min Tx FIFO size is less than the current Rx FIFO
                 * allocation, take space away from current Rx allocation */
                if (tx_space < min_tx_space &&
                    ((min_tx_space - tx_space) < pba)) {
                        pba = pba - (min_tx_space - tx_space);

                        /* if short on rx space, rx wins and must trump tx
                         * adjustment */
                        if (pba < min_rx_space)
                                pba = min_rx_space;
                }
                E1000_WRITE_REG(hw, E1000_PBA, pba);
        }

        /* flow control settings */
        /* The high water mark must be low enough to fit one full frame
         * (or the size used for early receive) above it in the Rx FIFO.
         * Set it to the lower of:
         * - 90% of the Rx FIFO size, or
         * - the full Rx FIFO size minus one full frame */
        hwm = min(((pba << 10) * 9 / 10),
                  ((pba << 10) - 2 * adapter->max_frame_size));

        fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
        fc->low_water = fc->high_water - 16;
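        /* Worked example, assuming a 34 KB PBA and 1522-byte frames: 90% of
         * 34816 is 31334; 34816 - 2 * 1522 = 31772; min() picks 31334, and
         * masking with 0xFFF0 gives a high water mark of 31328 bytes, with
         * the low water mark 16 bytes below at 31312.
         */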
        fc->pause_time = 0xFFFF;
        fc->send_xon = 1;
        fc->current_mode = fc->requested_mode;

        /* disable receive for all VFs and wait one second */
        if (adapter->vfs_allocated_count) {
                int i;
                /*
                 * Clear all flags except indication that the PF has set
                 * the VF MAC addresses administratively
                 */
                for (i = 0 ; i < adapter->vfs_allocated_count; i++)
                        adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

                /* ping all the active vfs to let them know we are going down */
                igb_ping_all_vfs(adapter);

                /* disable transmits and receives */
                E1000_WRITE_REG(hw, E1000_VFRE, 0);
                E1000_WRITE_REG(hw, E1000_VFTE, 0);
        }

        /* Allow time for pending master requests to run */
        e1000_reset_hw(hw);
        E1000_WRITE_REG(hw, E1000_WUC, 0);

        if (e1000_init_hw(hw))
                dev_err(pci_dev_to_dev(pdev), "Hardware Error\n");

        igb_init_dmac(adapter, pba);
        /* Re-initialize the thermal sensor on i350 devices. */
        if (mac->type == e1000_i350 && hw->bus.func == 0) {
                /*
                 * If present, re-initialize the external thermal sensor
                 * interface.
                 */
                if (adapter->ets)
                        e1000_set_i2c_bb(hw);
                e1000_init_thermal_sensor_thresh(hw);
        }
        if (!netif_running(adapter->netdev))
                igb_power_down_link(adapter);

        igb_update_mng_vlan(adapter);

        /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
        E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

        e1000_get_phy_info(hw);
}
#ifdef HAVE_NDO_SET_FEATURES
static netdev_features_t igb_fix_features(struct net_device *netdev,
                                          netdev_features_t features)
{
        /*
         * Since there is no support for separate Tx VLAN accel
         * enable/disable, make sure the Tx flag is cleared if the Rx flag is.
         */
        if (!(features & NETIF_F_HW_VLAN_RX))
                features &= ~NETIF_F_HW_VLAN_TX;

        /* If Rx checksum is disabled, then LRO should also be disabled */
        if (!(features & NETIF_F_RXCSUM))
                features &= ~NETIF_F_LRO;

        return features;
}
static int igb_set_features(struct net_device *netdev,
                            netdev_features_t features)
{
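        /* XOR of the old and new feature masks leaves set exactly the bits
         * that changed state; only a VLAN-strip toggle is acted on below.
         */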
        u32 changed = netdev->features ^ features;

        if (changed & NETIF_F_HW_VLAN_RX)
                igb_vlan_mode(netdev, features);

        return 0;
}

#endif /* HAVE_NDO_SET_FEATURES */
#ifdef HAVE_NET_DEVICE_OPS
static const struct net_device_ops igb_netdev_ops = {
        .ndo_open               = igb_open,
        .ndo_stop               = igb_close,
        .ndo_start_xmit         = igb_xmit_frame,
        .ndo_get_stats          = igb_get_stats,
        .ndo_set_rx_mode        = igb_set_rx_mode,
        .ndo_set_mac_address    = igb_set_mac,
        .ndo_change_mtu         = igb_change_mtu,
        .ndo_do_ioctl           = igb_ioctl,
        .ndo_tx_timeout         = igb_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
#ifdef IFLA_VF_MAX
        .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
        .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
        .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
        .ndo_get_vf_config      = igb_ndo_get_vf_config,
#endif /* IFLA_VF_MAX */
#ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = igb_netpoll,
#endif
#ifdef HAVE_NDO_SET_FEATURES
        .ndo_fix_features       = igb_fix_features,
        .ndo_set_features       = igb_set_features,
#endif
#ifdef HAVE_VLAN_RX_REGISTER
        .ndo_vlan_rx_register   = igb_vlan_mode,
#endif
};
#ifdef CONFIG_IGB_VMDQ_NETDEV
static const struct net_device_ops igb_vmdq_ops = {
        .ndo_open               = &igb_vmdq_open,
        .ndo_stop               = &igb_vmdq_close,
        .ndo_start_xmit         = &igb_vmdq_xmit_frame,
        .ndo_get_stats          = &igb_vmdq_get_stats,
        .ndo_set_rx_mode        = &igb_vmdq_set_rx_mode,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = &igb_vmdq_set_mac,
        .ndo_change_mtu         = &igb_vmdq_change_mtu,
        .ndo_tx_timeout         = &igb_vmdq_tx_timeout,
        .ndo_vlan_rx_register   = &igb_vmdq_vlan_rx_register,
        .ndo_vlan_rx_add_vid    = &igb_vmdq_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = &igb_vmdq_vlan_rx_kill_vid,
};
#endif /* CONFIG_IGB_VMDQ_NETDEV */
#endif /* HAVE_NET_DEVICE_OPS */
#ifdef CONFIG_IGB_VMDQ_NETDEV
void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev)
{
#ifdef HAVE_NET_DEVICE_OPS
        vnetdev->netdev_ops = &igb_vmdq_ops;
#else
        vnetdev->open = &igb_vmdq_open;
        vnetdev->stop = &igb_vmdq_close;
        vnetdev->hard_start_xmit = &igb_vmdq_xmit_frame;
        vnetdev->get_stats = &igb_vmdq_get_stats;
#ifdef HAVE_SET_RX_MODE
        vnetdev->set_rx_mode = &igb_vmdq_set_rx_mode;
#endif
        vnetdev->set_multicast_list = &igb_vmdq_set_rx_mode;
        vnetdev->set_mac_address = &igb_vmdq_set_mac;
        vnetdev->change_mtu = &igb_vmdq_change_mtu;
#ifdef HAVE_TX_TIMEOUT
        vnetdev->tx_timeout = &igb_vmdq_tx_timeout;
#endif
#ifdef NETIF_F_HW_VLAN_TX
        vnetdev->vlan_rx_register = &igb_vmdq_vlan_rx_register;
        vnetdev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid;
        vnetdev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid;
#endif
#endif /* HAVE_NET_DEVICE_OPS */
        igb_vmdq_set_ethtool_ops(vnetdev);
        vnetdev->watchdog_timeo = 5 * HZ;
}
int igb_init_vmdq_netdevs(struct igb_adapter *adapter)
{
        int pool, err = 0, base_queue;
        struct net_device *vnetdev;
        struct igb_vmdq_adapter *vmdq_adapter;

        for (pool = 1; pool < adapter->vmdq_pools; pool++) {
                int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues);
                base_queue = pool * qpp;
                vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter));
                if (!vnetdev) {
                        err = -ENOMEM;
                        break;
                }
                vmdq_adapter = netdev_priv(vnetdev);
                vmdq_adapter->vnetdev = vnetdev;
                vmdq_adapter->real_adapter = adapter;
                vmdq_adapter->rx_ring = adapter->rx_ring[base_queue];
                vmdq_adapter->tx_ring = adapter->tx_ring[base_queue];
                igb_assign_vmdq_netdev_ops(vnetdev);
                snprintf(vnetdev->name, IFNAMSIZ, "%sv%d",
                         adapter->netdev->name, pool);
                vnetdev->features = adapter->netdev->features;
#ifdef HAVE_NETDEV_VLAN_FEATURES
                vnetdev->vlan_features = adapter->netdev->vlan_features;
#endif
                adapter->vmdq_netdev[pool-1] = vnetdev;
                err = register_netdev(vnetdev);
                if (err)
                        break;
        }
        return err;
}
int igb_remove_vmdq_netdevs(struct igb_adapter *adapter)
{
        int pool;

        for (pool = 1; pool < adapter->vmdq_pools; pool++) {
                unregister_netdev(adapter->vmdq_netdev[pool-1]);
                free_netdev(adapter->vmdq_netdev[pool-1]);
                adapter->vmdq_netdev[pool-1] = NULL;
        }
        return 0;
}
#endif /* CONFIG_IGB_VMDQ_NETDEV */
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	u8 pba_str[E1000_PBANUM_LENGTH];
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	int i, err, pci_using_dac;
	static int cards_found;

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(pci_dev_to_dev(pdev),
					    DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(pci_dev_to_dev(pdev),
						    DMA_BIT_MASK(32));
			if (err) {
				IGB_ERR("No usable DMA configuration, "
					"aborting\n");
				goto err_dma;
			}
		}
	}
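	/*
	 * Illustrative note (added commentary, not from the original
	 * source): the cascade above first tries a 64-bit DMA mask and,
	 * failing that, falls back to 32-bit; only if both masks are
	 * rejected is the probe aborted.  pci_using_dac records which
	 * mask won so that NETIF_F_HIGHDMA can be set further below.
	 */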
#ifndef HAVE_ASPM_QUIRKS
	/* 82575 requires that the pci-e link partner disable the L0s state */
	switch (pdev->device) {
	case E1000_DEV_ID_82575EB_COPPER:
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
		break;
	default:
		break;
	}

#endif /* HAVE_ASPM_QUIRKS */
	err = pci_request_selected_regions(pdev,
					   pci_select_bars(pdev,
							   IORESOURCE_MEM),
					   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	err = -ENOMEM;
#ifdef HAVE_TX_MQ
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
#else
	netdev = alloc_etherdev(sizeof(struct igb_adapter));
#endif /* HAVE_TX_MQ */
	if (!netdev)
		goto err_alloc_etherdev;

	SET_MODULE_OWNER(netdev);
	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->port_num = hw->bus.func;
	adapter->msg_enable = (1 << debug) - 1;

#ifdef HAVE_PCI_ERS
	err = pci_save_state(pdev);
	if (err)
		goto err_ioremap;
#endif
	err = -EIO;
	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
			      pci_resource_len(pdev, 0));
	if (!hw->hw_addr)
		goto err_ioremap;
#ifdef HAVE_NET_DEVICE_OPS
	netdev->netdev_ops = &igb_netdev_ops;
#else /* HAVE_NET_DEVICE_OPS */
	netdev->open = &igb_open;
	netdev->stop = &igb_close;
	netdev->get_stats = &igb_get_stats;
#ifdef HAVE_SET_RX_MODE
	netdev->set_rx_mode = &igb_set_rx_mode;
#endif
	netdev->set_multicast_list = &igb_set_rx_mode;
	netdev->set_mac_address = &igb_set_mac;
	netdev->change_mtu = &igb_change_mtu;
	netdev->do_ioctl = &igb_ioctl;
#ifdef HAVE_TX_TIMEOUT
	netdev->tx_timeout = &igb_tx_timeout;
#endif
	netdev->vlan_rx_register = igb_vlan_mode;
	netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
	netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
	netdev->poll_controller = igb_netpoll;
#endif
	netdev->hard_start_xmit = &igb_xmit_frame;
#endif /* HAVE_NET_DEVICE_OPS */
	igb_set_ethtool_ops(netdev);
#ifdef HAVE_TX_TIMEOUT
	netdev->watchdog_timeo = 5 * HZ;
#endif

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
	adapter->bd_number = cards_found;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	e1000_get_bus_info(hw);

	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->mac.adaptive_ifs = FALSE;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
#ifdef ETH_TP_MDI_X
		hw->phy.mdix = ETH_TP_MDI_INVALID;
#else
		hw->phy.mdix = AUTO_ALL_MODES;
#endif /* ETH_TP_MDI_X */
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (e1000_check_reset_block(hw))
		dev_info(pci_dev_to_dev(pdev),
			 "PHY reset is blocked due to SOL/IDER session.\n");
	/*
	 * features is initialized to 0 in allocation, it might have bits
	 * set by igb_sw_init so we should use an or instead of an
	 * assignment.
	 */
	netdev->features |= NETIF_F_SG |
			    NETIF_F_IP_CSUM |
#ifdef NETIF_F_IPV6_CSUM
			    NETIF_F_IPV6_CSUM |
#endif
#ifdef NETIF_F_TSO
			    NETIF_F_TSO |
#ifdef NETIF_F_TSO6
			    NETIF_F_TSO6 |
#endif
#endif /* NETIF_F_TSO */
#ifdef NETIF_F_RXHASH
			    NETIF_F_RXHASH |
#endif
#ifdef HAVE_NDO_SET_FEATURES
			    NETIF_F_RXCSUM |
#endif
			    NETIF_F_HW_VLAN_RX |
			    NETIF_F_HW_VLAN_TX;

#ifdef HAVE_NDO_SET_FEATURES
	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features;
#ifndef IGB_NO_LRO
	/* give us the option of enabling LRO later */
	netdev->hw_features |= NETIF_F_LRO;
#endif
#else /* HAVE_NDO_SET_FEATURES */
#ifdef NETIF_F_GRO
	/* this is only needed on kernels prior to 2.6.39 */
	netdev->features |= NETIF_F_GRO;
#endif
#endif /* HAVE_NDO_SET_FEATURES */

	/* set this bit last since it cannot be part of hw_features */
	netdev->features |= NETIF_F_HW_VLAN_FILTER;

#ifdef HAVE_NETDEV_VLAN_FEATURES
	netdev->vlan_features |= NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_IP_CSUM |
				 NETIF_F_IPV6_CSUM |
				 NETIF_F_SG;
#endif
	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	if (hw->mac.type >= e1000_82576)
		netdev->features |= NETIF_F_SCTP_CSUM;
	adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	e1000_reset_hw(hw);

	/* make sure the NVM is good */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
			" Valid\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* copy the MAC address out of the NVM */
	if (e1000_read_mac_addr(hw))
		dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
#ifdef ETHTOOL_GPERMADDR
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
#else
	if (!is_valid_ether_addr(netdev->dev_addr)) {
#endif
		dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
	adapter->mac_table[0].queue = adapter->vfs_allocated_count;
	adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT |
				       IGB_MAC_STATE_IN_USE);
	igb_rar_set(adapter, 0);
	/* get firmware version for ethtool -i */
	e1000_read_nvm(&adapter->hw, 5, 1, &adapter->fw_version);
	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
		    (unsigned long) adapter);
	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
		setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
			    (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
		    (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
		INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	e1000_validate_mdi_setting(hw);
	/* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */
	if (hw->bus.func == 0)
		e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type >= e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
				 &eeprom_data);
	else if (hw->bus.func == 1)
		e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & IGB_EEPROM_APME)
		adapter->eeprom_wol |= E1000_WUFC_MAG;

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
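	/*
	 * Illustrative note (added commentary, not from the original
	 * source): global_quad_port_a simply counts probed functions of a
	 * quad-port board.  On a single 82576 quad copper adapter the four
	 * functions probe in order, so port A sees the counter at 0 (WoL
	 * kept, QUAD_PORT_A flag set), ports B-D see 1..3 (WoL cleared),
	 * and the wrap back to 0 re-arms the logic for a second board.
	 */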
	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;
	device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	strncpy(netdev->name, "eth%d", IFNAMSIZ);
	err = register_netdev(netdev);
	if (err)
		goto err_register;

#ifdef CONFIG_IGB_VMDQ_NETDEV
	err = igb_init_vmdq_netdevs(adapter);
	if (err)
		goto err_register;
#endif
	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

#ifdef IGB_DCA
	if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
		igb_setup_dca(adapter);
	}
#endif
#ifdef HAVE_HW_TIME_STAMP
	/* do hw tstamp init after resetting */
	igb_init_hw_timer(adapter);

#endif
	dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
		  "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4\n" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2\n" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1\n" :
		  "unknown"));
	dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
	for (i = 0; i < 6; i++)
		printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');

	ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
	if (ret_val)
		strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
	dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
		 pba_str);
	/* Initialize the thermal sensor on i350 devices. */
	if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
		u16 ets_word;

		/*
		 * Read the NVM to determine if this i350 device supports an
		 * external thermal sensor.
		 */
		e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
		if (ets_word != 0x0000 && ets_word != 0xFFFF)
			adapter->ets = true;
		else
			adapter->ets = false;
#ifdef IGB_SYSFS
		igb_sysfs_init(adapter);
#else
#ifdef IGB_PROCFS
		igb_procfs_init(adapter);
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */
	} else {
		adapter->ets = false;
	}
	switch (hw->mac.type) {
	case e1000_i350:
		/* Enable EEE for internal copper PHY devices */
		if (hw->phy.media_type == e1000_media_type_copper)
			e1000_set_eee_i350(hw);

		/* send driver version info to firmware */
		igb_init_fw(adapter);
		break;
	default:
		break;
	}

	if (netdev->features & NETIF_F_LRO)
		dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled\n");
	else
		dev_info(pci_dev_to_dev(pdev), "LRO is disabled\n");

	dev_info(pci_dev_to_dev(pdev),
		 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		 adapter->msix_entries ? "MSI-X" :
		 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		 adapter->num_rx_queues, adapter->num_tx_queues);
	cards_found++;

	pm_runtime_put_noidle(&pdev->dev);
	return 0;

err_register:
	igb_release_hw_control(adapter);
err_eeprom:
	if (!e1000_check_reset_block(hw))
		e1000_phy_hw_reset(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	igb_clear_interrupt_scheme(adapter);
	igb_reset_sriov_capability(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_release_selected_regions(pdev,
				     pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	pm_runtime_get_noresume(&pdev->dev);

	/* flush_scheduled_work() may reschedule our watchdog task, so
	 * explicitly disable watchdog tasks from being rescheduled */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
		del_timer_sync(&adapter->dma_err_timer);
	del_timer_sync(&adapter->phy_info_timer);

	flush_scheduled_work();

#ifdef IGB_DCA
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);
#ifdef CONFIG_IGB_VMDQ_NETDEV
	igb_remove_vmdq_netdevs(adapter);
#endif

	igb_clear_interrupt_scheme(adapter);
	igb_reset_sriov_capability(adapter);

	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
				     pci_select_bars(pdev, IORESOURCE_MEM));

#ifdef IGB_SYSFS
	igb_sysfs_exit(adapter);
#else
#ifdef IGB_PROCFS
	igb_procfs_exit(adapter);
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */

	kfree(adapter->mac_table);
	kfree(adapter->shadow_vfta);
	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
#ifdef HAVE_HW_TIME_STAMP
/**
 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
 * @adapter: board private structure to initialize
 *
 * igb_init_hw_timer initializes the function pointer and values for the hw
 * timer found in hardware.
 **/
static void igb_init_hw_timer(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	switch (hw->mac.type) {
	case e1000_i350:
	case e1000_82580:
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * The 82580 timesync updates the system timer every 8ns by 8ns
		 * and the value cannot be shifted.  Instead we need to shift
		 * the registers to generate a 64bit timer value.  As a result
		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
		 * 24 in order to generate a larger value for synchronization.
		 */
		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
		/* disable system timer temporarily by setting bit 31 */
		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x80000000);
		E1000_WRITE_FLUSH(hw);

		/* Set registers so that rollover occurs soon to test this. */
		E1000_WRITE_REG(hw, E1000_SYSTIMR, 0x00000000);
		E1000_WRITE_REG(hw, E1000_SYSTIML, 0x80000000);
		E1000_WRITE_REG(hw, E1000_SYSTIMH, 0x000000FF);
		E1000_WRITE_FLUSH(hw);

		/* enable system timer by clearing bit 31 */
		E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0);
		E1000_WRITE_FLUSH(hw);

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock.  NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
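		/*
		 * Illustrative note (added commentary, not from the
		 * original source): with mult == 1 and shift == 24 the
		 * timecounter math is ns = cycles >> 24.  The hardware
		 * advances SYSTIM by 8 every 8 ns (one count per ns on
		 * average), and the driver composes the cycle value from
		 * the registers shifted left by 24, so the two shifts
		 * cancel and the timecounter yields nanoseconds.
		 */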
		break;
	case e1000_82576:
		/*
		 * Initialize hardware timer: we keep it running just in case
		 * that some program needs it later on.
		 */
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * Scale the NIC clock cycle by a large factor so that
		 * relatively small clock corrections can be added or
		 * subtracted at each clock tick.  The drawbacks of a large
		 * factor are a) that the clock register overflows more quickly
		 * (not such a big deal) and b) that the increment per tick has
		 * to fit into 24 bits.  As a result we need to use a shift of
		 * 19 so we can fit a value of 16 into the TIMINCA register.
		 */
		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
		E1000_WRITE_REG(hw, E1000_TIMINCA,
				(1 << E1000_TIMINCA_16NS_SHIFT) |
				(16 << IGB_82576_TSYNC_SHIFT));
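		/*
		 * Illustrative worked example (added commentary, not from
		 * the original source): the write above asks the hardware
		 * to add 16 << 19 to SYSTIM every 16 ns, i.e. 1 << 19
		 * counter units per nanosecond, so the timecounter's
		 * ns = cycles >> 19 (mult 1, shift 19) recovers
		 * nanoseconds exactly.
		 */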
		/* Set registers so that rollover occurs soon to test this. */
		E1000_WRITE_REG(hw, E1000_SYSTIML, 0x00000000);
		E1000_WRITE_REG(hw, E1000_SYSTIMH, 0xFF800000);
		E1000_WRITE_FLUSH(hw);

		timecounter_init(&adapter->clock,
				 &adapter->cycles,
				 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock.  NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82575:
		/* 82575 does not support timesync */
	default:
		break;
	}
}
#endif /* HAVE_HW_TIME_STAMP */
/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 **/
static int igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	/* PCI config space info */

	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);

	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	/* set default ring sizes */
	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;

	/* set default work limits */
	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;

	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
				  VLAN_HLEN;

	/* Initialize the hardware-specific values */
	if (e1000_setup_init_funcs(hw, TRUE)) {
		dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n");
		return -EIO;
	}

	adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) *
				     hw->mac.rar_entry_count,
				     GFP_ATOMIC);

	/* Setup and initialize a copy of the hw vlan table array */
	adapter->shadow_vfta = (u32 *)kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES,
					      GFP_ATOMIC);

	/* These calls may decrease the number of queues */
	igb_set_sriov_capability(adapter);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

	set_bit(__IGB_DOWN, &adapter->state);

	return 0;
}
/**
 * igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 **/
static int __igb_open(struct net_device *netdev, bool resuming)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
#ifdef CONFIG_PM_RUNTIME
	struct pci_dev *pdev = adapter->pdev;
#endif /* CONFIG_PM_RUNTIME */
	int err;
	int i;
	/* disallow open during test */
	if (test_bit(__IGB_TESTING, &adapter->state)) {
		WARN_ON(resuming);
		return -EBUSY;
	}

#ifdef CONFIG_PM_RUNTIME
	if (!resuming)
		pm_runtime_get_sync(&pdev->dev);
#endif /* CONFIG_PM_RUNTIME */

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	igb_power_up_link(adapter);

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so. */
	igb_configure(adapter);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));
	igb_configure_lli(adapter);

	/* Clear any pending interrupts. */
	E1000_READ_REG(hw, E1000_ICR);

	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT);

		reg_data |= E1000_CTRL_EXT_PFRSTD;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(netdev);

	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
		schedule_work(&adapter->dma_err_task);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return E1000_SUCCESS;
err_req_irq:
	igb_release_hw_control(adapter);
	igb_power_down_link(adapter);
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);

#ifdef CONFIG_PM_RUNTIME
	if (!resuming)
		pm_runtime_put(&pdev->dev);
#endif /* CONFIG_PM_RUNTIME */

	return err;
}
static int igb_open(struct net_device *netdev)
{
	return __igb_open(netdev, false);
}
/**
 * igb_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the driver's control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 **/
static int __igb_close(struct net_device *netdev, bool suspending)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
#ifdef CONFIG_PM_RUNTIME
	struct pci_dev *pdev = adapter->pdev;
#endif /* CONFIG_PM_RUNTIME */

	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));

#ifdef CONFIG_PM_RUNTIME
	if (!suspending)
		pm_runtime_get_sync(&pdev->dev);
#endif /* CONFIG_PM_RUNTIME */
	igb_down(adapter);

	igb_release_hw_control(adapter);

	igb_free_irq(adapter);

	igb_free_all_tx_resources(adapter);
	igb_free_all_rx_resources(adapter);

#ifdef CONFIG_PM_RUNTIME
	if (!suspending)
		pm_runtime_put_sync(&pdev->dev);
#endif /* CONFIG_PM_RUNTIME */

	return 0;
}
static int igb_close(struct net_device *netdev)
{
	return __igb_close(netdev, false);
}
/**
 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 *
 * Return 0 on success, negative on failure
 **/
int igb_setup_tx_resources(struct igb_ring *tx_ring)
{
	struct device *dev = tx_ring->dev;
	int orig_node = dev_to_node(dev);
	int size;

	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
	tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
	if (!tx_ring->tx_buffer_info)
		tx_ring->tx_buffer_info = vzalloc(size);
	if (!tx_ring->tx_buffer_info)
		goto err;

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
	tx_ring->size = ALIGN(tx_ring->size, 4096);

	set_dev_node(dev, tx_ring->numa_node);
	tx_ring->desc = dma_alloc_coherent(dev,
					   tx_ring->size,
					   &tx_ring->dma,
					   GFP_KERNEL);
	set_dev_node(dev, orig_node);
	if (!tx_ring->desc)
		tx_ring->desc = dma_alloc_coherent(dev,
						   tx_ring->size,
						   &tx_ring->dma,
						   GFP_KERNEL);
	if (!tx_ring->desc)
		goto err;

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;

	return 0;

err:
	vfree(tx_ring->tx_buffer_info);
	dev_err(dev,
		"Unable to allocate memory for the transmit descriptor ring\n");
	return -ENOMEM;
}
/**
 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
 *				(Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int i, err = 0;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		err = igb_setup_tx_resources(adapter->tx_ring[i]);
		if (err) {
			dev_err(pci_dev_to_dev(pdev),
				"Allocation for Tx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
				igb_free_tx_resources(adapter->tx_ring[i]);
			break;
		}
	}

	return err;
}
/**
 * igb_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 **/
void igb_setup_tctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which is enabled by default on 82575 and 82576 */
	E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

	e1000_config_collision_dist(hw);

	/* Enable transmits */
	tctl |= E1000_TCTL_EN;

	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
}
/**
 * igb_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.
 **/
void igb_configure_tx_ring(struct igb_adapter *adapter,
			   struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 txdctl = 0;
	u64 tdba = ring->dma;
	int reg_idx = ring->reg_idx;

	/* disable the queue */
	E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0);
	E1000_WRITE_FLUSH(hw);
	mdelay(10);

	E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx),
			ring->count * sizeof(union e1000_adv_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx),
			tdba & 0x00000000ffffffffULL);
	E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32);

	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
	E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0);
	writel(0, ring->tail);

	txdctl |= IGB_TX_PTHRESH;
	txdctl |= IGB_TX_HTHRESH << 8;
	txdctl |= IGB_TX_WTHRESH << 16;

	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
	E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl);
}
/**
 * igb_configure_tx - Configure transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void igb_configure_tx(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
}
/**
 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
 * @rx_ring: rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int igb_setup_rx_resources(struct igb_ring *rx_ring)
{
	struct device *dev = rx_ring->dev;
	int orig_node = dev_to_node(dev);
	int size, desc_len;

	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
	rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
	if (!rx_ring->rx_buffer_info)
		rx_ring->rx_buffer_info = vzalloc(size);
	if (!rx_ring->rx_buffer_info)
		goto err;

	desc_len = sizeof(union e1000_adv_rx_desc);

	/* Round up to nearest 4K */
	rx_ring->size = rx_ring->count * desc_len;
	rx_ring->size = ALIGN(rx_ring->size, 4096);

	set_dev_node(dev, rx_ring->numa_node);
	rx_ring->desc = dma_alloc_coherent(dev,
					   rx_ring->size,
					   &rx_ring->dma,
					   GFP_KERNEL);
	set_dev_node(dev, orig_node);
	if (!rx_ring->desc)
		rx_ring->desc = dma_alloc_coherent(dev,
						   rx_ring->size,
						   &rx_ring->dma,
						   GFP_KERNEL);
	if (!rx_ring->desc)
		goto err;

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;

err:
	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;
	dev_err(dev, "Unable to allocate memory for the receive descriptor"
		" ring\n");
	return -ENOMEM;
}
/**
 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
 *				(Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int i, err = 0;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		err = igb_setup_rx_resources(adapter->rx_ring[i]);
		if (err) {
			dev_err(pci_dev_to_dev(pdev),
				"Allocation for Rx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
				igb_free_rx_resources(adapter->rx_ring[i]);
			break;
		}
	}

	return err;
}
/**
 * igb_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mrqc, rxcsum;
	u32 j, num_rx_queues, shift = 0, shift2 = 0;
	union e1000_reta {
		u32 dword;
		u8  bytes[4];
	} reta;
	static const u8 rsshash[40] = {
		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
		0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };

	/* Fill out hash function seeds */
	for (j = 0; j < 10; j++) {
		u32 rsskey = rsshash[(j * 4)];
		rsskey |= rsshash[(j * 4) + 1] << 8;
		rsskey |= rsshash[(j * 4) + 2] << 16;
		rsskey |= rsshash[(j * 4) + 3] << 24;
		E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), j, rsskey);
	}
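	/*
	 * Illustrative worked example (added commentary, not from the
	 * original source): each RSSRK register holds four key bytes,
	 * least significant byte first, so RSSRK[0] above becomes
	 * 0xda565a6d from the first four rsshash[] bytes
	 * 0x6d, 0x5a, 0x56, 0xda.
	 */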
	num_rx_queues = adapter->rss_queues;

	if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
		/* 82575 and 82576 supports 2 RSS queues for VMDq */
		switch (hw->mac.type) {
		case e1000_i350:
		case e1000_82580:
			num_rx_queues = 1;
			shift = 0;
			break;
		case e1000_82576:
			shift = 3;
			num_rx_queues = 2;
			break;
		case e1000_82575:
			shift = 2;
			shift2 = 6;
		default:
			break;
		}
	} else {
		if (hw->mac.type == e1000_82575)
			shift = 6;
	}

	for (j = 0; j < (32 * 4); j++) {
		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
		if (shift2)
			reta.bytes[j & 3] |= num_rx_queues << shift2;
		if ((j & 3) == 3)
			E1000_WRITE_REG(hw, E1000_RETA(j >> 2), reta.dword);
	}
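	/*
	 * Illustrative note (added commentary, not from the original
	 * source): the redirection table is 128 one-byte entries written
	 * four at a time.  With num_rx_queues == 2 and shift == 3 (the
	 * 82576 VMDq case above) the entries alternate 0 << 3, 1 << 3,
	 * 0 << 3, ... so received hash values are spread round-robin
	 * across the two RSS queues of the default pool.
	 */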
	/*
	 * Disable raw packet checksumming so that RSS hash is placed in
	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	if (adapter->hw.mac.type >= e1000_82576)
		/* Enable Receive Checksum Offload for SCTP */
		rxcsum |= E1000_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
	/* If VMDq is enabled then we set the appropriate mode for that, else
	 * we default to RSS so that an RSS hash is calculated per packet even
	 * if we are only using one queue */
	if (adapter->vfs_allocated_count || adapter->vmdq_pools) {
		if (hw->mac.type > e1000_82575) {
			/* Set the default pool for the PF's first queue */
			u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL);

			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
				   E1000_VT_CTL_DISABLE_DEF_POOL);
			vtctl |= adapter->vfs_allocated_count <<
				 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
			E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl);
		} else if (adapter->rss_queues > 1) {
			/* set default queue for pool 1 to queue 2 */
			E1000_WRITE_REG(hw, E1000_VT_CTL,
					adapter->rss_queues << 7);
		}
		if (adapter->rss_queues > 1)
			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
		else
			mrqc = E1000_MRQC_ENABLE_VMDQ;
	} else {
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
	}
	igb_vmm_control(adapter);

	/*
	 * Generate RSS hash based on TCP port numbers and/or
	 * IPv4/v6 src and dst addresses since UDP cannot be
	 * hashed reliably due to IP fragmentation
	 */
	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
		E1000_MRQC_RSS_FIELD_IPV4_TCP |
		E1000_MRQC_RSS_FIELD_IPV6 |
		E1000_MRQC_RSS_FIELD_IPV6_TCP |
		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;

	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
}
/**
 * igb_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 **/
void igb_setup_rctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = E1000_READ_REG(hw, E1000_RCTL);

	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/*
	 * enable stripping of CRC.  It's unlikely this will break BMC
	 * redirection as it did with e1000.  Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= E1000_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);

	/* enable LPE to prevent packets larger than max_frame_size */
	rctl |= E1000_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0);

	/* Attention!!!  For SR-IOV PF driver operations you must enable
	 * queue drop for all VF and PF queues to prevent head of line blocking
	 * if an un-trusted VF does not provide descriptors to hardware.
	 */
	if (adapter->vfs_allocated_count) {
		/* set all queue drop enable bits */
		E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES);
	}

	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
}
static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
				   int vfn)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/* if it isn't the PF check to see if VFs are enabled and
	 * increase the size to support vlan tags */
	if (vfn < adapter->vfs_allocated_count &&
	    adapter->vf_data[vfn].vlans_enabled)
		size += VLAN_HLEN;

#ifdef CONFIG_IGB_VMDQ_NETDEV
	if (vfn >= adapter->vfs_allocated_count) {
		int queue = vfn - adapter->vfs_allocated_count;
		struct igb_vmdq_adapter *vadapter;

		vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]);
		if (vadapter->vlgrp)
			size += VLAN_HLEN;
	}
#endif
	vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));
	vmolr &= ~E1000_VMOLR_RLPML_MASK;
	vmolr |= size | E1000_VMOLR_LPE;
	E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);

	return 0;
}
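/*
 * Illustrative note (added commentary, not from the original source):
 * the per-pool maximum packet length lives in the low bits of VMOLR
 * (masked by E1000_VMOLR_RLPML_MASK), so the read-modify-write above
 * clears just that field and ORs in the new byte limit together with
 * LPE, which lets that pool accept frames longer than the standard
 * Ethernet maximum.
 */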
/**
 * igb_rlpml_set - set maximum receive packet size
 * @adapter: board private structure
 *
 * Configure maximum receivable packet size.
 **/
static void igb_rlpml_set(struct igb_adapter *adapter)
{
	u32 max_frame_size = adapter->max_frame_size;
	struct e1000_hw *hw = &adapter->hw;
	u16 pf_id = adapter->vfs_allocated_count;

	if (adapter->vmdq_pools && hw->mac.type != e1000_82575) {
		int i;

		for (i = 0; i < adapter->vmdq_pools; i++)
			igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i);
		/*
		 * If we're in VMDQ or SR-IOV mode, then set global RLPML
		 * to our max jumbo frame size, in case we need to enable
		 * jumbo frames on one of the rings later.
		 * This will not pass over-length frames into the default
		 * queue because it's gated by the VMOLR.RLPML.
		 */
		max_frame_size = MAX_JUMBO_FRAME_SIZE;
	}
	/* Set VF RLPML for the PF device. */
	if (adapter->vfs_allocated_count)
		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);

	E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size);
}
static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
					 int vfn, bool enable)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 val;
	void __iomem *reg;

	if (hw->mac.type < e1000_82576)
		return;

	if (hw->mac.type == e1000_i350)
		reg = hw->hw_addr + E1000_DVMOLR(vfn);
	else
		reg = hw->hw_addr + E1000_VMOLR(vfn);

	val = readl(reg);
	if (enable)
		val |= E1000_VMOLR_STRVLAN;
	else
		val &= ~(E1000_VMOLR_STRVLAN);
	writel(val, reg);
}
static inline void igb_set_vmolr(struct igb_adapter *adapter,
				 int vfn, bool aupe)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/*
	 * This register exists only on 82576 and newer so if we are older then
	 * we should exit and do nothing
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn));

	if (aupe)
		vmolr |= E1000_VMOLR_AUPE;	/* Accept untagged packets */
	else
		vmolr &= ~(E1000_VMOLR_AUPE);	/* Tagged packets ONLY */

	/* clear all bits that might not be set */
	vmolr &= ~E1000_VMOLR_RSSE;

	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_RSSE;	/* enable RSS */

	vmolr |= E1000_VMOLR_BAM;		/* Accept broadcast */
	vmolr |= E1000_VMOLR_LPE;		/* Accept long packets */

	E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
}
/**
 * igb_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
 * @ring: receive ring to be configured
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
void igb_configure_rx_ring(struct igb_adapter *adapter,
			   struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u64 rdba = ring->dma;
	int reg_idx = ring->reg_idx;
	u32 srrctl = 0, rxdctl = 0;

	/* disable the queue */
	E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0);

	/* Set DMA base address registers */
	E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx),
			rdba & 0x00000000ffffffffULL);
	E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32);
	E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx),
			ring->count * sizeof(union e1000_adv_rx_desc));

	/* initialize head and tail */
	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
	E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0);
	writel(0, ring->tail);

	/* set descriptor configuration */
#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
	srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
#else
	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
#endif
	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
	srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
		 E1000_SRRCTL_BSIZEPKT_SHIFT;
	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
#ifdef IGB_PER_PKT_TIMESTAMP
	if (hw->mac.type >= e1000_82580)
		srrctl |= E1000_SRRCTL_TIMESTAMP;
#endif
	/*
	 * We should set the drop enable bit if:
	 *  SR-IOV is enabled
	 *   or
	 *  Flow Control is disabled and number of RX queues > 1
	 *
	 * This allows us to avoid head of line blocking for security
	 * and performance reasons.
	 */
	if (adapter->vfs_allocated_count ||
	    (adapter->num_rx_queues > 1 &&
	     (hw->fc.requested_mode == e1000_fc_none ||
	      hw->fc.requested_mode == e1000_fc_rx_pause)))
		srrctl |= E1000_SRRCTL_DROP_EN;

	E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl);
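	/*
	 * Illustrative worked example (added commentary, not from the
	 * original source): the SRRCTL packet buffer size field is
	 * expressed in 1 KB units, hence the E1000_SRRCTL_BSIZEPKT_SHIFT
	 * right shift above - e.g. a half-page buffer of 2048 bytes on a
	 * 4 KB-page system encodes as 2.
	 */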
	/* set filtering for VMDQ pools */
	igb_set_vmolr(adapter, reg_idx & 0x7, true);

	rxdctl |= IGB_RX_PTHRESH;
	rxdctl |= IGB_RX_HTHRESH << 8;
	rxdctl |= IGB_RX_WTHRESH << 16;

	/* enable receive descriptor fetching */
	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
	E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl);
}
/**
 * igb_configure_rx - Configure receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
static void igb_configure_rx(struct igb_adapter *adapter)
{
	int i;

	/* set UTA to appropriate mode */
	igb_set_uta(adapter);

	igb_full_sync_mac_table(adapter);
	/* Setup the HW Rx Head and Tail Descriptor Pointers and
	 * the Base and Length of the Rx Descriptor Ring */
	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
}
/**
 * igb_free_tx_resources - Free Tx Resources per Queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void igb_free_tx_resources(struct igb_ring *tx_ring)
{
	igb_clean_tx_ring(tx_ring);

	vfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;

	/* if not set, then don't free */
	if (!tx_ring->desc)
		return;

	dma_free_coherent(tx_ring->dev, tx_ring->size,
			  tx_ring->desc, tx_ring->dma);

	tx_ring->desc = NULL;
}
/**
 * igb_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources
 **/
static void igb_free_all_tx_resources(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_free_tx_resources(adapter->tx_ring[i]);
}
void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
				    struct igb_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		dev_kfree_skb_any(tx_buffer->skb);
		if (dma_unmap_len(tx_buffer, len))
			dma_unmap_single(ring->dev,
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
					 DMA_TO_DEVICE);
	} else if (dma_unmap_len(tx_buffer, len)) {
		dma_unmap_page(ring->dev,
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
			       DMA_TO_DEVICE);
	}
	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
	dma_unmap_len_set(tx_buffer, len, 0);
	/* buffer_info must be completely set up in the transmit path */
}
/**
 * igb_clean_tx_ring - Free Tx Buffers
 * @tx_ring: ring to be cleaned
 **/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
	struct igb_tx_buffer *buffer_info;
	unsigned long size;
	u16 i;

	if (!tx_ring->tx_buffer_info)
		return;

	/* Free all the Tx ring sk_buffs */
	for (i = 0; i < tx_ring->count; i++) {
		buffer_info = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
	}

#ifdef CONFIG_BQL
	netdev_tx_reset_queue(txring_txq(tx_ring));
#endif /* CONFIG_BQL */

	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_buffer_info, 0, size);

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
}
/**
 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igb_clean_tx_ring(adapter->tx_ring[i]);
}
/**
 * igb_free_rx_resources - Free Rx Resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void igb_free_rx_resources(struct igb_ring *rx_ring)
{
	igb_clean_rx_ring(rx_ring);

	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;

	/* if not set, then don't free */
	if (!rx_ring->desc)
		return;

	dma_free_coherent(rx_ring->dev, rx_ring->size,
			  rx_ring->desc, rx_ring->dma);

	rx_ring->desc = NULL;
}
/**
 * igb_free_all_rx_resources - Free Rx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all receive software resources
 **/
static void igb_free_all_rx_resources(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_free_rx_resources(adapter->rx_ring[i]);
}
/**
 * igb_clean_rx_ring - Free Rx Buffers per Queue
 * @rx_ring: ring to free buffers from
 **/
void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
	unsigned long size;
	u16 i;
#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
	const int bufsz = rx_ring->rx_buffer_len;
#else
	const int bufsz = IGB_RX_HDR_LEN;
#endif

	if (!rx_ring->rx_buffer_info)
		return;

	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];

		if (buffer_info->dma) {
			dma_unmap_single(rx_ring->dev,
					 buffer_info->dma,
					 bufsz,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		if (buffer_info->skb) {
			dev_kfree_skb(buffer_info->skb);
			buffer_info->skb = NULL;
		}
#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
		if (buffer_info->page_dma) {
			dma_unmap_page(rx_ring->dev,
				       buffer_info->page_dma,
				       PAGE_SIZE / 2,
				       DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}
		if (buffer_info->page) {
			put_page(buffer_info->page);
			buffer_info->page = NULL;
			buffer_info->page_offset = 0;
		}
#endif
	}

	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_buffer_info, 0, size);

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}
/**
 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_clean_rx_ring(adapter->rx_ring[i]);
}
/**
 * igb_set_mac - Change the Ethernet Address of the NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_set_mac(struct net_device *netdev, void *p)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct sockaddr *addr = p;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	igb_del_mac_filter(adapter, hw->mac.addr,
			   adapter->vfs_allocated_count);
	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);

	/* set the correct pool for the new PF MAC address in entry 0 */
	return igb_add_mac_filter(adapter, hw->mac.addr,
				  adapter->vfs_allocated_count);
}
/**
 * igb_write_mc_addr_list - write multicast addresses to MTA
 * @netdev: network interface device structure
 *
 * Writes multicast address list to the MTA hash table.
 * Returns: -ENOMEM on failure
 *	    0 on no addresses written
 *	    X on writing X addresses to MTA
 **/
int igb_write_mc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
#ifdef NETDEV_HW_ADDR_T_MULTICAST
	struct netdev_hw_addr *ha;
#else
	struct dev_mc_list *ha;
#endif
	u8  *mta_list;
	int i, count;
#ifdef CONFIG_IGB_VMDQ_NETDEV
	int vm;
#endif
	count = netdev_mc_count(netdev);
#ifdef CONFIG_IGB_VMDQ_NETDEV
	for (vm = 1; vm < adapter->vmdq_pools; vm++) {
		if (!adapter->vmdq_netdev[vm])
			break;
		if (!netif_running(adapter->vmdq_netdev[vm]))
			continue;
		count += netdev_mc_count(adapter->vmdq_netdev[vm]);
	}
#endif

	if (!count) {
		e1000_update_mc_addr_list(hw, NULL, 0);
		return 0;
	}
	mta_list = kzalloc(count * 6, GFP_ATOMIC);
	if (!mta_list)
		return -ENOMEM;

	/* The shared function expects a packed array of only addresses. */
	i = 0;
	netdev_for_each_mc_addr(ha, netdev)
#ifdef NETDEV_HW_ADDR_T_MULTICAST
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
#else
		memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN);
#endif
#ifdef CONFIG_IGB_VMDQ_NETDEV
	for (vm = 1; vm < adapter->vmdq_pools; vm++) {
		if (!adapter->vmdq_netdev[vm])
			break;
		if (!netif_running(adapter->vmdq_netdev[vm]) ||
		    !netdev_mc_count(adapter->vmdq_netdev[vm]))
			continue;
		netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm])
#ifdef NETDEV_HW_ADDR_T_MULTICAST
			memcpy(mta_list + (i++ * ETH_ALEN),
			       ha->addr, ETH_ALEN);
#else
			memcpy(mta_list + (i++ * ETH_ALEN),
			       ha->dmi_addr, ETH_ALEN);
#endif
	}
#endif
	e1000_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

	return count;
}
void igb_rar_set(struct igb_adapter *adapter, u32 index)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;
	u8 *addr = adapter->mac_table[index].addr;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE)
		rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue;
	else
		rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue;

	E1000_WRITE_REG(hw, E1000_RAL(index), rar_low);
	E1000_WRITE_FLUSH(hw);
	E1000_WRITE_REG(hw, E1000_RAH(index), rar_high);
	E1000_WRITE_FLUSH(hw);
}
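/*
 * Illustrative worked example (added commentary, not from the original
 * source): for a made-up address 00:1b:21:aa:bb:cc the packing above
 * yields RAL = 0xaa211b00 (bytes 0-3, least significant byte first) and
 * RAH = 0x0000ccbb (bytes 4-5) before the valid and pool bits are ORed
 * in.
 */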
void igb_full_sync_mac_table(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	for (i = 0; i < hw->mac.rar_entry_count; i++) {
		igb_rar_set(adapter, i);
	}
}
void igb_sync_mac_table(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	for (i = 0; i < hw->mac.rar_entry_count; i++) {
		if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED)
			igb_rar_set(adapter, i);
		adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED);
	}
}
int igb_available_rars(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i, count = 0;

	for (i = 0; i < hw->mac.rar_entry_count; i++) {
		if (adapter->mac_table[i].state == 0)
			count++;
	}
	return count;
}
#ifdef HAVE_SET_RX_MODE
/**
 * igb_write_uc_addr_list - write unicast addresses to RAR table
 * @netdev: network interface device structure
 *
 * Writes unicast address list to the RAR table.
 * Returns: -ENOMEM on failure/insufficient address space
 *	    0 on no addresses written
 *	    X on writing X addresses to the RAR table
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	unsigned int vfn = adapter->vfs_allocated_count;
	int count = 0;

	/* return ENOMEM indicating insufficient memory for addresses */
	if (netdev_uc_count(netdev) > igb_available_rars(adapter))
		return -ENOMEM;

	if (!netdev_uc_empty(netdev)) {
#ifdef NETDEV_HW_ADDR_T_UNICAST
		struct netdev_hw_addr *ha;
#else
		struct dev_mc_list *ha;
#endif
		netdev_for_each_uc_addr(ha, netdev) {
#ifdef NETDEV_HW_ADDR_T_UNICAST
			igb_del_mac_filter(adapter, ha->addr, vfn);
			igb_add_mac_filter(adapter, ha->addr, vfn);
#else
			igb_del_mac_filter(adapter, ha->da_addr, vfn);
			igb_add_mac_filter(adapter, ha->da_addr, vfn);
#endif
			count++;
		}
	}
	return count;
}
#endif /* HAVE_SET_RX_MODE */
/**
 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 * @netdev: network interface device structure
 *
 * The set_rx_mode entry point is called whenever the unicast or multicast
 * address lists or the network interface flags are updated.  This routine is
 * responsible for configuring the hardware for proper unicast, multicast,
 * promiscuous mode, and all-multi behavior.
 **/
static void igb_set_rx_mode(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	u32 rctl, vmolr = 0;
	int count;

	/* Check for Promiscuous and All Multicast modes */
	rctl = E1000_READ_REG(hw, E1000_RCTL);

	/* clear the affected bits */
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

	if (netdev->flags & IFF_PROMISC) {
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
	} else {
		if (netdev->flags & IFF_ALLMULTI) {
			rctl |= E1000_RCTL_MPE;
			vmolr |= E1000_VMOLR_MPME;
		} else {
			/*
			 * Write addresses to the MTA, if the attempt fails
			 * then we should just turn on promiscuous mode so
			 * that we can at least receive multicast traffic
			 */
			count = igb_write_mc_addr_list(netdev);
			if (count < 0) {
				rctl |= E1000_RCTL_MPE;
				vmolr |= E1000_VMOLR_MPME;
			} else if (count) {
				vmolr |= E1000_VMOLR_ROMPE;
			}
		}
#ifdef HAVE_SET_RX_MODE
		/*
		 * Write addresses to available RAR registers, if there is not
		 * sufficient space to store all the addresses then enable
		 * unicast promiscuous mode
		 */
		count = igb_write_uc_addr_list(netdev);
		if (count < 0) {
			rctl |= E1000_RCTL_UPE;
			vmolr |= E1000_VMOLR_ROPE;
		}
#endif /* HAVE_SET_RX_MODE */
		rctl |= E1000_RCTL_VFE;
	}
	E1000_WRITE_REG(hw, E1000_RCTL, rctl);

	/*
	 * In order to support SR-IOV and eventually VMDq it is necessary to set
	 * the VMOLR to enable the appropriate modes.  Without this workaround
	 * we will have issues with VLAN tag stripping not being done for frames
	 * that are only arriving because we are the default pool
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) &
		 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
	E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr);
	igb_restore_vf_multicasts(adapter);
}
static void igb_check_wvbr(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 wvbr = 0;

	switch (hw->mac.type) {
	case e1000_82576:
	case e1000_i350:
		if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR)))
			return;
		break;
	default:
		break;
	}

	adapter->wvbr |= wvbr;
}
#define IGB_STAGGERED_QUEUE_OFFSET 8

static void igb_spoof_check(struct igb_adapter *adapter)
{
	int j;

	if (!adapter->wvbr)
		return;

	for (j = 0; j < adapter->vfs_allocated_count; j++) {
		if (adapter->wvbr & (1 << j) ||
		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
			DPRINTK(DRV, WARNING,
				"Spoof event(s) detected on VF %d\n", j);
			adapter->wvbr &=
				~((1 << j) |
				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
		}
	}
}
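/*
 * Illustrative note (added commentary, not from the original source):
 * WVBR reports wrong-VM-behaviour events with one status bit per VF
 * queue, and each VF's second queue bit sits IGB_STAGGERED_QUEUE_OFFSET
 * (8) positions above its first.  VF 2, for example, is checked via
 * bits 2 and 10, and both bits are cleared once the warning is logged.
 */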
/*
 * Need to wait a few seconds after link up to get diagnostic information from
 * the phy
 */
static void igb_update_phy_info(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *) data;
	e1000_get_phy_info(&adapter->hw);
}
/**
 * igb_has_link - check shared code for link and determine up/down
 * @adapter: pointer to driver private info
 **/
bool igb_has_link(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	bool link_active = FALSE;

	/* get_link_status is set on LSC (link status) interrupt or
	 * rx sequence error interrupt.  get_link_status will stay
	 * false until the e1000_check_for_link establishes link
	 * for copper adapters ONLY
	 */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (!hw->mac.get_link_status)
			return true;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_active = !hw->mac.get_link_status;
		break;
	case e1000_media_type_unknown:
	default:
		break;
	}

	return link_active;
}
/**
 * igb_watchdog - Timer Call-back
 * @data: pointer to adapter cast into an unsigned long
 **/
static void igb_watchdog(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;

	/* Do the rest outside of interrupt context */
	schedule_work(&adapter->watchdog_task);
}
4026 static void igb_watchdog_task(struct work_struct *work)
4028 struct igb_adapter *adapter = container_of(work,
4031 struct e1000_hw *hw = &adapter->hw;
4032 struct net_device *netdev = adapter->netdev;
4035 u32 thstat, ctrl_ext;
4038 link = igb_has_link(adapter);
4040 /* Cancel scheduled suspend requests. */
4041 pm_runtime_resume(netdev->dev.parent);
4043 if (!netif_carrier_ok(netdev)) {
4045 e1000_get_speed_and_duplex(hw,
4046 &adapter->link_speed,
4047 &adapter->link_duplex);
4049 ctrl = E1000_READ_REG(hw, E1000_CTRL);
4050 /* Links status message must follow this format */
4051 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
4052 "Flow Control: %s\n",
4054 adapter->link_speed,
4055 adapter->link_duplex == FULL_DUPLEX ?
4056 "Full Duplex" : "Half Duplex",
4057 ((ctrl & E1000_CTRL_TFCE) &&
4058 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX":
4059 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
4060 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
4061 /* adjust timeout factor according to speed/duplex */
4062 adapter->tx_timeout_factor = 1;
4063 switch (adapter->link_speed) {
4065 adapter->tx_timeout_factor = 14;
4068 /* maybe add some timeout factor ? */
4072 netif_carrier_on(netdev);
4073 netif_tx_wake_all_queues(netdev);
4075 igb_ping_all_vfs(adapter);
4077 igb_check_vf_rate_limit(adapter);
4078 #endif /* IFLA_VF_MAX */
4080 /* link state has changed, schedule phy info update */
4081 if (!test_bit(__IGB_DOWN, &adapter->state))
4082 mod_timer(&adapter->phy_info_timer,
4083 round_jiffies(jiffies + 2 * HZ));
4086 if (netif_carrier_ok(netdev)) {
4087 adapter->link_speed = 0;
4088 adapter->link_duplex = 0;
4089 /* check for thermal sensor event on i350 */
4090 if (hw->mac.type == e1000_i350) {
4091 thstat = E1000_READ_REG(hw, E1000_THSTAT);
4092 ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
4093 if ((hw->phy.media_type ==
4094 e1000_media_type_copper) &&
4096 E1000_CTRL_EXT_LINK_MODE_SGMII)) {
4097 if (thstat & E1000_THSTAT_PWR_DOWN) {
4098 printk(KERN_ERR "igb: %s The "
4099 "network adapter was stopped "
4100 "because it overheated.\n",
4103 if (thstat & E1000_THSTAT_LINK_THROTTLE) {
4105 "igb: %s The network "
4106 "adapter supported "
4116 /* Links status message must follow this format */
4117 printk(KERN_INFO "igb: %s NIC Link is Down\n",
4119 netif_carrier_off(netdev);
4120 netif_tx_stop_all_queues(netdev);
4122 igb_ping_all_vfs(adapter);
4124 /* link state has changed, schedule phy info update */
4125 if (!test_bit(__IGB_DOWN, &adapter->state))
4126 mod_timer(&adapter->phy_info_timer,
4127 round_jiffies(jiffies + 2 * HZ));
4129 pm_schedule_suspend(netdev->dev.parent,
4134 igb_update_stats(adapter);
4136 for (i = 0; i < adapter->num_tx_queues; i++) {
4137 struct igb_ring *tx_ring = adapter->tx_ring[i];
4138 if (!netif_carrier_ok(netdev)) {
4139 /* We've lost link, so the controller stops DMA,
4140 * but we've got queued Tx work that's never going
4141 * to get done, so reset controller to flush Tx.
4142 * (Do the reset outside of interrupt context). */
4143 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
4144 adapter->tx_timeout_count++;
4145 schedule_work(&adapter->reset_task);
4146 /* return immediately since reset is imminent */
4151 /* Force detection of hung controller every watchdog period */
4152 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
4155 /* Cause software interrupt to ensure rx ring is cleaned */
4156 if (adapter->msix_entries) {
4157 u32 eics = 0;
4158 for (i = 0; i < adapter->num_q_vectors; i++)
4159 eics |= adapter->q_vector[i]->eims_value;
4160 E1000_WRITE_REG(hw, E1000_EICS, eics);
4161 } else {
4162 E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0);
4163 }
4165 igb_spoof_check(adapter);
4167 /* Reset the timer */
4168 if (!test_bit(__IGB_DOWN, &adapter->state))
4169 mod_timer(&adapter->watchdog_timer,
4170 round_jiffies(jiffies + 2 * HZ));
4171 }
4172 
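/*
 * Editor's note: a minimal illustrative sketch (not driver code) of the
 * ring-occupancy arithmetic behind the hang check in the watchdog above.
 * The struct and names below are hypothetical stand-ins; igb_desc_unused()
 * conventionally counts free descriptors, so "unused + 1 < count" holds
 * whenever at least one descriptor is still queued for transmit.
 */
#if 0
struct example_ring { unsigned short next_to_use, next_to_clean, count; };

static unsigned short example_desc_unused(const struct example_ring *r)
{
	/* free slots between the clean and use pointers, keeping one
	 * slot empty so the tail never catches up with the head */
	if (r->next_to_clean > r->next_to_use)
		return r->next_to_clean - r->next_to_use - 1;
	return r->count + r->next_to_clean - r->next_to_use - 1;
}
#endif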
4173 static void igb_dma_err_task(struct work_struct *work)
4174 {
4175 struct igb_adapter *adapter = container_of(work,
4176 struct igb_adapter,
4177 dma_err_task);
4179 struct e1000_hw *hw = &adapter->hw;
4180 struct net_device *netdev = adapter->netdev;
4181 u32 hgptc;
4182 u32 ciaa, ciad;
4183 int vf;
4184 hgptc = E1000_READ_REG(hw, E1000_HGPTC);
4185 if (hgptc) /* If incrementing then no need for the check below */
4186 goto dma_timer_reset;
4187 /*
4188 * Check to see if a bad DMA write target from an errant or
4189 * malicious VF has caused a PCIe error. If so then we can
4190 * issue a VFLR to the offending VF(s) and then resume without
4191 * requesting a full slot reset.
4192 */
4193 
4194 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4195 ciaa = (vf << 16) | 0x80000000;
4196 /* 32 bit read so align, we really want status at offset 6 */
4197 ciaa |= PCI_COMMAND;
4198 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4199 ciad = E1000_READ_REG(hw, E1000_CIAD);
4200 ciaa &= 0x7FFFFFFF;
4201 /* disable debug mode asap after reading data */
4202 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4203 /* Get the upper 16 bits which will be the PCI status reg */
4204 ciad >>= 16;
4205 if (ciad & (PCI_STATUS_REC_MASTER_ABORT |
4206 PCI_STATUS_REC_TARGET_ABORT |
4207 PCI_STATUS_SIG_SYSTEM_ERROR)) {
4208 netdev_err(netdev, "VF %d suffered error\n", vf);
4210 ciaa = (vf << 16) | 0x80000000;
4211 ciaa |= 0xA8;
4212 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4213 ciad = 0x00008000; /* VFLR */
4214 E1000_WRITE_REG(hw, E1000_CIAD, ciad);
4215 ciaa &= 0x7FFFFFFF;
4216 E1000_WRITE_REG(hw, E1000_CIAA, ciaa);
4217 }
4218 }
4219 dma_timer_reset:
4220 /* Reset the timer */
4221 if (!test_bit(__IGB_DOWN, &adapter->state))
4222 mod_timer(&adapter->dma_err_timer,
4223 round_jiffies(jiffies + HZ / 10));
4224 }
4225 
4226 /**
4227 * igb_dma_err_timer - Timer Call-back
4228 * @data: pointer to adapter cast into an unsigned long
4229 **/
4230 static void igb_dma_err_timer(unsigned long data)
4231 {
4232 struct igb_adapter *adapter = (struct igb_adapter *)data;
4233 /* Do the rest outside of interrupt context */
4234 schedule_work(&adapter->dma_err_task);
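/*
 * Editor's note: a sketch of the CIAA word encoding used by
 * igb_dma_err_task() above, derived from the code itself rather than a
 * datasheet: bit 31 enables the indirect access, the VF number sits at
 * bit 16, and the low bits select a PCI config space offset. PCI_COMMAND
 * is offset 0x04, so the 32-bit CIAD read returns STATUS (offset 6) in
 * its upper 16 bits, which is why the code shifts right by 16 before
 * testing the abort/system-error flags. The helper name is hypothetical.
 */
#if 0
static unsigned int example_ciaa_word(unsigned int vf, unsigned int cfg_offset)
{
	return (vf << 16) | 0x80000000u | (cfg_offset & 0xFFFu);
}
#endif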
4237 enum latency_range {
4238 lowest_latency = 0,
4239 low_latency = 1,
4240 bulk_latency = 2,
4241 latency_invalid = 255
4242 };
4243 
4244 /**
4245 * igb_update_ring_itr - update the dynamic ITR value based on packet size
4247 * Stores a new ITR value based strictly on packet size. This
4248 * algorithm is less sophisticated than that used in igb_update_itr,
4249 * due to the difficulty of synchronizing statistics across multiple
4250 * receive rings. The divisors and thresholds used by this function
4251 * were determined based on theoretical maximum wire speed and testing
4252 * data, in order to minimize response time while increasing bulk
4253 * throughput.
4254 * This functionality is controlled by the InterruptThrottleRate module
4255 * parameter (see igb_param.c)
4256 * NOTE: This function is called only when operating in a multiqueue
4257 * receive environment.
4258 * @q_vector: pointer to q_vector
4259 **/
4260 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
4261 {
4262 int new_val = q_vector->itr_val;
4263 int avg_wire_size = 0;
4264 struct igb_adapter *adapter = q_vector->adapter;
4265 unsigned int packets;
4267 /* For non-gigabit speeds, just fix the interrupt rate at 4000
4268 * ints/sec - ITR timer value of 120 ticks.
4270 if (adapter->link_speed != SPEED_1000) {
4271 new_val = IGB_4K_ITR;
4272 goto set_itr_val;
4273 }
4275 packets = q_vector->rx.total_packets;
4276 if (packets)
4277 avg_wire_size = q_vector->rx.total_bytes / packets;
4278 
4279 packets = q_vector->tx.total_packets;
4280 if (packets)
4281 avg_wire_size = max_t(u32, avg_wire_size,
4282 q_vector->tx.total_bytes / packets);
4284 /* if avg_wire_size isn't set no work was done */
4285 if (!avg_wire_size)
4286 goto clear_counts;
4288 /* Add 24 bytes to size to account for CRC, preamble, and gap */
4289 avg_wire_size += 24;
4291 /* Don't starve jumbo frames */
4292 avg_wire_size = min(avg_wire_size, 3000);
4294 /* Give a little boost to mid-size frames */
4295 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
4296 new_val = avg_wire_size / 3;
4297 else
4298 new_val = avg_wire_size / 2;
4300 /* conservative mode (itr 3) eliminates the lowest_latency setting */
4301 if (new_val < IGB_20K_ITR &&
4302 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4303 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4304 new_val = IGB_20K_ITR;
4305 
4306 set_itr_val:
4307 if (new_val != q_vector->itr_val) {
4308 q_vector->itr_val = new_val;
4309 q_vector->set_itr = 1;
4310 }
4311 clear_counts:
4312 q_vector->rx.total_bytes = 0;
4313 q_vector->rx.total_packets = 0;
4314 q_vector->tx.total_bytes = 0;
4315 q_vector->tx.total_packets = 0;
4316 }
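/*
 * Editor's note: a worked example (assumed traffic) of the mapping in
 * igb_update_ring_itr() above: 100 packets totalling 60000 bytes give
 * avg_wire_size = 600 + 24 = 624; since 300 < 624 < 1200 the mid-size
 * boost applies and new_val = 624 / 3 = 208, whereas a 64-byte average
 * would yield (64 + 24) / 2 = 44, i.e. a much higher interrupt rate.
 */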
4317 
4318 /**
4319 * igb_update_itr - update the dynamic ITR value based on statistics
4320 * Stores a new ITR value based on packets and byte
4321 * counts during the last interrupt. The advantage of per interrupt
4322 * computation is faster updates and more accurate ITR for the current
4323 * traffic pattern. Constants in this function were computed
4324 * based on theoretical maximum wire speed and thresholds were set based
4325 * on testing data as well as attempting to minimize response time
4326 * while increasing bulk throughput.
4327 * This functionality is controlled by the InterruptThrottleRate module
4328 * parameter (see igb_param.c)
4329 * NOTE: These calculations are only valid when operating in a single-
4330 * queue environment.
4331 * @q_vector: pointer to q_vector
4332 * @ring_container: ring info to update the itr for
4333 **/
4334 static void igb_update_itr(struct igb_q_vector *q_vector,
4335 struct igb_ring_container *ring_container)
4336 {
4337 unsigned int packets = ring_container->total_packets;
4338 unsigned int bytes = ring_container->total_bytes;
4339 u8 itrval = ring_container->itr;
4341 /* no packets, exit with status unchanged */
4342 if (packets == 0)
4343 return;
4344 
4345 switch (itrval) {
4346 case lowest_latency:
4347 /* handle TSO and jumbo frames */
4348 if (bytes/packets > 8000)
4349 itrval = bulk_latency;
4350 else if ((packets < 5) && (bytes > 512))
4351 itrval = low_latency;
4352 break;
4353 case low_latency: /* 50 usec aka 20000 ints/s */
4354 if (bytes > 10000) {
4355 /* this if handles the TSO accounting */
4356 if (bytes/packets > 8000) {
4357 itrval = bulk_latency;
4358 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
4359 itrval = bulk_latency;
4360 } else if ((packets > 35)) {
4361 itrval = lowest_latency;
4363 } else if (bytes/packets > 2000) {
4364 itrval = bulk_latency;
4365 } else if (packets <= 2 && bytes < 512) {
4366 itrval = lowest_latency;
4367 }
4368 break;
4369 case bulk_latency: /* 250 usec aka 4000 ints/s */
4370 if (bytes > 25000) {
4371 if (packets > 35)
4372 itrval = low_latency;
4373 } else if (bytes < 1500) {
4374 itrval = low_latency;
4375 }
4376 break;
4377 }
4378 
4379 /* clear work counters since we have the values we need */
4380 ring_container->total_bytes = 0;
4381 ring_container->total_packets = 0;
4383 /* write updated itr to ring container */
4384 ring_container->itr = itrval;
4385 }
4386 
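/*
 * Editor's note: the heuristic above is a small three-state machine.
 * Example transitions with assumed per-interrupt counts: starting from
 * low_latency, 3 packets / 12000 bytes satisfies "bytes > 10000" and
 * "packets < 10", so itrval moves to bulk_latency; starting from
 * bulk_latency, 2 packets / 1000 bytes satisfies "bytes < 1500" and
 * moves it back to low_latency.
 */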
4387 static void igb_set_itr(struct igb_q_vector *q_vector)
4388 {
4389 struct igb_adapter *adapter = q_vector->adapter;
4390 u32 new_itr = q_vector->itr_val;
4391 u8 current_itr = 0;
4392 
4393 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4394 if (adapter->link_speed != SPEED_1000) {
4395 current_itr = 0;
4396 new_itr = IGB_4K_ITR;
4397 goto set_itr_now;
4398 }
4399 
4400 igb_update_itr(q_vector, &q_vector->tx);
4401 igb_update_itr(q_vector, &q_vector->rx);
4403 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4405 /* conservative mode (itr 3) eliminates the lowest_latency setting */
4406 if (current_itr == lowest_latency &&
4407 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4408 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4409 current_itr = low_latency;
4411 switch (current_itr) {
4412 /* counts and packets in update_itr are dependent on these numbers */
4413 case lowest_latency:
4414 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4415 break;
4416 case low_latency:
4417 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4418 break;
4419 case bulk_latency:
4420 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
4421 break;
4422 default:
4423 break;
4424 }
4425 
4426 set_itr_now:
4427 if (new_itr != q_vector->itr_val) {
4428 /* this attempts to bias the interrupt rate towards Bulk
4429 * by adding intermediate steps when interrupt rate is
4430 * increasing */
4431 new_itr = new_itr > q_vector->itr_val ?
4432 max((new_itr * q_vector->itr_val) /
4433 (new_itr + (q_vector->itr_val >> 2)),
4434 new_itr) :
4435 new_itr;
4436 /* Don't write the value here; it resets the adapter's
4437 * internal timer, and causes us to delay far longer than
4438 * we should between interrupts. Instead, we write the ITR
4439 * value at the beginning of the next interrupt so the timing
4440 * ends up being correct.
4442 q_vector->itr_val = new_itr;
4443 q_vector->set_itr = 1;
4444 }
4445 }
4446 
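/*
 * Editor's note: a worked evaluation (assumed values) of the damping
 * expression above: with q_vector->itr_val = 196 and a target new_itr
 * of 980, the weighted term is (980 * 196) / (980 + 196/4) =
 * 192080 / 1029 = 186, and the max() then selects 980. The write itself
 * is deferred via set_itr so the new interval takes effect at the next
 * interrupt rather than restarting the hardware timer immediately.
 */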
4447 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4448 u32 type_tucmd, u32 mss_l4len_idx)
4449 {
4450 struct e1000_adv_tx_context_desc *context_desc;
4451 u16 i = tx_ring->next_to_use;
4453 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4454 
4455 i++;
4456 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4458 /* set bits to identify this as an advanced context descriptor */
4459 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4461 /* For 82575, context index must be unique per ring. */
4462 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4463 mss_l4len_idx |= tx_ring->reg_idx << 4;
4465 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4466 context_desc->seqnum_seed = 0;
4467 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4468 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4469 }
4470 
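/*
 * Editor's note: a sketch of the 16-byte advanced context descriptor
 * populated above; the struct name is a hypothetical stand-in and the
 * field comments paraphrase the assignments in igb_tx_ctxtdesc().
 */
#if 0
struct example_adv_ctx_desc {
	__le32 vlan_macip_lens;	/* VLAN tag, MAC hdr len, IP hdr len */
	__le32 seqnum_seed;	/* unused here, always written as zero */
	__le32 type_tucmd_mlhl;	/* DEXT + CTXT plus L4 type flags */
	__le32 mss_l4len_idx;	/* MSS, L4 hdr len, context index */
};
#endif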
4471 static int igb_tso(struct igb_ring *tx_ring,
4472 struct igb_tx_buffer *first,
4473 u8 *hdr_len)
4474 {
4475 #ifdef NETIF_F_TSO
4476 struct sk_buff *skb = first->skb;
4477 u32 vlan_macip_lens, type_tucmd;
4478 u32 mss_l4len_idx, l4len;
4480 if (!skb_is_gso(skb))
4481 #endif /* NETIF_F_TSO */
4482 return 0;
4483 
4484 #ifdef NETIF_F_TSO
4485 if (skb_header_cloned(skb)) {
4486 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4487 if (err)
4488 return err;
4489 }
4490 
4491 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4492 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4494 if (first->protocol == __constant_htons(ETH_P_IP)) {
4495 struct iphdr *iph = ip_hdr(skb);
4496 iph->tot_len = 0;
4497 iph->check = 0;
4498 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4499 iph->daddr, 0,
4500 IPPROTO_TCP,
4501 0);
4502 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4503 first->tx_flags |= IGB_TX_FLAGS_TSO |
4504 IGB_TX_FLAGS_CSUM |
4505 IGB_TX_FLAGS_IPV4;
4507 } else if (skb_is_gso_v6(skb)) {
4508 ipv6_hdr(skb)->payload_len = 0;
4509 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4510 &ipv6_hdr(skb)->daddr,
4511 0, IPPROTO_TCP, 0);
4512 first->tx_flags |= IGB_TX_FLAGS_TSO |
4513 IGB_TX_FLAGS_CSUM;
4514 }
4515 
4517 /* compute header lengths */
4518 l4len = tcp_hdrlen(skb);
4519 *hdr_len = skb_transport_offset(skb) + l4len;
4521 /* update gso size and bytecount with header size */
4522 first->gso_segs = skb_shinfo(skb)->gso_segs;
4523 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4526 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4527 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4529 /* VLAN MACLEN IPLEN */
4530 vlan_macip_lens = skb_network_header_len(skb);
4531 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4532 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4534 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4535 
4536 return 1;
4537 #endif /* NETIF_F_TSO */
4538 }
4539 
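/*
 * Editor's note: a worked example (assumed frame layout) of the header
 * accounting in igb_tso() above: a 14-byte Ethernet header, 20-byte
 * IPv4 header and 20-byte TCP header give *hdr_len = 54; with
 * gso_segs = 4 the bytecount grows by (4 - 1) * 54 = 162 bytes, since
 * the hardware replicates the header for every segment after the first.
 */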
4540 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4541 {
4542 struct sk_buff *skb = first->skb;
4543 u32 vlan_macip_lens = 0;
4544 u32 mss_l4len_idx = 0;
4545 u32 type_tucmd = 0;
4546 
4547 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4548 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4549 return;
4550 } else {
4551 u8 l4_hdr = 0;
4552 switch (first->protocol) {
4553 case __constant_htons(ETH_P_IP):
4554 vlan_macip_lens |= skb_network_header_len(skb);
4555 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4556 l4_hdr = ip_hdr(skb)->protocol;
4557 break;
4558 #ifdef NETIF_F_IPV6_CSUM
4559 case __constant_htons(ETH_P_IPV6):
4560 vlan_macip_lens |= skb_network_header_len(skb);
4561 l4_hdr = ipv6_hdr(skb)->nexthdr;
4562 break;
4563 #endif /* NETIF_F_IPV6_CSUM */
4564 default:
4565 if (unlikely(net_ratelimit())) {
4566 dev_warn(tx_ring->dev,
4567 "partial checksum but proto=%x!\n",
4575 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4576 mss_l4len_idx = tcp_hdrlen(skb) <<
4577 E1000_ADVTXD_L4LEN_SHIFT;
4578 break;
4579 #ifdef HAVE_SCTP
4580 case IPPROTO_SCTP:
4581 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4582 mss_l4len_idx = sizeof(struct sctphdr) <<
4583 E1000_ADVTXD_L4LEN_SHIFT;
4584 break;
4585 #endif /* HAVE_SCTP */
4586 case IPPROTO_UDP:
4587 mss_l4len_idx = sizeof(struct udphdr) <<
4588 E1000_ADVTXD_L4LEN_SHIFT;
4589 break;
4590 default:
4591 if (unlikely(net_ratelimit())) {
4592 dev_warn(tx_ring->dev,
4593 "partial checksum but l4 proto=%x!\n",
4599 /* update TX checksum flag */
4600 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4601 }
4602 
4603 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4604 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4606 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4607 }
4608 
4609 static __le32 igb_tx_cmd_type(u32 tx_flags)
4610 {
4611 /* set type for advanced descriptor with frame checksum insertion */
4612 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4613 E1000_ADVTXD_DCMD_IFCS |
4614 E1000_ADVTXD_DCMD_DEXT);
4616 /* set HW vlan bit if vlan is present */
4617 if (tx_flags & IGB_TX_FLAGS_VLAN)
4618 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4620 /* set timestamp bit if present */
4621 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4622 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4624 /* set segmentation bits for TSO */
4625 if (tx_flags & IGB_TX_FLAGS_TSO)
4626 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4627 
4628 return cmd_type;
4629 }
4630 
4631 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4632 union e1000_adv_tx_desc *tx_desc,
4633 u32 tx_flags, unsigned int paylen)
4634 {
4635 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4636 
4637 /* 82575 requires a unique index per ring if any offload is enabled */
4638 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4639 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4640 olinfo_status |= tx_ring->reg_idx << 4;
4642 /* insert L4 checksum */
4643 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4644 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4646 /* insert IPv4 checksum */
4647 if (tx_flags & IGB_TX_FLAGS_IPV4)
4648 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4649 }
4650 
4651 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4654 /*
4655 * The largest size we can write to the descriptor is 65535. In order to
4656 * maintain a power of two alignment we have to limit ourselves to 32K.
4657 */
4658 #define IGB_MAX_TXD_PWR 15
4659 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
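/*
 * Editor's note: an illustrative helper (hypothetical, not driver code)
 * showing how many data descriptors a single mapped buffer consumes
 * given the 32KB-per-descriptor cap above; it mirrors the splitting
 * loop in igb_tx_map() below.
 */
#if 0
static unsigned int example_descs_needed(unsigned int size)
{
	/* ceiling division by 32KB: a 65535-byte buffer -> 2 descriptors */
	return (size + IGB_MAX_DATA_PER_TXD - 1) >> IGB_MAX_TXD_PWR;
}
#endif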
4661 static void igb_tx_map(struct igb_ring *tx_ring,
4662 struct igb_tx_buffer *first,
4663 const u8 hdr_len)
4664 {
4665 struct sk_buff *skb = first->skb;
4666 struct igb_tx_buffer *tx_buffer;
4667 union e1000_adv_tx_desc *tx_desc;
4668 dma_addr_t dma;
4669 #ifdef MAX_SKB_FRAGS
4670 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4671 unsigned int data_len = skb->data_len;
4672 #endif
4673 unsigned int size = skb_headlen(skb);
4674 unsigned int paylen = skb->len - hdr_len;
4675 __le32 cmd_type;
4676 u32 tx_flags = first->tx_flags;
4677 u16 i = tx_ring->next_to_use;
4679 tx_desc = IGB_TX_DESC(tx_ring, i);
4681 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4682 cmd_type = igb_tx_cmd_type(tx_flags);
4684 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4685 if (dma_mapping_error(tx_ring->dev, dma))
4688 /* record length, and DMA address */
4689 dma_unmap_len_set(first, len, size);
4690 dma_unmap_addr_set(first, dma, dma);
4691 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4693 #ifdef MAX_SKB_FRAGS
4694 
4695 for (;;) {
4696 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4697 tx_desc->read.cmd_type_len =
4698 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4699 
4700 i++;
4701 tx_desc++;
4702 if (i == tx_ring->count) {
4703 tx_desc = IGB_TX_DESC(tx_ring, 0);
4704 i = 0;
4705 }
4706 
4707 dma += IGB_MAX_DATA_PER_TXD;
4708 size -= IGB_MAX_DATA_PER_TXD;
4709 
4710 tx_desc->read.olinfo_status = 0;
4711 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4712 }
4713 
4714 #ifdef MAX_SKB_FRAGS
4715 if (likely(!data_len))
4716 break;
4717 #endif
4718 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4719 
4720 i++;
4721 tx_desc++;
4722 if (i == tx_ring->count) {
4723 tx_desc = IGB_TX_DESC(tx_ring, 0);
4724 i = 0;
4725 }
4726 
4727 size = skb_frag_size(frag);
4728 data_len -= size;
4729 
4730 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
4731 DMA_TO_DEVICE);
4732 if (dma_mapping_error(tx_ring->dev, dma))
4733 goto dma_error;
4734 
4735 tx_buffer = &tx_ring->tx_buffer_info[i];
4736 dma_unmap_len_set(tx_buffer, len, size);
4737 dma_unmap_addr_set(tx_buffer, dma, dma);
4739 tx_desc->read.olinfo_status = 0;
4740 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4741 
4742 frag++;
4743 }
4744 
4745 #endif /* MAX_SKB_FRAGS */
4746 #ifdef CONFIG_BQL
4747 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4748 #endif /* CONFIG_BQL */
4750 /* write last descriptor with RS and EOP bits */
4751 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4752 tx_desc->read.cmd_type_len = cmd_type;
4754 /* set the timestamp */
4755 first->time_stamp = jiffies;
4756 
4757 /*
4758 * Force memory writes to complete before letting h/w know there
4759 * are new descriptors to fetch. (Only applicable for weak-ordered
4760 * memory model archs, such as IA-64).
4762 * We also need this memory barrier to make certain all of the
4763 * status bits have been updated before next_to_watch is written.
4764 */
4765 wmb();
4766 
4767 /* set next_to_watch value indicating a packet is present */
4768 first->next_to_watch = tx_desc;
4769 
4770 i++;
4771 if (i == tx_ring->count)
4772 i = 0;
4773 
4774 tx_ring->next_to_use = i;
4776 writel(i, tx_ring->tail);
4778 /* we need this if more than one processor can write to our tail
4779 * at a time, it synchronizes IO on IA64/Altix systems */
4780 mmiowb();
4781 
4782 return;
4783 
4784 dma_error:
4785 dev_err(tx_ring->dev, "TX DMA map failed\n");
4787 /* clear dma mappings for failed tx_buffer_info map */
4788 for (;;) {
4789 tx_buffer = &tx_ring->tx_buffer_info[i];
4790 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
4791 if (tx_buffer == first)
4792 break;
4793 if (i == 0)
4794 i = tx_ring->count;
4795 i--;
4796 }
4797 
4798 tx_ring->next_to_use = i;
4799 }
4800 
4801 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4802 {
4803 struct net_device *netdev = netdev_ring(tx_ring);
4804 
4805 if (netif_is_multiqueue(netdev))
4806 netif_stop_subqueue(netdev, ring_queue_index(tx_ring));
4807 else
4808 netif_stop_queue(netdev);
4809 
4810 /* Herbert's original patch had:
4811 * smp_mb__after_netif_stop_queue();
4812 * but since that doesn't exist yet, just open code it. */
4813 smp_mb();
4814 
4815 /* We need to check again in a case another CPU has just
4816 * made room available. */
4817 if (igb_desc_unused(tx_ring) < size)
4818 return -EBUSY;
4819 
4820 /* A reprieve! */
4821 if (netif_is_multiqueue(netdev))
4822 netif_wake_subqueue(netdev, ring_queue_index(tx_ring));
4823 else
4824 netif_wake_queue(netdev);
4825 
4826 tx_ring->tx_stats.restart_queue++;
4827 
4828 return 0;
4829 }
4830 
4831 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4832 {
4833 if (igb_desc_unused(tx_ring) >= size)
4834 return 0;
4835 return __igb_maybe_stop_tx(tx_ring, size);
4836 }
4837 
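/*
 * Editor's note: a worked example (assumed skb) of the descriptor
 * budget used by igb_xmit_frame_ring() below: an skb with 3 page
 * fragments reserves 3 + 4 = 7 descriptors -- one per fragment, one
 * for skb->data, one for a possible context descriptor, and two as a
 * gap so the tail pointer never touches the head.
 */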
4838 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4839 struct igb_ring *tx_ring)
4840 {
4841 struct igb_tx_buffer *first;
4842 int tso;
4843 u32 tx_flags = 0;
4844 __be16 protocol = vlan_get_protocol(skb);
4845 u8 hdr_len = 0;
4846 
4847 /* need: 1 descriptor per page,
4848 * + 2 desc gap to keep tail from touching head,
4849 * + 1 desc for skb->data,
4850 * + 1 desc for context descriptor,
4851 * otherwise try next time */
4852 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4853 /* this is a hard error */
4854 return NETDEV_TX_BUSY;
4857 /* record the location of the first descriptor for this packet */
4858 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4859 first->skb = skb;
4860 first->bytecount = skb->len;
4861 first->gso_segs = 1;
4863 #ifdef HAVE_HW_TIME_STAMP
4864 #ifdef SKB_SHARED_TX_IS_UNION
4865 if (unlikely(skb_shinfo(skb)->tx_flags.flags & SKBTX_HW_TSTAMP)) {
4866 skb_shinfo(skb)->tx_flags.flags |= SKBTX_IN_PROGRESS;
4867 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4868 }
4869 #else
4870 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4871 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4872 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4873 }
4874 #endif /* SKB_SHARED_TX_IS_UNION */
4875 #endif /* HAVE_HW_TIME_STAMP */
4876 
4877 if (vlan_tx_tag_present(skb)) {
4878 tx_flags |= IGB_TX_FLAGS_VLAN;
4879 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4880 }
4881 
4882 /* record initial flags and protocol */
4883 first->tx_flags = tx_flags;
4884 first->protocol = protocol;
4886 tso = igb_tso(tx_ring, first, &hdr_len);
4887 if (tso < 0)
4888 goto out_drop;
4889 else if (!tso)
4890 igb_tx_csum(tx_ring, first);
4891 
4892 igb_tx_map(tx_ring, first, hdr_len);
4894 #ifndef HAVE_TRANS_START_IN_QUEUE
4895 netdev_ring(tx_ring)->trans_start = jiffies;
4896 #endif
4897 
4898 /* Make sure there is space in the ring for the next send. */
4899 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4901 return NETDEV_TX_OK;
4903 out_drop:
4904 igb_unmap_and_free_tx_resource(tx_ring, first);
4905 
4906 return NETDEV_TX_OK;
4907 }
4908 
4909 #ifdef HAVE_TX_MQ
4910 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4911 struct sk_buff *skb)
4912 {
4913 unsigned int r_idx = skb->queue_mapping;
4915 if (r_idx >= adapter->num_tx_queues)
4916 r_idx = r_idx % adapter->num_tx_queues;
4918 return adapter->tx_ring[r_idx];
4919 }
4920 #else
4921 #define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0]
4922 #endif
4923 
4924 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4925 struct net_device *netdev)
4926 {
4927 struct igb_adapter *adapter = netdev_priv(netdev);
4928 
4929 if (test_bit(__IGB_DOWN, &adapter->state)) {
4930 dev_kfree_skb_any(skb);
4931 return NETDEV_TX_OK;
4934 if (skb->len <= 0) {
4935 dev_kfree_skb_any(skb);
4936 return NETDEV_TX_OK;
4940 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4941 * in order to meet this minimum size requirement.
4943 if (skb->len < 17) {
4944 if (skb_padto(skb, 17))
4945 return NETDEV_TX_OK;
4946 skb->len = 17;
4947 }
4948 
4949 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4950 }
4951 
4952 /**
4953 * igb_tx_timeout - Respond to a Tx Hang
4954 * @netdev: network interface device structure
4956 static void igb_tx_timeout(struct net_device *netdev)
4957 {
4958 struct igb_adapter *adapter = netdev_priv(netdev);
4959 struct e1000_hw *hw = &adapter->hw;
4961 /* Do the reset outside of interrupt context */
4962 adapter->tx_timeout_count++;
4964 if (hw->mac.type >= e1000_82580)
4965 hw->dev_spec._82575.global_device_reset = true;
4967 schedule_work(&adapter->reset_task);
4968 E1000_WRITE_REG(hw, E1000_EICS,
4969 (adapter->eims_enable_mask & ~adapter->eims_other));
4970 }
4971 
4972 static void igb_reset_task(struct work_struct *work)
4973 {
4974 struct igb_adapter *adapter;
4975 adapter = container_of(work, struct igb_adapter, reset_task);
4977 igb_reinit_locked(adapter);
4978 }
4979 
4980 /**
4981 * igb_get_stats - Get System Network Statistics
4982 * @netdev: network interface device structure
4984 * Returns the address of the device statistics structure.
4985 * The statistics are updated here and also from the timer callback.
4987 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4988 {
4989 struct igb_adapter *adapter = netdev_priv(netdev);
4990 
4991 if (!test_bit(__IGB_RESETTING, &adapter->state))
4992 igb_update_stats(adapter);
4994 #ifdef HAVE_NETDEV_STATS_IN_NETDEV
4995 /* only return the current stats */
4996 return &netdev->stats;
4997 #else
4998 /* only return the current stats */
4999 return &adapter->net_stats;
5000 #endif /* HAVE_NETDEV_STATS_IN_NETDEV */
5001 }
5002 
5003 /**
5004 * igb_change_mtu - Change the Maximum Transfer Unit
5005 * @netdev: network interface device structure
5006 * @new_mtu: new value for maximum frame size
5008 * Returns 0 on success, negative on failure
5010 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
5011 {
5012 struct igb_adapter *adapter = netdev_priv(netdev);
5013 struct pci_dev *pdev = adapter->pdev;
5014 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
5015 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5016 u32 rx_buffer_len, i;
5017 #endif
5018 
5019 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
5020 dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n");
5024 #define MAX_STD_JUMBO_FRAME_SIZE 9238
5025 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
5026 dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n");
5030 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
5031 usleep_range(1000, 2000);
5033 /* igb_down has a dependency on max_frame_size */
5034 adapter->max_frame_size = max_frame;
5036 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5037 #ifdef IGB_PER_PKT_TIMESTAMP
5038 if (adapter->hw.mac.type >= e1000_82580)
5039 max_frame += IGB_TS_HDR_LEN;
5040 #endif
5041 
5042 /*
5043 * RLPML prevents us from receiving a frame larger than max_frame so
5044 * it is safe to just set the rx_buffer_len to max_frame without the
5045 * risk of an skb over panic.
5046 */
5047 if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
5048 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
5050 rx_buffer_len = max_frame;
5051 #endif
5052 
5053 if (netif_running(netdev))
5054 igb_down(adapter);
5055 
5056 dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n",
5057 netdev->mtu, new_mtu);
5058 netdev->mtu = new_mtu;
5060 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
5061 for (i = 0; i < adapter->num_rx_queues; i++)
5062 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
5063 #endif
5064 
5065 if (netif_running(netdev))
5066 igb_up(adapter);
5067 else
5068 igb_reset(adapter);
5069 
5070 clear_bit(__IGB_RESETTING, &adapter->state);
5071 
5072 return 0;
5073 }
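/*
 * Editor's note: a worked example of the max_frame arithmetic above:
 * for the default MTU of 1500, max_frame = 1500 + ETH_HLEN (14) +
 * ETH_FCS_LEN (4) + VLAN_HLEN (4) = 1522 bytes, so the standard
 * VLAN-sized receive buffer is retained; an MTU of 9000 gives a
 * 9022-byte max_frame, which stays under the 9238-byte jumbo limit.
 */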
5074 
5075 /**
5076 * igb_update_stats - Update the board statistics counters
5077 * @adapter: board private structure
5080 void igb_update_stats(struct igb_adapter *adapter)
5082 #ifdef HAVE_NETDEV_STATS_IN_NETDEV
5083 struct net_device_stats *net_stats = &adapter->netdev->stats;
5085 struct net_device_stats *net_stats = &adapter->net_stats;
5086 #endif /* HAVE_NETDEV_STATS_IN_NETDEV */
5087 struct e1000_hw *hw = &adapter->hw;
5089 struct pci_dev *pdev = adapter->pdev;
5090 u32 reg, mpc;
5091 u16 phy_tmp;
5092 int i;
5093 u64 bytes, packets;
5094 
5095 #ifndef IGB_NO_LRO
5096 u32 flushed = 0, coal = 0, recycled = 0;
5097 struct igb_q_vector *q_vector;
5098 #endif
5099 
5100 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
5103 * Prevent stats update while adapter is being reset, or if the pci
5104 * connection is down.
5106 if (adapter->link_speed == 0)
5107 return;
5108 
5109 if (pci_channel_offline(pdev))
5110 return;
5111 
5112 #ifndef IGB_NO_LRO
5114 for (i = 0; i < adapter->num_q_vectors; i++) {
5115 q_vector = adapter->q_vector[i];
5116 if (!q_vector || !q_vector->lrolist)
5117 continue;
5118 flushed += q_vector->lrolist->stats.flushed;
5119 coal += q_vector->lrolist->stats.coal;
5120 recycled += q_vector->lrolist->stats.recycled;
5122 adapter->lro_stats.flushed = flushed;
5123 adapter->lro_stats.coal = coal;
5124 adapter->lro_stats.recycled = recycled;
5125 #endif /* IGB_NO_LRO */
5126 
5127 bytes = 0;
5128 packets = 0;
5129 for (i = 0; i < adapter->num_rx_queues; i++) {
5130 u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF;
5131 struct igb_ring *ring = adapter->rx_ring[i];
5132 ring->rx_stats.drops += rqdpc_tmp;
5133 net_stats->rx_fifo_errors += rqdpc_tmp;
5134 #ifdef CONFIG_IGB_VMDQ_NETDEV
5135 if (!ring->vmdq_netdev) {
5136 bytes += ring->rx_stats.bytes;
5137 packets += ring->rx_stats.packets;
5138 }
5139 #else
5140 bytes += ring->rx_stats.bytes;
5141 packets += ring->rx_stats.packets;
5142 #endif
5143 }
5144 
5145 net_stats->rx_bytes = bytes;
5146 net_stats->rx_packets = packets;
5147 
5148 bytes = 0;
5149 packets = 0;
5150 for (i = 0; i < adapter->num_tx_queues; i++) {
5151 struct igb_ring *ring = adapter->tx_ring[i];
5152 #ifdef CONFIG_IGB_VMDQ_NETDEV
5153 if (!ring->vmdq_netdev) {
5154 bytes += ring->tx_stats.bytes;
5155 packets += ring->tx_stats.packets;
5156 }
5157 #else
5158 bytes += ring->tx_stats.bytes;
5159 packets += ring->tx_stats.packets;
5160 #endif
5161 }
5162 net_stats->tx_bytes = bytes;
5163 net_stats->tx_packets = packets;
5165 /* read stats registers */
5166 adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5167 adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC);
5168 adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL);
5169 E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */
5170 adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC);
5171 adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC);
5172 adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC);
5174 adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64);
5175 adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127);
5176 adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255);
5177 adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511);
5178 adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5179 adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5180 adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
5181 adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC);
5183 mpc = E1000_READ_REG(hw, E1000_MPC);
5184 adapter->stats.mpc += mpc;
5185 net_stats->rx_fifo_errors += mpc;
5186 adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC);
5187 adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL);
5188 adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC);
5189 adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL);
5190 adapter->stats.dc += E1000_READ_REG(hw, E1000_DC);
5191 adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC);
5192 adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5193 adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5194 adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
5195 adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5196 adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5197 adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC);
5198 adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL);
5199 E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */
5200 adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC);
5201 adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC);
5202 adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC);
5203 adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC);
5204 adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH);
5205 adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH);
5206 adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR);
5208 adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5209 adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5210 adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5211 adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5212 adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5213 adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5215 adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC);
5216 adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC);
5218 adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT);
5219 adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC);
5221 adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5222 /* read internal phy specific stats */
5223 reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
5224 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
5225 adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5226 adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5229 adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5230 adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5232 adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC);
5233 adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5234 adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5235 adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5236 adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5237 adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5238 adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5239 adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5240 adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5242 /* Fill out the OS statistics structure */
5243 net_stats->multicast = adapter->stats.mprc;
5244 net_stats->collisions = adapter->stats.colc;
5245 
5246 /* Rx Errors */
5247 
5248 /* RLEC on some newer hardware can be incorrect so build
5249 * our own version based on RUC and ROC */
5250 net_stats->rx_errors = adapter->stats.rxerrc +
5251 adapter->stats.crcerrs + adapter->stats.algnerrc +
5252 adapter->stats.ruc + adapter->stats.roc +
5253 adapter->stats.cexterr;
5254 net_stats->rx_length_errors = adapter->stats.ruc +
5255 adapter->stats.roc;
5256 net_stats->rx_crc_errors = adapter->stats.crcerrs;
5257 net_stats->rx_frame_errors = adapter->stats.algnerrc;
5258 net_stats->rx_missed_errors = adapter->stats.mpc;
5261 net_stats->tx_errors = adapter->stats.ecol +
5262 adapter->stats.latecol;
5263 net_stats->tx_aborted_errors = adapter->stats.ecol;
5264 net_stats->tx_window_errors = adapter->stats.latecol;
5265 net_stats->tx_carrier_errors = adapter->stats.tncrs;
5267 /* Tx Dropped needs to be maintained elsewhere */
5270 if (hw->phy.media_type == e1000_media_type_copper) {
5271 if ((adapter->link_speed == SPEED_1000) &&
5272 (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
5273 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
5274 adapter->phy_stats.idle_errors += phy_tmp;
5278 /* Management Stats */
5279 adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5280 adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5281 if (hw->mac.type > e1000_82580) {
5282 adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC);
5283 adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC);
5284 adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC);
5285 adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC);
5286 }
5287 }
5288 
5289 static irqreturn_t igb_msix_other(int irq, void *data)
5290 {
5291 struct igb_adapter *adapter = data;
5292 struct e1000_hw *hw = &adapter->hw;
5293 u32 icr = E1000_READ_REG(hw, E1000_ICR);
5294 /* reading ICR causes bit 31 of EICR to be cleared */
5296 if (icr & E1000_ICR_DRSTA)
5297 schedule_work(&adapter->reset_task);
5299 if (icr & E1000_ICR_DOUTSYNC) {
5300 /* HW is reporting DMA is out of sync */
5301 adapter->stats.doosync++;
5302 /* The DMA Out of Sync is also indication of a spoof event
5303 * in IOV mode. Check the Wrong VM Behavior register to
5304 * see if it is really a spoof event. */
5305 igb_check_wvbr(adapter);
5308 /* Check for a mailbox event */
5309 if (icr & E1000_ICR_VMMB)
5310 igb_msg_task(adapter);
5312 if (icr & E1000_ICR_LSC) {
5313 hw->mac.get_link_status = 1;
5314 /* guard against interrupt when we're going down */
5315 if (!test_bit(__IGB_DOWN, &adapter->state))
5316 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5319 /* Check for MDD event */
5320 if (icr & E1000_ICR_MDDET)
5321 igb_process_mdd_event(adapter);
5323 E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other);
5324 
5325 return IRQ_HANDLED;
5326 }
5327 
5328 static void igb_write_itr(struct igb_q_vector *q_vector)
5329 {
5330 struct igb_adapter *adapter = q_vector->adapter;
5331 u32 itr_val = q_vector->itr_val & 0x7FFC;
5333 if (!q_vector->set_itr)
5334 return;
5335 
5336 if (!itr_val)
5337 itr_val = 0x4;
5338 
5339 if (adapter->hw.mac.type == e1000_82575)
5340 itr_val |= itr_val << 16;
5341 else
5342 itr_val |= E1000_EITR_CNT_IGNR;
5344 writel(itr_val, q_vector->itr_register);
5345 q_vector->set_itr = 0;
5346 }
5347 
5348 static irqreturn_t igb_msix_ring(int irq, void *data)
5349 {
5350 struct igb_q_vector *q_vector = data;
5351 
5352 /* Write the ITR value calculated from the previous interrupt. */
5353 igb_write_itr(q_vector);
5355 napi_schedule(&q_vector->napi);
5356 
5357 return IRQ_HANDLED;
5358 }
5359 
5360 #ifdef IGB_DCA
5361 static void igb_update_dca(struct igb_q_vector *q_vector)
5362 {
5363 struct igb_adapter *adapter = q_vector->adapter;
5364 struct e1000_hw *hw = &adapter->hw;
5365 int cpu = get_cpu();
5367 if (q_vector->cpu == cpu)
5368 goto out_no_update;
5369 
5370 if (q_vector->tx.ring) {
5371 int q = q_vector->tx.ring->reg_idx;
5372 u32 dca_txctrl = E1000_READ_REG(hw, E1000_DCA_TXCTRL(q));
5373 if (hw->mac.type == e1000_82575) {
5374 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
5375 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
5377 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
5378 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
5379 E1000_DCA_TXCTRL_CPUID_SHIFT_82576;
5381 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
5382 E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(q), dca_txctrl);
5384 if (q_vector->rx.ring) {
5385 int q = q_vector->rx.ring->reg_idx;
5386 u32 dca_rxctrl = E1000_READ_REG(hw, E1000_DCA_RXCTRL(q));
5387 if (hw->mac.type == e1000_82575) {
5388 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
5389 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
5391 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
5392 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
5393 E1000_DCA_RXCTRL_CPUID_SHIFT_82576;
5395 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
5396 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
5397 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
5398 E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(q), dca_rxctrl);
5400 q_vector->cpu = cpu;
5401 out_no_update:
5402 put_cpu();
5403 }
5404 
5405 static void igb_setup_dca(struct igb_adapter *adapter)
5406 {
5407 struct e1000_hw *hw = &adapter->hw;
5408 int i;
5409 
5410 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
5411 return;
5412 
5413 /* Always use CB2 mode, difference is masked in the CB driver. */
5414 E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
5416 for (i = 0; i < adapter->num_q_vectors; i++) {
5417 adapter->q_vector[i]->cpu = -1;
5418 igb_update_dca(adapter->q_vector[i]);
5419 }
5420 }
5421 
5422 static int __igb_notify_dca(struct device *dev, void *data)
5423 {
5424 struct net_device *netdev = dev_get_drvdata(dev);
5425 struct igb_adapter *adapter = netdev_priv(netdev);
5426 struct pci_dev *pdev = adapter->pdev;
5427 struct e1000_hw *hw = &adapter->hw;
5428 unsigned long event = *(unsigned long *)data;
5429 
5430 switch (event) {
5431 case DCA_PROVIDER_ADD:
5432 /* if already enabled, don't do it again */
5433 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
5434 break;
5435 if (dca_add_requester(dev) == E1000_SUCCESS) {
5436 adapter->flags |= IGB_FLAG_DCA_ENABLED;
5437 dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
5438 igb_setup_dca(adapter);
5439 break;
5440 }
5441 /* Fall Through since DCA is disabled. */
5442 case DCA_PROVIDER_REMOVE:
5443 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
5444 /* without this a class_device is left
5445 * hanging around in the sysfs model */
5446 dca_remove_requester(dev);
5447 dev_info(pci_dev_to_dev(pdev), "DCA disabled\n");
5448 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
5449 E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE);
5450 }
5451 break;
5452 }
5453 
5454 return E1000_SUCCESS;
5457 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5458 void *p)
5459 {
5460 int ret_val;
5461 
5462 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5463 __igb_notify_dca);
5464 
5465 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
5466 }
5467 #endif /* IGB_DCA */
5468 
5469 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5471 unsigned char mac_addr[ETH_ALEN];
5472 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5473 struct pci_dev *pdev = adapter->pdev;
5474 struct e1000_hw *hw = &adapter->hw;
5475 struct pci_dev *pvfdev;
5476 unsigned int device_id;
5480 random_ether_addr(mac_addr);
5481 igb_set_vf_mac(adapter, vf, mac_addr);
5483 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5484 switch (adapter->hw.mac.type) {
5486 device_id = IGB_82576_VF_DEV_ID;
5487 /* VF Stride for 82576 is 2 */
5488 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5492 device_id = IGB_I350_VF_DEV_ID;
5493 /* VF Stride for I350 is 4 */
5494 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5503 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5505 if (pvfdev->devfn == thisvf_devfn)
5507 pvfdev = pci_get_device(hw->vendor_id,
5512 adapter->vf_data[vf].vfdev = pvfdev;
5515 "Couldn't find pci dev ptr for VF %4.4x\n",
5517 return pvfdev != NULL;
5523 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5524 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5526 struct e1000_hw *hw = &adapter->hw;
5527 struct pci_dev *pdev = adapter->pdev;
5528 struct pci_dev *pvfdev;
5531 unsigned int device_id;
5534 switch (adapter->hw.mac.type) {
5536 device_id = IGB_82576_VF_DEV_ID;
5537 /* VF Stride for 82576 is 2 */
5541 device_id = IGB_I350_VF_DEV_ID;
5542 /* VF Stride for I350 is 4 */
5551 vf_devfn = pdev->devfn + 0x80;
5552 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5554 if (pvfdev->devfn == vf_devfn)
5556 vf_devfn += vf_stride;
5557 pvfdev = pci_get_device(hw->vendor_id,
5565 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5567 #ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED
5569 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5570 if (adapter->vf_data[i].vfdev) {
5571 if (adapter->vf_data[i].vfdev->dev_flags &
5572 PCI_DEV_FLAGS_ASSIGNED)
5580 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5582 struct e1000_hw *hw = &adapter->hw;
5586 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5587 ping = E1000_PF_CONTROL_MSG;
5588 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5589 ping |= E1000_VT_MSGTYPE_CTS;
5590 e1000_write_mbx(hw, &ping, 1, i);
5591 }
5592 }
5593 
5594 /**
5595 * igb_mta_set - Set multicast filter table address
5596 * @adapter: pointer to the adapter structure
5597 * @hash_value: determines the MTA register and bit to set
5599 * The multicast table address is a register array of 32-bit registers.
5600 * The hash_value is used to determine what register the bit is in, the
5601 * current value is read, the new bit is OR'd in and the new value is
5602 * written back into the register.
5603 **/
5604 void igb_mta_set(struct igb_adapter *adapter, u32 hash_value)
5605 {
5606 struct e1000_hw *hw = &adapter->hw;
5607 u32 hash_bit, hash_reg, mta;
5608 
5609 /*
5610 * The MTA is a register array of 32-bit registers. It is
5611 * treated like an array of (32*mta_reg_count) bits. We want to
5612 * set bit BitArray[hash_value]. So we figure out what register
5613 * the bit is in, read it, OR in the new bit, then write
5614 * back the new value. The (hw->mac.mta_reg_count - 1) serves as a
5615 * mask to bits 31:5 of the hash value which gives us the
5616 * register we're modifying. The hash bit within that register
5617 * is determined by the lower 5 bits of the hash value.
5618 */
5619 hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
5620 hash_bit = hash_value & 0x1F;
5622 mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg);
5624 mta |= (1 << hash_bit);
5626 E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta);
5627 E1000_WRITE_FLUSH(hw);
5628 }
5629 
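/*
 * Editor's note: a worked example (assumed 128-entry MTA, i.e.
 * mta_reg_count = 128) of the indexing above: for hash_value 0x0563,
 * hash_reg = (0x0563 >> 5) & 0x7F = 43 and hash_bit = 0x0563 & 0x1F = 3,
 * so bit 3 of MTA[43] is OR'd in and the register is written back.
 */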
5630 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5633 struct e1000_hw *hw = &adapter->hw;
5634 u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf));
5635 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5637 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5638 IGB_VF_FLAG_MULTI_PROMISC);
5639 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5641 #ifdef IGB_ENABLE_VF_PROMISC
5642 if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) {
5643 vmolr |= E1000_VMOLR_ROPE;
5644 vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC;
5645 *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST;
5648 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5649 vmolr |= E1000_VMOLR_MPME;
5650 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5651 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5654 * if we have hashes and we are clearing a multicast promisc
5655 * flag we need to write the hashes to the MTA as this step
5656 * was previously skipped
5658 if (vf_data->num_vf_mc_hashes > 30) {
5659 vmolr |= E1000_VMOLR_MPME;
5660 } else if (vf_data->num_vf_mc_hashes) {
5662 vmolr |= E1000_VMOLR_ROMPE;
5663 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5664 igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
5668 E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr);
5670 /* there are flags left unprocessed, likely not supported */
5671 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5672 return -E1000_ERR_MBX;
5673 
5674 return 0;
5675 }
5676 
5678 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5679 u32 *msgbuf, u32 vf)
5681 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5682 u16 *hash_list = (u16 *)&msgbuf[1];
5683 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5686 /* salt away the number of multicast addresses assigned
5687 * to this VF for later use to restore when the PF multicast
5688 * list changes
5689 */
5690 vf_data->num_vf_mc_hashes = n;
5692 /* only up to 30 hash values supported */
5693 if (n > 30)
5694 n = 30;
5695 
5696 /* store the hashes for later use */
5697 for (i = 0; i < n; i++)
5698 vf_data->vf_mc_hashes[i] = hash_list[i];
5700 /* Flush and reset the mta with the new values */
5701 igb_set_rx_mode(adapter->netdev);
5706 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5708 struct e1000_hw *hw = &adapter->hw;
5709 struct vf_data_storage *vf_data;
5712 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5713 u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
5714 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5716 vf_data = &adapter->vf_data[i];
5718 if ((vf_data->num_vf_mc_hashes > 30) ||
5719 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5720 vmolr |= E1000_VMOLR_MPME;
5721 } else if (vf_data->num_vf_mc_hashes) {
5722 vmolr |= E1000_VMOLR_ROMPE;
5723 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5724 igb_mta_set(adapter, vf_data->vf_mc_hashes[j]);
5726 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
5730 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5732 struct e1000_hw *hw = &adapter->hw;
5733 u32 pool_mask, reg, vid;
5737 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5739 /* Find the vlan filter for this id */
5740 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5741 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5743 /* remove the vf from the pool */
5746 /* if pool is empty then remove entry from vfta */
5747 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5748 (reg & E1000_VLVF_VLANID_ENABLE)) {
5750 vid = reg & E1000_VLVF_VLANID_MASK;
5751 igb_vfta_set(adapter, vid, FALSE);
5754 E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5757 adapter->vf_data[vf].vlans_enabled = 0;
5759 vlan_default = adapter->vf_data[vf].default_vf_vlan_id;
5761 igb_vlvf_set(adapter, vlan_default, true, vf);
5764 s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5766 struct e1000_hw *hw = &adapter->hw;
5769 /* The vlvf table only exists on 82576 hardware and newer */
5770 if (hw->mac.type < e1000_82576)
5773 /* we only need to do this if VMDq is enabled */
5774 if (!adapter->vmdq_pools)
5777 /* Find the vlan filter for this id */
5778 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5779 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5780 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5781 vid == (reg & E1000_VLVF_VLANID_MASK))
5786 if (i == E1000_VLVF_ARRAY_SIZE) {
5787 /* Did not find a matching VLAN ID entry that was
5788 * enabled. Search for a free filter entry, i.e.
5789 * one without the enable bit set
5791 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5792 reg = E1000_READ_REG(hw, E1000_VLVF(i));
5793 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5797 if (i < E1000_VLVF_ARRAY_SIZE) {
5798 /* Found an enabled/available entry */
5799 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5801 /* if !enabled we need to set this up in vfta */
5802 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5803 /* add VID to filter table */
5804 igb_vfta_set(adapter, vid, TRUE);
5805 reg |= E1000_VLVF_VLANID_ENABLE;
5807 reg &= ~E1000_VLVF_VLANID_MASK;
5809 E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5811 /* do not modify RLPML for PF devices */
5812 if (vf >= adapter->vfs_allocated_count)
5813 return E1000_SUCCESS;
5815 if (!adapter->vf_data[vf].vlans_enabled) {
5817 reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
5818 size = reg & E1000_VMOLR_RLPML_MASK;
5820 reg &= ~E1000_VMOLR_RLPML_MASK;
5822 E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
5825 adapter->vf_data[vf].vlans_enabled++;
5828 if (i < E1000_VLVF_ARRAY_SIZE) {
5829 /* remove vf from the pool */
5830 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5831 /* if pool is empty then remove entry from vfta */
5832 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5834 igb_vfta_set(adapter, vid, FALSE);
5836 E1000_WRITE_REG(hw, E1000_VLVF(i), reg);
5838 /* do not modify RLPML for PF devices */
5839 if (vf >= adapter->vfs_allocated_count)
5840 return E1000_SUCCESS;
5842 adapter->vf_data[vf].vlans_enabled--;
5843 if (!adapter->vf_data[vf].vlans_enabled) {
5845 reg = E1000_READ_REG(hw, E1000_VMOLR(vf));
5846 size = reg & E1000_VMOLR_RLPML_MASK;
5848 reg &= ~E1000_VMOLR_RLPML_MASK;
5850 E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg);
5854 return E1000_SUCCESS;
5858 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5860 struct e1000_hw *hw = &adapter->hw;
5863 E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5865 E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0);
5868 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5869 int vf, u16 vlan, u8 qos)
5872 struct igb_adapter *adapter = netdev_priv(netdev);
5874 /* VLAN IDs accepted range 0-4094 */
5875 if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7))
5878 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5881 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5882 igb_set_vmolr(adapter, vf, !vlan);
5883 adapter->vf_data[vf].pf_vlan = vlan;
5884 adapter->vf_data[vf].pf_qos = qos;
5885 igb_set_vf_vlan_strip(adapter, vf, true);
5886 dev_info(&adapter->pdev->dev,
5887 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5888 if (test_bit(__IGB_DOWN, &adapter->state)) {
5889 dev_warn(&adapter->pdev->dev,
5890 "The VF VLAN has been set,"
5891 " but the PF device is not up.\n");
5892 dev_warn(&adapter->pdev->dev,
5893 "Bring the PF device up before"
5894 " attempting to use the VF device.\n");
5897 if (adapter->vf_data[vf].pf_vlan)
5898 dev_info(&adapter->pdev->dev,
5899 "Clearing VLAN on VF %d\n", vf);
5900 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5902 igb_set_vmvir(adapter, vlan, vf);
5903 igb_set_vmolr(adapter, vf, true);
5904 igb_set_vf_vlan_strip(adapter, vf, false);
5905 adapter->vf_data[vf].pf_vlan = 0;
5906 adapter->vf_data[vf].pf_qos = 0;
5913 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5915 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5916 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5919 igb_set_vf_vlan_strip(adapter, vf, true);
5921 igb_set_vf_vlan_strip(adapter, vf, false);
5923 return igb_vlvf_set(adapter, vid, add, vf);
5926 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5928 struct e1000_hw *hw = &adapter->hw;
5930 /* clear flags except flag that the PF has set the MAC */
5931 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5932 adapter->vf_data[vf].last_nack = jiffies;
5934 /* reset offloads to defaults */
5935 igb_set_vmolr(adapter, vf, true);
5937 /* reset vlans for device */
5938 igb_clear_vf_vfta(adapter, vf);
5940 if (adapter->vf_data[vf].pf_vlan)
5941 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5942 adapter->vf_data[vf].pf_vlan,
5943 adapter->vf_data[vf].pf_qos);
5945 igb_clear_vf_vfta(adapter, vf);
5948 /* reset multicast table array for vf */
5949 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5951 /* Flush and reset the mta with the new values */
5952 igb_set_rx_mode(adapter->netdev);
5955 * Reset the VFs TDWBAL and TDWBAH registers which are not
5958 E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0);
5959 E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0);
5960 if (hw->mac.type == e1000_82576) {
5961 E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0);
5962 E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0);
5966 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5968 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5970 /* generate a new mac address as we were hotplug removed/added */
5971 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5972 random_ether_addr(vf_mac);
5974 /* process remaining reset events */
5975 igb_vf_reset(adapter, vf);
5978 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5980 struct e1000_hw *hw = &adapter->hw;
5981 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5983 u8 *addr = (u8 *)(&msgbuf[1]);
5985 /* process all the same items cleared in a function level reset */
5986 igb_vf_reset(adapter, vf);
5988 /* set vf mac address */
5989 igb_del_mac_filter(adapter, vf_mac, vf);
5990 igb_add_mac_filter(adapter, vf_mac, vf);
5992 /* enable transmit and receive for vf */
5993 reg = E1000_READ_REG(hw, E1000_VFTE);
5994 E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf));
5995 reg = E1000_READ_REG(hw, E1000_VFRE);
5996 E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf));
5998 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
6000 /* reply to reset with ack and vf mac address */
6001 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
6002 memcpy(addr, vf_mac, 6);
6003 e1000_write_mbx(hw, msgbuf, 3, vf);
6006 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
6009 * The VF MAC Address is stored in a packed array of bytes
6010 * starting at the second 32 bit word of the msg array
6012 unsigned char *addr = (unsigned char *)&msg[1];
6015 if (is_valid_ether_addr(addr))
6016 err = igb_set_vf_mac(adapter, vf, addr);
6021 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
6023 struct e1000_hw *hw = &adapter->hw;
6024 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6025 u32 msg = E1000_VT_MSGTYPE_NACK;
6027 /* if device isn't clear to send it shouldn't be reading either */
6028 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
6029 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6030 e1000_write_mbx(hw, &msg, 1, vf);
6031 vf_data->last_nack = jiffies;
6035 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
6037 struct pci_dev *pdev = adapter->pdev;
6038 u32 msgbuf[E1000_VFMAILBOX_SIZE];
6039 struct e1000_hw *hw = &adapter->hw;
6040 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
6043 retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
6046 dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n");
6050 /* this is a message we already processed, do nothing */
6051 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
6055 * until the vf completes a reset it should not be
6056 * allowed to start any configuration.
6059 if (msgbuf[0] == E1000_VF_RESET) {
6060 igb_vf_reset_msg(adapter, vf);
6064 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
6065 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
6066 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
6067 e1000_write_mbx(hw, msgbuf, 1, vf);
6068 vf_data->last_nack = jiffies;
6073 switch ((msgbuf[0] & 0xFFFF)) {
6074 case E1000_VF_SET_MAC_ADDR:
6076 #ifndef IGB_DISABLE_VF_MAC_SET
6077 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
6078 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
6081 "VF %d attempted to override administratively "
6082 "set MAC address\nReload the VF driver to "
6083 "resume operations\n", vf);
6086 case E1000_VF_SET_PROMISC:
6087 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
6089 case E1000_VF_SET_MULTICAST:
6090 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
6092 case E1000_VF_SET_LPE:
6093 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
6095 case E1000_VF_SET_VLAN:
6098 if (vf_data->pf_vlan)
6100 "VF %d attempted to override administratively "
6101 "set VLAN tag\nReload the VF driver to "
6102 "resume operations\n", vf);
6105 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
6108 dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]);
6109 retval = -E1000_ERR_MBX;
6113 /* notify the VF of the results of what it sent us */
6115 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
6117 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
6119 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
6121 e1000_write_mbx(hw, msgbuf, 1, vf);
6124 static void igb_msg_task(struct igb_adapter *adapter)
6126 struct e1000_hw *hw = &adapter->hw;
6129 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
6130 /* process any reset requests */
6131 if (!e1000_check_for_rst(hw, vf))
6132 igb_vf_reset_event(adapter, vf);
6134 /* process any messages pending */
6135 if (!e1000_check_for_msg(hw, vf))
6136 igb_rcv_msg_from_vf(adapter, vf);
6138 /* process any acks */
6139 if (!e1000_check_for_ack(hw, vf))
6140 igb_rcv_ack_from_vf(adapter, vf);
6145 * igb_set_uta - Set unicast filter table address
6146 * @adapter: board private structure
6148 * The unicast table address is a register array of 32-bit registers.
6149 * The table is meant to be used in a way similar to how the MTA is used
6150 * however due to certain limitations in the hardware it is necessary to
6151 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
6152 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
6154 static void igb_set_uta(struct igb_adapter *adapter)
6156 struct e1000_hw *hw = &adapter->hw;
6159 /* The UTA table only exists on 82576 hardware and newer */
6160 if (hw->mac.type < e1000_82576)
6163 /* we only need to do this if VMDq is enabled */
6164 if (!adapter->vmdq_pools)
6167 for (i = 0; i < hw->mac.uta_reg_count; i++)
6168 E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0);
6172 * igb_intr_msi - Interrupt Handler
6173 * @irq: interrupt number
6174 * @data: pointer to a network interface device structure
6176 static irqreturn_t igb_intr_msi(int irq, void *data)
6178 struct igb_adapter *adapter = data;
6179 struct igb_q_vector *q_vector = adapter->q_vector[0];
6180 struct e1000_hw *hw = &adapter->hw;
6181 /* read ICR disables interrupts using IAM */
6182 u32 icr = E1000_READ_REG(hw, E1000_ICR);
6184 igb_write_itr(q_vector);
6186 if (icr & E1000_ICR_DRSTA)
6187 schedule_work(&adapter->reset_task);
6189 if (icr & E1000_ICR_DOUTSYNC) {
6190 /* HW is reporting DMA is out of sync */
6191 adapter->stats.doosync++;
6194 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6195 hw->mac.get_link_status = 1;
6196 if (!test_bit(__IGB_DOWN, &adapter->state))
6197 mod_timer(&adapter->watchdog_timer, jiffies + 1);
6200 napi_schedule(&q_vector->napi);
6206 * igb_intr - Legacy Interrupt Handler
6207 * @irq: interrupt number
6208 * @data: pointer to a network interface device structure
6210 static irqreturn_t igb_intr(int irq, void *data)
6212 struct igb_adapter *adapter = data;
6213 struct igb_q_vector *q_vector = adapter->q_vector[0];
6214 struct e1000_hw *hw = &adapter->hw;
6215 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
6216 * need for the IMC write */
6217 u32 icr = E1000_READ_REG(hw, E1000_ICR);
6219 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
6220 * not set, then the adapter didn't send an interrupt */
6221 if (!(icr & E1000_ICR_INT_ASSERTED))
6224 igb_write_itr(q_vector);
6226 if (icr & E1000_ICR_DRSTA)
6227 schedule_work(&adapter->reset_task);
6229 if (icr & E1000_ICR_DOUTSYNC) {
6230 /* HW is reporting DMA is out of sync */
6231 adapter->stats.doosync++;
6234 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
6235 hw->mac.get_link_status = 1;
6236 /* guard against interrupt when we're going down */
6237 if (!test_bit(__IGB_DOWN, &adapter->state))
6238 mod_timer(&adapter->watchdog_timer, jiffies + 1);
	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}
6246 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
6248 struct igb_adapter *adapter = q_vector->adapter;
6249 struct e1000_hw *hw = &adapter->hw;
6251 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
6252 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
			igb_set_itr(q_vector);
		else
			igb_update_ring_itr(q_vector);
	}
6259 if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->msix_entries)
			E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value);
		else
			igb_irq_enable(adapter);
	}
}
6268 * igb_poll - NAPI Rx polling callback
6269 * @napi: napi polling structure
6270 * @budget: count of how many packets we should handle
6272 static int igb_poll(struct napi_struct *napi, int budget)
6274 struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi);
6275 bool clean_complete = true;
6278 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
6279 igb_update_dca(q_vector);
6281 if (q_vector->tx.ring)
6282 clean_complete = igb_clean_tx_irq(q_vector);
6284 if (q_vector->rx.ring)
6285 clean_complete &= igb_clean_rx_irq(q_vector, budget);
#ifndef HAVE_NETDEV_NAPI_LIST
	/* if netdev is disabled we need to stop polling */
	if (!netif_running(q_vector->adapter->netdev))
		clean_complete = true;

#endif /* HAVE_NETDEV_NAPI_LIST */
	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;

	/* If not enough Rx work done, exit the polling mode */
	napi_complete(napi);
	igb_ring_irq_enable(q_vector);

	return 0;
}
6304 #ifdef HAVE_HW_TIME_STAMP
6306 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
6307 * @adapter: board private structure
6308 * @shhwtstamps: timestamp structure to update
6309 * @regval: unsigned 64bit system time value.
6311 * We need to convert the system time value stored in the RX/TXSTMP registers
6312 * into a hwtstamp which can be used by the upper level timestamping functions
6314 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
6315 struct skb_shared_hwtstamps *shhwtstamps,
6321 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
6322 * 24 to match clock shift we setup earlier.
6324 if (adapter->hw.mac.type >= e1000_82580)
6325 regval <<= IGB_82580_TSYNC_SHIFT;
6327 ns = timecounter_cyc2time(&adapter->clock, regval);
	/*
	 * force a timecompare_update here (even if less than a second
	 * has passed) in order to prevent the case when ptpd or other
	 * software jumps the clock offset. Otherwise there is a small
	 * window when the timestamp would be based on previous skew
	 * and invalid results would be pushed to the network stack.
	 */
6336 timecompare_update(&adapter->compare, 0);
6337 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
6338 shhwtstamps->hwtstamp = ns_to_ktime(ns);
6339 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
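/*
 * Worked example (illustrative): on 82580-class hardware the RX/TXSTMP
 * registers count in 1 ns units starting at bit 0, so a raw value of
 * 0x1000 (4096 ns) is shifted left by IGB_82580_TSYNC_SHIFT (24) to line
 * up with the cyclecounter shift chosen at init, before
 * timecounter_cyc2time() converts it back to nanoseconds.
 */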
6343 * igb_tx_hwtstamp - utility function which checks for TX time stamp
6344 * @q_vector: pointer to q_vector containing needed info
6345 * @buffer: pointer to igb_tx_buffer structure
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we
 * allow only one such packet into the queue.
 **/
6351 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
6352 struct igb_tx_buffer *buffer_info)
6354 struct igb_adapter *adapter = q_vector->adapter;
6355 struct e1000_hw *hw = &adapter->hw;
6356 struct skb_shared_hwtstamps shhwtstamps;
6359 /* if skb does not support hw timestamp or TX stamp not valid exit */
6360 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
6361 !(E1000_READ_REG(hw, E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
6364 regval = E1000_READ_REG(hw, E1000_TXSTMPL);
6365 regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32;
6367 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
6368 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
6373 * igb_clean_tx_irq - Reclaim resources after transmit completes
6374 * @q_vector: pointer to q_vector containing needed info
6375 * returns TRUE if ring is completely cleaned
6377 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
6379 struct igb_adapter *adapter = q_vector->adapter;
6380 struct igb_ring *tx_ring = q_vector->tx.ring;
6381 struct igb_tx_buffer *tx_buffer;
6382 union e1000_adv_tx_desc *tx_desc, *eop_desc;
6383 unsigned int total_bytes = 0, total_packets = 0;
6384 unsigned int budget = q_vector->tx.work_limit;
6385 unsigned int i = tx_ring->next_to_clean;
	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;
6390 tx_buffer = &tx_ring->tx_buffer_info[i];
6391 tx_desc = IGB_TX_DESC(tx_ring, i);
6392 i -= tx_ring->count;
	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */
		rmb();

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;
6411 /* update the statistics for this packet */
6412 total_bytes += tx_buffer->bytecount;
6413 total_packets += tx_buffer->gso_segs;
6415 #ifdef HAVE_HW_TIME_STAMP
6416 /* retrieve hardware timestamp */
6417 igb_tx_hwtstamp(q_vector, tx_buffer);
6421 dev_kfree_skb_any(tx_buffer->skb);
6423 /* unmap skb header data */
6424 dma_unmap_single(tx_ring->dev,
6425 dma_unmap_addr(tx_buffer, dma),
6426 dma_unmap_len(tx_buffer, len),
6429 /* clear tx_buffer data */
6430 tx_buffer->skb = NULL;
6431 dma_unmap_len_set(tx_buffer, len, 0);
6433 /* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}
6444 /* unmap any remaining paged data */
6445 if (dma_unmap_len(tx_buffer, len)) {
6446 dma_unmap_page(tx_ring->dev,
6447 dma_unmap_addr(tx_buffer, dma),
6448 dma_unmap_len(tx_buffer, len),
6450 dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
	}
#ifdef CONFIG_BQL
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
#endif /* CONFIG_BQL */
6470 i += tx_ring->count;
6471 tx_ring->next_to_clean = i;
6472 tx_ring->tx_stats.bytes += total_bytes;
6473 tx_ring->tx_stats.packets += total_packets;
6474 q_vector->tx.total_bytes += total_bytes;
6475 q_vector->tx.total_packets += total_packets;
6477 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
6478 struct e1000_hw *hw = &adapter->hw;
6480 eop_desc = tx_buffer->next_to_watch;
		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		if (eop_desc &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(E1000_READ_REG(hw, E1000_STATUS) &
		      E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  Tx Queue             <%d>\n"
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%p>\n"
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				tx_buffer->next_to_watch,
				jiffies,
				eop_desc->wb.status);
6513 if (netif_is_multiqueue(netdev_ring(tx_ring)))
6514 netif_stop_subqueue(netdev_ring(tx_ring),
6515 ring_queue_index(tx_ring));
6517 netif_stop_queue(netdev_ring(tx_ring));
6519 /* we are about to reset, no point in enabling stuff */
6524 if (unlikely(total_packets &&
6525 netif_carrier_ok(netdev_ring(tx_ring)) &&
6526 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (netif_is_multiqueue(netdev_ring(tx_ring))) {
6532 if (__netif_subqueue_stopped(netdev_ring(tx_ring),
6533 ring_queue_index(tx_ring)) &&
6534 !(test_bit(__IGB_DOWN, &adapter->state))) {
6535 netif_wake_subqueue(netdev_ring(tx_ring),
6536 ring_queue_index(tx_ring));
6537 tx_ring->tx_stats.restart_queue++;
6540 if (netif_queue_stopped(netdev_ring(tx_ring)) &&
6541 !(test_bit(__IGB_DOWN, &adapter->state))) {
6542 netif_wake_queue(netdev_ring(tx_ring));
6543 tx_ring->tx_stats.restart_queue++;
6551 #ifdef HAVE_VLAN_RX_REGISTER
6553 * igb_receive_skb - helper function to handle rx indications
6554 * @q_vector: structure containing interrupt and ring information
6555 * @skb: packet to send up
6557 static void igb_receive_skb(struct igb_q_vector *q_vector,
6558 struct sk_buff *skb)
6560 struct vlan_group **vlgrp = netdev_priv(skb->dev);
	if (IGB_CB(skb)->vid) {
		if (*vlgrp) {
			vlan_gro_receive(&q_vector->napi, *vlgrp,
					 IGB_CB(skb)->vid, skb);
		} else {
			dev_kfree_skb_any(skb);
		}
	} else {
		napi_gro_receive(&q_vector->napi, skb);
	}
}
6574 #endif /* HAVE_VLAN_RX_REGISTER */
6575 static inline void igb_rx_checksum(struct igb_ring *ring,
6576 union e1000_adv_rx_desc *rx_desc,
6577 struct sk_buff *skb)
6579 skb_checksum_none_assert(skb);
6581 /* Ignore Checksum bit is set */
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
		return;
6585 /* Rx checksum disabled via ethtool */
#ifdef HAVE_NDO_SET_FEATURES
	if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM))
#else
	if (!test_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags))
#endif
		return;
6593 /* TCP/UDP checksum error bit is set */
6594 if (igb_test_staterr(rx_desc,
6595 E1000_RXDEXT_STATERR_TCPE |
6596 E1000_RXDEXT_STATERR_IPE)) {
6598 * work around errata with sctp packets where the TCPE aka
6599 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
6600 * packets, (aka let the stack check the crc32c)
6602 if (!((skb->len == 60) &&
6603 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags)))
6604 ring->rx_stats.csum_err++;
6606 /* let the stack verify checksum errors */
6609 /* It must be a TCP or UDP packet with a valid checksum */
6610 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
6611 E1000_RXD_STAT_UDPCS))
6612 skb->ip_summed = CHECKSUM_UNNECESSARY;
6615 #ifdef NETIF_F_RXHASH
6616 static inline void igb_rx_hash(struct igb_ring *ring,
6617 union e1000_adv_rx_desc *rx_desc,
6618 struct sk_buff *skb)
6620 if (netdev_ring(ring)->features & NETIF_F_RXHASH)
6621 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6625 #ifdef HAVE_HW_TIME_STAMP
6626 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6627 union e1000_adv_rx_desc *rx_desc,
6628 struct sk_buff *skb)
6630 struct igb_adapter *adapter = q_vector->adapter;
6631 struct e1000_hw *hw = &adapter->hw;
6634 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6635 E1000_RXDADV_STAT_TS))
6639 * If this bit is set, then the RX registers contain the time stamp. No
6640 * other packet will be time stamped until we read these registers, so
6641 * read the registers to make them available again. Because only one
6642 * packet can be time stamped at a time, we know that the register
6643 * values must belong to this one here and therefore we don't need to
6644 * compare any of the additional attributes stored for it.
6646 * If nothing went wrong, then it should have a skb_shared_tx that we
6647 * can turn into a skb_shared_hwtstamps.
6649 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6650 u32 *stamp = (u32 *)skb->data;
6651 regval = le32_to_cpu(*(stamp + 2));
6652 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6653 skb_pull(skb, IGB_TS_HDR_LEN);
6655 if(!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6658 regval = E1000_READ_REG(hw, E1000_RXSTMPL);
6659 regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32;
6662 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6665 static void igb_rx_vlan(struct igb_ring *ring,
6666 union e1000_adv_rx_desc *rx_desc,
6667 struct sk_buff *skb)
6669 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6671 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6672 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6673 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6675 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6676 #ifdef HAVE_VLAN_RX_REGISTER
6677 IGB_CB(skb)->vid = vid;
6679 IGB_CB(skb)->vid = 0;
6681 __vlan_hwaccel_put_tag(skb, vid);
6686 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
6687 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6689 /* HW will not DMA in data larger than the given buffer, even if it
6690 * parses the (NFS, of course) header to be larger. In that case, it
6691 * fills the header buffer and spills the rest into the page.
6693 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) &
6694 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6695 if (hlen > IGB_RX_HDR_LEN)
6696 hlen = IGB_RX_HDR_LEN;
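/*
 * Worked example (illustrative, assuming the usual definitions
 * E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 and E1000_RXDADV_HDRBUFLEN_SHIFT 5):
 * a hdr_info value of 0x0C40 yields (0x0C40 & 0x7FE0) >> 5 = 98, i.e. the
 * hardware placed a 98-byte header in the header buffer.
 */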
/**
 * igb_merge_active_tail - merge active tail into lro skb
 * @tail: pointer to active tail in frag_list
 *
 * This function merges the length and data of an active tail into the
 * skb containing the frag_list. It resets the tail's pointer to the head,
 * but it leaves the head's pointer to the tail intact.
 **/
6710 static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail)
6712 struct sk_buff *head = IGB_CB(tail)->head;
6717 head->len += tail->len;
6718 head->data_len += tail->len;
6719 head->truesize += tail->len;
6721 IGB_CB(tail)->head = NULL;
/**
 * igb_add_active_tail - adds an active tail into the skb frag_list
 * @head: pointer to the start of the skb
 * @tail: pointer to active tail to add to frag_list
 *
 * This function adds an active tail to the end of the frag list. This tail
 * will still be receiving data so we cannot yet add its stats to the main
 * skb. That is done via igb_merge_active_tail.
 **/
6735 static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail)
6737 struct sk_buff *old_tail = IGB_CB(head)->tail;
6740 igb_merge_active_tail(old_tail);
6741 old_tail->next = tail;
6743 skb_shinfo(head)->frag_list = tail;
6746 IGB_CB(tail)->head = head;
6747 IGB_CB(head)->tail = tail;
6749 IGB_CB(head)->append_cnt++;
/**
 * igb_close_active_frag_list - cleanup pointers on a frag_list skb
 * @head: pointer to head of an active frag list
 *
 * This function clears the frag_tail_tracker pointer on an active
 * frag_list and returns true if the pointer was actually set.
 **/
6759 static inline bool igb_close_active_frag_list(struct sk_buff *head)
6761 struct sk_buff *tail = IGB_CB(head)->tail;
6766 igb_merge_active_tail(tail);
6768 IGB_CB(head)->tail = NULL;
6774 * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled
6775 * @adapter: board private structure
6776 * @rx_desc: pointer to the rx descriptor
6777 * @skb: pointer to the skb to be merged
6780 static inline bool igb_can_lro(struct igb_ring *rx_ring,
6781 union e1000_adv_rx_desc *rx_desc,
6782 struct sk_buff *skb)
6784 struct iphdr *iph = (struct iphdr *)skb->data;
6785 __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
6787 /* verify LRO is enabled */
6788 if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO))
6791 /* verify hardware indicates this is IPv4/TCP */
6792 if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) ||
6793 !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4))))
6796 /* verify the header is large enough for us to read IP/TCP fields */
6797 if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr)))
6800 /* verify there are no VLANs on packet */
6801 if (skb->protocol != __constant_htons(ETH_P_IP))
6804 /* ensure we are version 4 with no options */
6805 if (*(u8 *)iph != 0x45)
6808 /* .. and the packet is not fragmented */
6809 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
6812 /* .. and that next header is TCP */
6813 if (iph->protocol != IPPROTO_TCP)
6819 static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb)
6821 return (struct igb_lrohdr *)skb->data;
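/*
 * Illustrative note on igb_can_lro() above: the "*(u8 *)iph != 0x45" test
 * folds two checks into a single byte compare - version nibble 4 and IHL
 * nibble 5 (a 20-byte header with no options). An IPv4 header carrying
 * options (first byte 0x46 or higher) fails the test and bypasses LRO.
 */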
/**
 * igb_lro_flush - Indicate packets to upper layer.
 *
 * Update the IP and TCP headers of the head skb if more than one skb is
 * chained, then indicate the packets to the upper layer.
 **/
6830 static void igb_lro_flush(struct igb_q_vector *q_vector,
6831 struct sk_buff *skb)
6833 struct igb_lro_list *lrolist = q_vector->lrolist;
6835 __skb_unlink(skb, &lrolist->active);
6837 if (IGB_CB(skb)->append_cnt) {
6838 struct igb_lrohdr *lroh = igb_lro_hdr(skb);
6840 /* close any active lro contexts */
6841 igb_close_active_frag_list(skb);
6843 /* incorporate ip header and re-calculate checksum */
6844 lroh->iph.tot_len = ntohs(skb->len);
6845 lroh->iph.check = 0;
6847 /* header length is 5 since we know no options exist */
6848 lroh->iph.check = ip_fast_csum((u8 *)lroh, 5);
6850 /* clear TCP checksum to indicate we are an LRO frame */
6853 /* incorporate latest timestamp into the tcp header */
6854 if (IGB_CB(skb)->tsecr) {
6855 lroh->ts[2] = IGB_CB(skb)->tsecr;
6856 lroh->ts[1] = htonl(IGB_CB(skb)->tsval);
6860 skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss;
6864 #ifdef HAVE_VLAN_RX_REGISTER
6865 igb_receive_skb(q_vector, skb);
6867 napi_gro_receive(&q_vector->napi, skb);
6869 lrolist->stats.flushed++;
6872 static void igb_lro_flush_all(struct igb_q_vector *q_vector)
6874 struct igb_lro_list *lrolist = q_vector->lrolist;
6875 struct sk_buff *skb, *tmp;
6877 skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp)
6878 igb_lro_flush(q_vector, skb);
/**
 * igb_lro_header_ok - check whether a packet's headers allow it to be
 * merged by LRO, and prime the per-skb LRO metadata
 **/
6884 static void igb_lro_header_ok(struct sk_buff *skb)
6886 struct igb_lrohdr *lroh = igb_lro_hdr(skb);
6887 u16 opt_bytes, data_len;
6889 IGB_CB(skb)->tail = NULL;
6890 IGB_CB(skb)->tsecr = 0;
6891 IGB_CB(skb)->append_cnt = 0;
6892 IGB_CB(skb)->mss = 0;
6894 /* ensure that the checksum is valid */
6895 if (skb->ip_summed != CHECKSUM_UNNECESSARY)
6898 /* If we see CE codepoint in IP header, packet is not mergeable */
6899 if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph)))
6902 /* ensure no bits set besides ack or psh */
6903 if (lroh->th.fin || lroh->th.syn || lroh->th.rst ||
6904 lroh->th.urg || lroh->th.ece || lroh->th.cwr ||
6908 /* store the total packet length */
6909 data_len = ntohs(lroh->iph.tot_len);
6911 /* remove any padding from the end of the skb */
6912 __pskb_trim(skb, data_len);
6914 /* remove header length from data length */
6915 data_len -= sizeof(struct igb_lrohdr);
	/*
	 * check for timestamps. Since the only option we handle is the
	 * timestamp option, we only have to handle the simple case of
	 * aligned timestamps.
	 */
6921 opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr);
6922 if (opt_bytes != 0) {
6923 if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) ||
6924 !pskb_may_pull(skb, sizeof(struct igb_lrohdr) +
6925 TCPOLEN_TSTAMP_ALIGNED) ||
6926 (lroh->ts[0] != htonl((TCPOPT_NOP << 24) |
6927 (TCPOPT_NOP << 16) |
6928 (TCPOPT_TIMESTAMP << 8) |
6929 TCPOLEN_TIMESTAMP)) ||
6930 (lroh->ts[2] == 0)) {
6934 IGB_CB(skb)->tsval = ntohl(lroh->ts[1]);
6935 IGB_CB(skb)->tsecr = lroh->ts[2];
6937 data_len -= TCPOLEN_TSTAMP_ALIGNED;
6940 /* record data_len as mss for the packet */
6941 IGB_CB(skb)->mss = data_len;
6942 IGB_CB(skb)->next_seq = ntohl(lroh->th.seq);
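/*
 * Worked example (illustrative): the ts[0] compare above matches the
 * canonical aligned timestamp layout NOP, NOP, kind 8, length 10, i.e.
 * htonl((1 << 24) | (1 << 16) | (8 << 8) | 10) == htonl(0x0101080A);
 * any other option arrangement disqualifies the packet from merging.
 */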
6945 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
6946 static bool igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb)
6948 struct sk_buff *tail;
6949 struct skb_shared_info *tail_info;
6950 struct skb_shared_info *new_skb_info;
6953 /* header must be empty to pull frags into current skb */
6954 if (skb_headlen(new_skb))
6957 if (IGB_CB(lro_skb)->tail)
6958 tail = IGB_CB(lro_skb)->tail;
6962 tail_info = skb_shinfo(tail);
6963 new_skb_info = skb_shinfo(new_skb);
6965 /* make sure we have room in frags list */
6966 if (new_skb_info->nr_frags >= (MAX_SKB_FRAGS - tail_info->nr_frags))
6969 /* bump append count */
6970 IGB_CB(lro_skb)->append_cnt++;
6972 /* copy frags into the last skb */
6973 memcpy(tail_info->frags + tail_info->nr_frags,
6974 new_skb_info->frags,
6975 new_skb_info->nr_frags * sizeof(skb_frag_t));
6977 /* copy size data over */
6978 tail_info->nr_frags += new_skb_info->nr_frags;
6979 data_len = IGB_CB(new_skb)->mss;
6980 tail->len += data_len;
6981 tail->data_len += data_len;
6982 tail->truesize += data_len;
6984 /* wipe record of data from new_skb */
6985 new_skb_info->nr_frags = 0;
6986 new_skb->len = new_skb->data_len = 0;
6987 new_skb->truesize -= data_len;
6988 new_skb->data = new_skb->head + NET_SKB_PAD + NET_IP_ALIGN;
6989 skb_reset_tail_pointer(new_skb);
6990 new_skb->protocol = 0;
6991 new_skb->ip_summed = CHECKSUM_NONE;
6992 #ifdef HAVE_VLAN_RX_REGISTER
6993 IGB_CB(new_skb)->vid = 0;
6995 new_skb->vlan_tci = 0;
7001 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7003 * igb_lro_queue - if able, queue skb into lro chain
7004 * @q_vector: structure containing interrupt and ring information
7005 * @new_skb: pointer to current skb being checked
 * Checks whether the given skb is eligible for LRO and, if so, chains it
 * to the existing lro_skb for its flow. If no LRO context exists for the
 * flow yet, a new one is created.
 **/
7011 static struct sk_buff *igb_lro_queue(struct igb_q_vector *q_vector,
7012 struct sk_buff *new_skb)
7014 struct sk_buff *lro_skb;
7015 struct igb_lro_list *lrolist = q_vector->lrolist;
7016 struct igb_lrohdr *lroh = igb_lro_hdr(new_skb);
7017 __be32 saddr = lroh->iph.saddr;
7018 __be32 daddr = lroh->iph.daddr;
7019 __be32 tcp_ports = *(__be32 *)&lroh->th;
7021 #ifdef HAVE_VLAN_RX_REGISTER
7022 u16 vid = IGB_CB(new_skb)->vid;
7024 u16 vid = new_skb->vlan_tci;
7027 igb_lro_header_ok(new_skb);
7030 * we have a packet that might be eligible for LRO,
7031 * so see if it matches anything we might expect
7033 skb_queue_walk(&lrolist->active, lro_skb) {
7034 if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports ||
7035 igb_lro_hdr(lro_skb)->iph.saddr != saddr ||
7036 igb_lro_hdr(lro_skb)->iph.daddr != daddr)
7039 #ifdef HAVE_VLAN_RX_REGISTER
7040 if (IGB_CB(lro_skb)->vid != vid)
7042 if (lro_skb->vlan_tci != vid)
7046 /* out of order packet */
7047 if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) {
7048 igb_lro_flush(q_vector, lro_skb);
7049 IGB_CB(new_skb)->mss = 0;
7053 /* TCP timestamp options have changed */
7054 if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) {
7055 igb_lro_flush(q_vector, lro_skb);
7059 /* make sure timestamp values are increasing */
7060 if (IGB_CB(lro_skb)->tsecr &&
7061 IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) {
7062 igb_lro_flush(q_vector, lro_skb);
7063 IGB_CB(new_skb)->mss = 0;
7067 data_len = IGB_CB(new_skb)->mss;
		/*
		 * Flush if we see a malformed header, no tcp data, a
		 * resultant packet that would be too large, or a new skb
		 * that is larger than our current mss.
		 */
7073 if (data_len == 0 ||
7074 data_len > IGB_CB(lro_skb)->mss ||
7075 data_len > IGB_CB(lro_skb)->free) {
7076 igb_lro_flush(q_vector, lro_skb);
7080 /* ack sequence numbers or window size has changed */
7081 if (igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq ||
7082 igb_lro_hdr(lro_skb)->th.window != lroh->th.window) {
7083 igb_lro_flush(q_vector, lro_skb);
7087 /* Remove IP and TCP header*/
7088 skb_pull(new_skb, new_skb->len - data_len);
7090 /* update timestamp and timestamp echo response */
7091 IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval;
7092 IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr;
7094 /* update sequence and free space */
7095 IGB_CB(lro_skb)->next_seq += data_len;
7096 IGB_CB(lro_skb)->free -= data_len;
7098 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7099 /* if header is empty pull pages into current skb */
7100 if (igb_merge_frags(lro_skb, new_skb)) {
7101 lrolist->stats.recycled++;
7104 /* chain this new skb in frag_list */
7105 igb_add_active_tail(lro_skb, new_skb);
7107 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7111 if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh) {
7112 igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh;
7113 igb_lro_flush(q_vector, lro_skb);
7116 lrolist->stats.coal++;
7120 if (IGB_CB(new_skb)->mss && !lroh->th.psh) {
7121 /* if we are at capacity flush the tail */
7122 if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) {
7123 lro_skb = skb_peek_tail(&lrolist->active);
7125 igb_lro_flush(q_vector, lro_skb);
7128 /* update sequence and free space */
7129 IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss;
7130 IGB_CB(new_skb)->free = 65521 - new_skb->len;
7132 /* .. and insert at the front of the active list */
7133 __skb_queue_head(&lrolist->active, new_skb);
7135 lrolist->stats.coal++;
7139 /* packet not handled by any of the above, pass it to the stack */
7140 #ifdef HAVE_VLAN_RX_REGISTER
7141 igb_receive_skb(q_vector, new_skb);
7143 napi_gro_receive(&q_vector->napi, new_skb);
7148 #endif /* IGB_NO_LRO */
7149 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
7151 struct igb_ring *rx_ring = q_vector->rx.ring;
7152 union e1000_adv_rx_desc *rx_desc;
7153 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7154 const int current_node = numa_node_id();
7156 unsigned int total_bytes = 0, total_packets = 0;
7157 u16 cleaned_count = igb_desc_unused(rx_ring);
7158 u16 i = rx_ring->next_to_clean;
7160 rx_desc = IGB_RX_DESC(rx_ring, i);
7162 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
7163 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
7164 struct sk_buff *skb = buffer_info->skb;
7165 union e1000_adv_rx_desc *next_rxd;
7167 buffer_info->skb = NULL;
7168 prefetch(skb->data);
7171 if (i == rx_ring->count)
7174 next_rxd = IGB_RX_DESC(rx_ring, i);
7178 * This memory barrier is needed to keep us from reading
7179 * any other fields out of the rx_desc until we know the
7180 * RXD_STAT_DD bit is set
7184 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7185 __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
7186 dma_unmap_single(rx_ring->dev, buffer_info->dma,
7187 rx_ring->rx_buffer_len,
7189 buffer_info->dma = 0;
7192 if (!skb_is_nonlinear(skb)) {
7193 __skb_put(skb, igb_get_hlen(rx_desc));
7194 dma_unmap_single(rx_ring->dev, buffer_info->dma,
7197 buffer_info->dma = 0;
7200 if (rx_desc->wb.upper.length) {
7201 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
7203 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
7205 buffer_info->page_offset,
7209 skb->data_len += length;
7210 skb->truesize += length;
7212 if ((page_count(buffer_info->page) != 1) ||
7213 (page_to_nid(buffer_info->page) != current_node))
7214 buffer_info->page = NULL;
7216 get_page(buffer_info->page);
7218 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
7219 PAGE_SIZE / 2, DMA_FROM_DEVICE);
7220 buffer_info->page_dma = 0;
7223 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
7224 struct igb_rx_buffer *next_buffer;
7225 next_buffer = &rx_ring->rx_buffer_info[i];
7226 buffer_info->skb = next_buffer->skb;
7227 buffer_info->dma = next_buffer->dma;
7228 next_buffer->skb = skb;
7229 next_buffer->dma = 0;
7233 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7234 if (igb_test_staterr(rx_desc,
7235 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
7236 dev_kfree_skb_any(skb);
7240 #ifdef HAVE_HW_TIME_STAMP
7241 igb_rx_hwtstamp(q_vector, rx_desc, skb);
7243 #ifdef NETIF_F_RXHASH
7244 igb_rx_hash(rx_ring, rx_desc, skb);
7246 igb_rx_checksum(rx_ring, rx_desc, skb);
7247 igb_rx_vlan(rx_ring, rx_desc, skb);
7249 total_bytes += skb->len;
7252 skb->protocol = eth_type_trans(skb, netdev_ring(rx_ring));
7255 if (igb_can_lro(rx_ring, rx_desc, skb))
7256 buffer_info->skb = igb_lro_queue(q_vector, skb);
7259 #ifdef HAVE_VLAN_RX_REGISTER
7260 igb_receive_skb(q_vector, skb);
7262 napi_gro_receive(&q_vector->napi, skb);
7266 netdev_ring(rx_ring)->last_rx = jiffies;
7276 /* return some buffers to hardware, one at a time is too slow */
7277 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
7278 igb_alloc_rx_buffers(rx_ring, cleaned_count);
7282 /* use prefetched values */
7286 rx_ring->next_to_clean = i;
7287 rx_ring->rx_stats.packets += total_packets;
7288 rx_ring->rx_stats.bytes += total_bytes;
7289 q_vector->rx.total_packets += total_packets;
7290 q_vector->rx.total_bytes += total_bytes;
7293 igb_alloc_rx_buffers(rx_ring, cleaned_count);
7296 if (netdev_ring(rx_ring)->features & NETIF_F_LRO)
7297 igb_lro_flush_all(q_vector);
7299 #endif /* IGB_NO_LRO */
7303 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
7304 struct igb_rx_buffer *bi)
7306 struct sk_buff *skb = bi->skb;
7307 dma_addr_t dma = bi->dma;
7313 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7314 skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
7315 rx_ring->rx_buffer_len);
7317 skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring),
7322 rx_ring->rx_stats.alloc_failed++;
7326 /* initialize skb for ring */
7327 skb_record_rx_queue(skb, ring_queue_index(rx_ring));
7330 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7331 dma = dma_map_single(rx_ring->dev, skb->data,
7332 rx_ring->rx_buffer_len, DMA_FROM_DEVICE);
7334 dma = dma_map_single(rx_ring->dev, skb->data,
7335 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
7338 if (dma_mapping_error(rx_ring->dev, dma)) {
7339 rx_ring->rx_stats.alloc_failed++;
7347 #ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
7348 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
7349 struct igb_rx_buffer *bi)
7351 struct page *page = bi->page;
7352 dma_addr_t page_dma = bi->page_dma;
7353 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
7359 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
7361 if (unlikely(!page)) {
7362 rx_ring->rx_stats.alloc_failed++;
7367 page_dma = dma_map_page(rx_ring->dev, page,
7368 page_offset, PAGE_SIZE / 2,
7371 if (dma_mapping_error(rx_ring->dev, page_dma)) {
7372 rx_ring->rx_stats.alloc_failed++;
7376 bi->page_dma = page_dma;
7377 bi->page_offset = page_offset;
7381 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
/**
 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
 * @rx_ring: rx descriptor ring to allocate new receive buffers for
 * @cleaned_count: number of buffers to allocate
 **/
7386 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
7388 union e1000_adv_rx_desc *rx_desc;
7389 struct igb_rx_buffer *bi;
7390 u16 i = rx_ring->next_to_use;
7392 rx_desc = IGB_RX_DESC(rx_ring, i);
7393 bi = &rx_ring->rx_buffer_info[i];
7394 i -= rx_ring->count;
7396 while (cleaned_count--) {
7397 if (!igb_alloc_mapped_skb(rx_ring, bi))
7400 /* Refresh the desc even if buffer_addrs didn't change
7401 * because each write-back erases this info. */
7402 #ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT
7403 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
7405 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
7407 if (!igb_alloc_mapped_page(rx_ring, bi))
7410 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
7412 #endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */
7417 rx_desc = IGB_RX_DESC(rx_ring, 0);
7418 bi = rx_ring->rx_buffer_info;
7419 i -= rx_ring->count;
7422 /* clear the hdr_addr for the next_to_use descriptor */
7423 rx_desc->read.hdr_addr = 0;
7426 i += rx_ring->count;
7428 if (rx_ring->next_to_use != i) {
7429 rx_ring->next_to_use = i;
7431 /* Force memory writes to complete before letting h/w
7432 * know there are new descriptors to fetch. (Only
7433 * applicable for weak-ordered memory model archs,
7434 * such as IA-64). */
		wmb();
		writel(i, rx_ring->tail);
7447 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7449 struct igb_adapter *adapter = netdev_priv(netdev);
7450 struct mii_ioctl_data *data = if_mii(ifr);
7452 if (adapter->hw.phy.media_type != e1000_media_type_copper)
7457 data->phy_id = adapter->hw.phy.addr;
7460 if (!capable(CAP_NET_ADMIN))
7462 if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
7470 return E1000_SUCCESS;
7474 #ifdef HAVE_HW_TIME_STAMP
7476 * igb_hwtstamp_ioctl - control hardware time stamping
7481 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
7483 * when no packet needs it. At most one packet in the queue may be
7484 * marked for time stamping, otherwise it would be impossible to tell
7485 * for sure to which packet the hardware time stamp belongs.
7487 * Incoming time stamping has to be configured via the hardware
7488 * filters. Not all combinations are supported, in particular event
7489 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * level 2 or 4".
 **/
7494 static int igb_hwtstamp_ioctl(struct net_device *netdev,
7495 struct ifreq *ifr, int cmd)
7497 struct igb_adapter *adapter = netdev_priv(netdev);
7498 struct e1000_hw *hw = &adapter->hw;
7499 struct hwtstamp_config config;
7500 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
7501 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
7502 u32 tsync_rx_cfg = 0;
7507 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
7510 /* reserved for future extensions */
7514 switch (config.tx_type) {
7515 case HWTSTAMP_TX_OFF:
7517 case HWTSTAMP_TX_ON:
7523 switch (config.rx_filter) {
7524 case HWTSTAMP_FILTER_NONE:
7527 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
7528 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
7529 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
7530 case HWTSTAMP_FILTER_ALL:
7532 * register TSYNCRXCFG must be set, therefore it is not
7533 * possible to time stamp both Sync and Delay_Req messages
7534 * => fall back to time stamping all packets
7536 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
7537 config.rx_filter = HWTSTAMP_FILTER_ALL;
7539 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
7540 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
7541 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
7544 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
7545 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
7546 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
7549 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
7550 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
7551 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
7552 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
7555 config.rx_filter = HWTSTAMP_FILTER_SOME;
7557 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
7558 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
7559 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
7560 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
7563 config.rx_filter = HWTSTAMP_FILTER_SOME;
7565 case HWTSTAMP_FILTER_PTP_V2_EVENT:
7566 case HWTSTAMP_FILTER_PTP_V2_SYNC:
7567 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
7568 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
7569 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
	/* 82575 does not support timestamping */
	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)
			return -ERANGE;
		return 0;
	}
7583 #ifdef IGB_PER_PKT_TIMESTAMP
7585 * Per-packet timestamping only works if all packets are
7586 * timestamped, so enable timestamping in all packets as
7587 * long as one rx filter was configured.
7589 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
7590 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
7591 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
7595 /* enable/disable TX */
7596 regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
7597 regval &= ~E1000_TSYNCTXCTL_ENABLED;
7598 regval |= tsync_tx_ctl;
7599 E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
7601 /* enable/disable RX */
7602 regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
7603 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
7604 regval |= tsync_rx_ctl;
7605 E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
7607 /* define which PTP packets are time stamped */
7608 E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
	/* define ethertype filter for timestamped packets */
	if (is_l2)
		E1000_WRITE_REG(hw, E1000_ETQF(3),
				(E1000_ETQF_FILTER_ENABLE | /* enable filter */
				 E1000_ETQF_1588 | /* enable timestamping */
				 ETH_P_1588)); /* 1588 eth protocol type */
	else
		E1000_WRITE_REG(hw, E1000_ETQF(3), 0);
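	/*
	 * Illustrative note: ETH_P_1588 is ethertype 0x88F7, so the filter
	 * above catches L2 PTP event frames by ethertype; UDP-encapsulated
	 * PTP events are caught by the port-319 L4 queue filter programmed
	 * below.
	 */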
7619 #define PTP_PORT 319
7620 /* L4 Queue Filter[3]: filter by destination port and protocol */
7622 u32 ftqf = (IPPROTO_UDP /* UDP */
7623 | E1000_FTQF_VF_BP /* VF not compared */
7624 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
7625 | E1000_FTQF_MASK); /* mask all inputs */
7626 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
7628 E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_PORT));
7629 E1000_WRITE_REG(hw, E1000_IMIREXT(3),
7630 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
7631 if (hw->mac.type == e1000_82576) {
7632 /* enable source port check */
7633 E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_PORT));
7634 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
7636 E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf);
7638 E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK);
7640 E1000_WRITE_FLUSH(hw);
7642 adapter->hwtstamp_config = config;
7644 /* clear TX/RX time stamp registers, just to be sure */
7645 regval = E1000_READ_REG(hw, E1000_TXSTMPH);
7646 regval = E1000_READ_REG(hw, E1000_RXSTMPH);
	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
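/*
 * Minimal userspace sketch (illustrative only, compiled out): how an
 * application might request one of the timestamping modes handled above
 * through the SIOCSHWTSTAMP ioctl. "eth0" and the absent error handling
 * are placeholders, not part of the driver.
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <linux/net_tstamp.h>

static int example_enable_ptp_timestamping(int sock)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type = HWTSTAMP_TX_ON;			/* stamp outgoing PTP */
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;	/* all V2 events */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&cfg;

	/* the driver may adjust cfg.rx_filter to what it actually enabled */
	return ioctl(sock, SIOCSHWTSTAMP, &ifr);
}
#endif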
7659 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
7666 return igb_mii_ioctl(netdev, ifr, cmd);
7668 #ifdef HAVE_HW_TIME_STAMP
7670 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
7672 #ifdef ETHTOOL_OPS_COMPAT
7674 return ethtool_ioctl(ifr);
7681 s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7683 struct igb_adapter *adapter = hw->back;
7686 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
7688 return -E1000_ERR_CONFIG;
7690 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
7692 return E1000_SUCCESS;
7695 s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
7697 struct igb_adapter *adapter = hw->back;
7700 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
7702 return -E1000_ERR_CONFIG;
7704 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
7706 return E1000_SUCCESS;
7709 #ifdef HAVE_VLAN_RX_REGISTER
7710 static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp)
7712 void igb_vlan_mode(struct net_device *netdev, u32 features)
7715 struct igb_adapter *adapter = netdev_priv(netdev);
7716 struct e1000_hw *hw = &adapter->hw;
7719 #ifdef HAVE_VLAN_RX_REGISTER
7720 bool enable = !!vlgrp;
7722 igb_irq_disable(adapter);
7724 adapter->vlgrp = vlgrp;
7726 if (!test_bit(__IGB_DOWN, &adapter->state))
7727 igb_irq_enable(adapter);
7729 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
7733 /* enable VLAN tag insert/strip */
7734 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7735 ctrl |= E1000_CTRL_VME;
7736 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7738 /* Disable CFI check */
7739 rctl = E1000_READ_REG(hw, E1000_RCTL);
7740 rctl &= ~E1000_RCTL_CFIEN;
7741 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
7743 /* disable VLAN tag insert/strip */
7744 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7745 ctrl &= ~E1000_CTRL_VME;
7746 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7749 #ifndef CONFIG_IGB_VMDQ_NETDEV
7750 for (i = 0; i < adapter->vmdq_pools; i++) {
7751 igb_set_vf_vlan_strip(adapter,
7752 adapter->vfs_allocated_count + i,
7757 igb_set_vf_vlan_strip(adapter,
7758 adapter->vfs_allocated_count,
7761 for (i = 1; i < adapter->vmdq_pools; i++) {
7762 #ifdef HAVE_VLAN_RX_REGISTER
7763 struct igb_vmdq_adapter *vadapter;
7764 vadapter = netdev_priv(adapter->vmdq_netdev[i-1]);
7765 enable = !!vadapter->vlgrp;
7767 struct net_device *vnetdev;
7768 vnetdev = adapter->vmdq_netdev[i-1];
7769 enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX);
7771 igb_set_vf_vlan_strip(adapter,
7772 adapter->vfs_allocated_count + i,
7777 igb_rlpml_set(adapter);
7780 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7781 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
7783 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
7786 struct igb_adapter *adapter = netdev_priv(netdev);
7787 int pf_id = adapter->vfs_allocated_count;
7789 /* attempt to add filter to vlvf array */
7790 igb_vlvf_set(adapter, vid, TRUE, pf_id);
7792 /* add the filter since PF can receive vlans w/o entry in vlvf */
7793 igb_vfta_set(adapter, vid, TRUE);
7794 #ifndef HAVE_NETDEV_VLAN_FEATURES
	/* Copy feature flags from netdev to the vlan netdev for this vid.
	 * This allows things like TSO to bubble down to our vlan device.
	 * There is no need to update netdev for vlan 0 (DCB), since it
	 * wouldn't have a v_netdev.
	 */
7801 if (adapter->vlgrp) {
7802 struct vlan_group *vlgrp = adapter->vlgrp;
7803 struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid);
7805 v_netdev->features |= netdev->features;
7806 vlan_group_set_device(vlgrp, vid, v_netdev);
7810 #ifndef HAVE_VLAN_RX_REGISTER
7812 set_bit(vid, adapter->active_vlans);
7814 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7819 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7820 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
7822 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
7825 struct igb_adapter *adapter = netdev_priv(netdev);
7826 int pf_id = adapter->vfs_allocated_count;
7829 #ifdef HAVE_VLAN_RX_REGISTER
7830 igb_irq_disable(adapter);
7832 vlan_group_set_device(adapter->vlgrp, vid, NULL);
7834 if (!test_bit(__IGB_DOWN, &adapter->state))
7835 igb_irq_enable(adapter);
7837 #endif /* HAVE_VLAN_RX_REGISTER */
7838 /* remove vlan from VLVF table array */
7839 err = igb_vlvf_set(adapter, vid, FALSE, pf_id);
7841 /* if vid was not present in VLVF just remove it from table */
7843 igb_vfta_set(adapter, vid, FALSE);
7844 #ifndef HAVE_VLAN_RX_REGISTER
7846 clear_bit(vid, adapter->active_vlans);
7848 #ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID
7853 static void igb_restore_vlan(struct igb_adapter *adapter)
7855 #ifdef HAVE_VLAN_RX_REGISTER
7856 igb_vlan_mode(adapter->netdev, adapter->vlgrp);
7858 if (adapter->vlgrp) {
7860 for (vid = 0; vid < VLAN_N_VID; vid++) {
7861 if (!vlan_group_get_device(adapter->vlgrp, vid))
7863 igb_vlan_rx_add_vid(adapter->netdev, vid);
7869 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
7871 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
7872 igb_vlan_rx_add_vid(adapter->netdev, vid);
7876 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
7878 struct pci_dev *pdev = adapter->pdev;
7879 struct e1000_mac_info *mac = &adapter->hw.mac;
	/* Fiber NICs only allow 1000 Mbps full duplex */
7884 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes ) &&
7885 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
7886 dev_err(pci_dev_to_dev(pdev),
7887 "Unsupported Speed/Duplex configuration\n");
7892 case SPEED_10 + DUPLEX_HALF:
7893 mac->forced_speed_duplex = ADVERTISE_10_HALF;
7895 case SPEED_10 + DUPLEX_FULL:
7896 mac->forced_speed_duplex = ADVERTISE_10_FULL;
7898 case SPEED_100 + DUPLEX_HALF:
7899 mac->forced_speed_duplex = ADVERTISE_100_HALF;
7901 case SPEED_100 + DUPLEX_FULL:
7902 mac->forced_speed_duplex = ADVERTISE_100_FULL;
7904 case SPEED_1000 + DUPLEX_FULL:
7906 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
7908 case SPEED_1000 + DUPLEX_HALF: /* not supported */
		dev_err(pci_dev_to_dev(pdev),
			"Unsupported Speed/Duplex configuration\n");
		return -EINVAL;
	}
	return 0;
}
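/*
 * Example (illustrative): ethtool's legacy interface folds speed and duplex
 * into a single value, so SPEED_100 + DUPLEX_FULL = 100 + 1 = 101 selects
 * the ADVERTISE_100_FULL case above, while SPEED_1000 + DUPLEX_HALF =
 * 1000 + 0 = 1000 lands in the unsupported default.
 */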
7916 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
7919 struct net_device *netdev = pci_get_drvdata(pdev);
7920 struct igb_adapter *adapter = netdev_priv(netdev);
7921 struct e1000_hw *hw = &adapter->hw;
7922 u32 ctrl, rctl, status;
7923 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
7928 netif_device_detach(netdev);
7930 if (netif_running(netdev))
7931 __igb_close(netdev, true);
7933 igb_clear_interrupt_scheme(adapter);
7936 retval = pci_save_state(pdev);
7941 status = E1000_READ_REG(hw, E1000_STATUS);
7942 if (status & E1000_STATUS_LU)
7943 wufc &= ~E1000_WUFC_LNKC;
7946 igb_setup_rctl(adapter);
7947 igb_set_rx_mode(netdev);
7949 /* turn on all-multi mode if wake on multicast is enabled */
7950 if (wufc & E1000_WUFC_MC) {
7951 rctl = E1000_READ_REG(hw, E1000_RCTL);
7952 rctl |= E1000_RCTL_MPE;
7953 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
7956 ctrl = E1000_READ_REG(hw, E1000_CTRL);
7957 /* phy power management enable */
7958 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
7959 ctrl |= E1000_CTRL_ADVD3WUC;
7960 E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
7962 /* Allow time for pending master requests to run */
7963 e1000_disable_pcie_master(hw);
7965 E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN);
7966 E1000_WRITE_REG(hw, E1000_WUFC, wufc);
7968 E1000_WRITE_REG(hw, E1000_WUC, 0);
7969 E1000_WRITE_REG(hw, E1000_WUFC, 0);
7972 *enable_wake = wufc || adapter->en_mng_pt;
7974 igb_power_down_link(adapter);
7976 igb_power_up_link(adapter);
7978 /* Release control of h/w to f/w. If f/w is AMT enabled, this
7979 * would have already happened in close and is redundant. */
7980 igb_release_hw_control(adapter);
7982 pci_disable_device(pdev);
7988 #ifdef HAVE_SYSTEM_SLEEP_PM_OPS
7989 static int igb_suspend(struct device *dev)
7993 struct pci_dev *pdev = to_pci_dev(dev);
7995 retval = __igb_shutdown(pdev, &wake, 0);
8000 pci_prepare_to_sleep(pdev);
8002 pci_wake_from_d3(pdev, false);
8003 pci_set_power_state(pdev, PCI_D3hot);
8009 static int igb_resume(struct device *dev)
8011 struct pci_dev *pdev = to_pci_dev(dev);
8012 struct net_device *netdev = pci_get_drvdata(pdev);
8013 struct igb_adapter *adapter = netdev_priv(netdev);
8014 struct e1000_hw *hw = &adapter->hw;
8017 pci_set_power_state(pdev, PCI_D0);
8018 pci_restore_state(pdev);
8019 pci_save_state(pdev);
8021 err = pci_enable_device_mem(pdev);
8023 dev_err(pci_dev_to_dev(pdev),
8024 "igb: Cannot enable PCI device from suspend\n");
8027 pci_set_master(pdev);
8029 pci_enable_wake(pdev, PCI_D3hot, 0);
8030 pci_enable_wake(pdev, PCI_D3cold, 0);
8032 #ifdef CONFIG_PM_RUNTIME
8033 if (!rtnl_is_locked()) {
8035 * shut up ASSERT_RTNL() warning in
8036 * netif_set_real_num_tx/rx_queues.
8039 err = igb_init_interrupt_scheme(adapter);
8042 err = igb_init_interrupt_scheme(adapter);
8046 if (igb_init_interrupt_scheme(adapter)) {
8047 #endif /* CONFIG_PM_RUNTIME */
8048 dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n");
8054 /* let the f/w know that the h/w is now under the control of the
8056 igb_get_hw_control(adapter);
8058 E1000_WRITE_REG(hw, E1000_WUS, ~0);
8060 if (netdev->flags & IFF_UP) {
8061 err = __igb_open(netdev, true);
8066 netif_device_attach(netdev);
8071 #ifdef CONFIG_PM_RUNTIME
8072 static int igb_runtime_idle(struct device *dev)
8074 struct pci_dev *pdev = to_pci_dev(dev);
8075 struct net_device *netdev = pci_get_drvdata(pdev);
8076 struct igb_adapter *adapter = netdev_priv(netdev);
8078 if (!igb_has_link(adapter))
		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);

	return -EBUSY;
}
8084 static int igb_runtime_suspend(struct device *dev)
8086 struct pci_dev *pdev = to_pci_dev(dev);
8090 retval = __igb_shutdown(pdev, &wake, 1);
8095 pci_prepare_to_sleep(pdev);
8097 pci_wake_from_d3(pdev, false);
8098 pci_set_power_state(pdev, PCI_D3hot);
8104 static int igb_runtime_resume(struct device *dev)
8106 return igb_resume(dev);
8108 #endif /* CONFIG_PM_RUNTIME */
8109 #endif /* HAVE_SYSTEM_SLEEP_PM_OPS */
8110 #endif /* CONFIG_PM */
8112 #ifdef USE_REBOOT_NOTIFIER
8113 /* only want to do this for 2.4 kernels? */
8114 static int igb_notify_reboot(struct notifier_block *nb, unsigned long event,
8117 struct pci_dev *pdev = NULL;
8124 while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
8125 if (pci_dev_driver(pdev) == &igb_driver) {
8126 __igb_shutdown(pdev, &wake, 0);
8127 if (event == SYS_POWER_OFF) {
8128 pci_wake_from_d3(pdev, wake);
8129 pci_set_power_state(pdev, PCI_D3hot);
8137 static void igb_shutdown(struct pci_dev *pdev)
8141 __igb_shutdown(pdev, &wake, 0);
8143 if (system_state == SYSTEM_POWER_OFF) {
8144 pci_wake_from_d3(pdev, wake);
8145 pci_set_power_state(pdev, PCI_D3hot);
8148 #endif /* USE_REBOOT_NOTIFIER */
8150 #ifdef CONFIG_NET_POLL_CONTROLLER
8152 * Polling 'interrupt' - used by things like netconsole to send skbs
8153 * without having to re-enable interrupts. It's not called while
8154 * the interrupt routine is executing.
8156 static void igb_netpoll(struct net_device *netdev)
8158 struct igb_adapter *adapter = netdev_priv(netdev);
8159 struct e1000_hw *hw = &adapter->hw;
8160 struct igb_q_vector *q_vector;
8163 for (i = 0; i < adapter->num_q_vectors; i++) {
8164 q_vector = adapter->q_vector[i];
8165 if (adapter->msix_entries)
8166 E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value);
8168 igb_irq_disable(adapter);
8169 napi_schedule(&q_vector->napi);
8172 #endif /* CONFIG_NET_POLL_CONTROLLER */
8175 #define E1000_DEV_ID_82576_VF 0x10CA
8177 * igb_io_error_detected - called when PCI error is detected
8178 * @pdev: Pointer to PCI device
8179 * @state: The current pci connection state
8181 * This function is called after a PCI bus error affecting
8182 * this device has been detected.
8184 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
8185 pci_channel_state_t state)
8187 struct net_device *netdev = pci_get_drvdata(pdev);
8188 struct igb_adapter *adapter = netdev_priv(netdev);
8190 #ifdef CONFIG_PCI_IOV__UNUSED
8191 struct pci_dev *bdev, *vfdev;
8192 u32 dw0, dw1, dw2, dw3;
8194 u16 req_id, pf_func;
8196 if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA))
8197 goto skip_bad_vf_detection;
8199 bdev = pdev->bus->self;
8200 while (bdev && (bdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT))
8201 bdev = bdev->bus->self;
8204 goto skip_bad_vf_detection;
8206 pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
8208 goto skip_bad_vf_detection;
8210 pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0);
8211 pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1);
8212 pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2);
8213 pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3);
8216 /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */
8217 if (!(req_id & 0x0080))
8218 goto skip_bad_vf_detection;
8220 pf_func = req_id & 0x01;
8221 if ((pf_func & 1) == (pdev->devfn & 1)) {
8223 vf = (req_id & 0x7F) >> 1;
8224 dev_err(pci_dev_to_dev(pdev),
8225 "VF %d has caused a PCIe error\n", vf);
8226 dev_err(pci_dev_to_dev(pdev),
8227 "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
8228 "%8.8x\tdw3: %8.8x\n",
8229 dw0, dw1, dw2, dw3);
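		/*
		 * Worked example (illustrative): a requestor ID of 0x00A5
		 * has bit 7 set, so on the 82576 it names a VF; its low bit
		 * picks PCI function 1 and (0x00A5 & 0x7F) >> 1 identifies
		 * VF 18 as the offender.
		 */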
8231 /* Find the pci device of the offending VF */
8232 vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
8233 E1000_DEV_ID_82576_VF, NULL);
8235 if (vfdev->devfn == (req_id & 0xFF))
8237 vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
8238 E1000_DEV_ID_82576_VF, vfdev);
8241 * There's a slim chance the VF could have been hot plugged,
8242 * so if it is no longer present we don't need to issue the
8243 * VFLR. Just clean up the AER in that case.
8246 dev_err(pci_dev_to_dev(pdev),
8247 "Issuing VFLR to VF %d\n", vf);
8248 pci_write_config_dword(vfdev, 0xA8, 0x00008000);
8251 pci_cleanup_aer_uncorrect_error_status(pdev);
8255 * Even though the error may have occurred on the other port
8256 * we still need to increment the vf error reference count for
8257 * both ports because the I/O resume function will be called
8260 adapter->vferr_refcount++;
8262 return PCI_ERS_RESULT_RECOVERED;
8264 skip_bad_vf_detection:
8265 #endif /* CONFIG_PCI_IOV */
8267 netif_device_detach(netdev);
8269 if (state == pci_channel_io_perm_failure)
8270 return PCI_ERS_RESULT_DISCONNECT;
8272 if (netif_running(netdev))
8274 pci_disable_device(pdev);
	/* Request a slot reset. */
8277 return PCI_ERS_RESULT_NEED_RESET;
8281 * igb_io_slot_reset - called after the pci bus has been reset.
8282 * @pdev: Pointer to PCI device
8284 * Restart the card from scratch, as if from a cold-boot. Implementation
8285 * resembles the first-half of the igb_resume routine.
8287 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
8289 struct net_device *netdev = pci_get_drvdata(pdev);
8290 struct igb_adapter *adapter = netdev_priv(netdev);
8291 struct e1000_hw *hw = &adapter->hw;
8292 pci_ers_result_t result;
8294 if (pci_enable_device_mem(pdev)) {
8295 dev_err(pci_dev_to_dev(pdev),
8296 "Cannot re-enable PCI device after reset.\n");
8297 result = PCI_ERS_RESULT_DISCONNECT;
8299 pci_set_master(pdev);
8300 pci_restore_state(pdev);
8301 pci_save_state(pdev);
8303 pci_enable_wake(pdev, PCI_D3hot, 0);
8304 pci_enable_wake(pdev, PCI_D3cold, 0);
8306 schedule_work(&adapter->reset_task);
8307 E1000_WRITE_REG(hw, E1000_WUS, ~0);
8308 result = PCI_ERS_RESULT_RECOVERED;
8311 pci_cleanup_aer_uncorrect_error_status(pdev);
8317 * igb_io_resume - called when traffic can start flowing again.
8318 * @pdev: Pointer to PCI device
8320 * This callback is called when the error recovery driver tells us that
8321 * its OK to resume normal operation. Implementation resembles the
8322 * second-half of the igb_resume routine.
8324 static void igb_io_resume(struct pci_dev *pdev)
8326 struct net_device *netdev = pci_get_drvdata(pdev);
8327 struct igb_adapter *adapter = netdev_priv(netdev);
8329 if (adapter->vferr_refcount) {
8330 dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n");
8331 adapter->vferr_refcount--;
8335 if (netif_running(netdev)) {
8336 if (igb_up(adapter)) {
8337 dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n");
8342 netif_device_attach(netdev);
8344 /* let the f/w know that the h/w is now under the control of the
8346 igb_get_hw_control(adapter);
8349 #endif /* HAVE_PCI_ERS */
8351 int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue)
8353 struct e1000_hw *hw = &adapter->hw;
8356 if (is_zero_ether_addr(addr))
8359 for (i = 0; i < hw->mac.rar_entry_count; i++) {
8360 if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE)
8362 adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED |
8363 IGB_MAC_STATE_IN_USE);
8364 memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN);
8365 adapter->mac_table[i].queue = queue;
8366 igb_sync_mac_table(adapter);
8371 int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue)
8373 /* search table for addr, if found, set to 0 and sync */
8375 struct e1000_hw *hw = &adapter->hw;
8377 if (is_zero_ether_addr(addr))
8379 for (i = 0; i < hw->mac.rar_entry_count; i++) {
8380 if (!compare_ether_addr(addr, adapter->mac_table[i].addr) &&
8381 adapter->mac_table[i].queue == queue) {
8382 adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED;
8383 memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
8384 adapter->mac_table[i].queue = 0;
8385 igb_sync_mac_table(adapter);
8391 static int igb_set_vf_mac(struct igb_adapter *adapter,
8392 int vf, unsigned char *mac_addr)
8394 igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf);
8395 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
8397 igb_add_mac_filter(adapter, mac_addr, vf);
8403 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
8405 struct igb_adapter *adapter = netdev_priv(netdev);
	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
8408 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
8409 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
8410 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
8411 " change effective.\n");
8412 if (test_bit(__IGB_DOWN, &adapter->state)) {
8413 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
8414 " but the PF device is not up.\n");
8415 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
8416 " attempting to use the VF device.\n");
8418 return igb_set_vf_mac(adapter, vf, mac);
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}
8433 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
8440 /* Calculate the rate factor values to set */
8441 rf_int = link_speed / tx_rate;
8442 rf_dec = (link_speed - (rf_int * tx_rate));
8443 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
8445 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
8446 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
8447 E1000_RTTBCNRC_RF_INT_MASK);
8448 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
8453 E1000_WRITE_REG(hw, E1000_RTTDQSEL, vf); /* vf X uses queue X */
8455 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
8456 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
8458 E1000_WRITE_REG(hw, E1000_RTTBCNRM(0), 0x14);
8459 E1000_WRITE_REG(hw, E1000_RTTBCNRC, bcnrc_val);
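	/*
	 * Worked example (illustrative): on a 1000 Mbps link with a tx_rate
	 * of 300 Mbps, rf_int = 1000 / 300 = 3 and rf_dec = ((1000 - 900) <<
	 * E1000_RTTBCNRC_RF_INT_SHIFT) / 300 = 5461 (shift of 14), so the
	 * hardware paces the queue at 1000 / (3 + 5461/16384) = ~300 Mbps.
	 */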
8462 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
8464 int actual_link_speed, i;
8465 bool reset_rate = false;
8467 /* VF TX rate limit was not set */
8468 if ((adapter->vf_rate_link_speed == 0) ||
8469 (adapter->hw.mac.type != e1000_82576))
8472 actual_link_speed = igb_link_mbps(adapter->link_speed);
8473 if (actual_link_speed != adapter->vf_rate_link_speed) {
8475 adapter->vf_rate_link_speed = 0;
8476 dev_info(&adapter->pdev->dev,
8477 "Link speed has been changed. VF Transmit rate is disabled\n");
8480 for (i = 0; i < adapter->vfs_allocated_count; i++) {
8482 adapter->vf_data[i].tx_rate = 0;
8484 igb_set_vf_rate_limit(&adapter->hw, i,
8485 adapter->vf_data[i].tx_rate, actual_link_speed);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
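/*
 * Usage note (added, illustrative): reached via
 *
 *	ip link set <pf-ifname> vf <n> rate <mbps>
 *
 * tx_rate is in Mbps and must not exceed the current link speed; a
 * value of 0 clears the limit (igb_set_vf_rate_limit then writes a
 * zero RTTBCNRC). Only the 82576 supports this per-VF pacing, hence
 * the mac.type check above.
 */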
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = E1000_READ_REG(hw, E1000_DTXCTL);
		reg |= (E1000_DTXCTL_VLAN_ADDED |
			E1000_DTXCTL_SPOOF_INT);
		E1000_WRITE_REG(hw, E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = E1000_READ_REG(hw, E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		E1000_WRITE_REG(hw, E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	/* Enable Malicious Driver Detection */
	if ((hw->mac.type == e1000_i350) && (adapter->vfs_allocated_count) &&
	    (adapter->mdd))
		igb_enable_mdd(adapter);

	/* enable replication and loopback support */
	e1000_vmdq_set_loopback_pf(hw, adapter->vfs_allocated_count ||
				   adapter->vmdq_pools);

	e1000_vmdq_set_anti_spoofing_pf(hw, adapter->vfs_allocated_count ||
					adapter->vmdq_pools,
					adapter->vfs_allocated_count);
	e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count ||
				      adapter->vmdq_pools);
}
static void igb_init_fw(struct igb_adapter *adapter)
{
	struct e1000_fw_drv_info fw_cmd;
	struct e1000_hw *hw = &adapter->hw;
	int i;
	u16 mask;

	mask = E1000_SWFW_PHY0_SM;

	if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) {
		for (i = 0; i <= FW_MAX_RETRIES; i++) {
			E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI);
			fw_cmd.hdr.cmd = FW_CMD_DRV_INFO;
			fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN;
			fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED;
			fw_cmd.port_num = hw->bus.func;
			fw_cmd.drv_version = FW_FAMILY_DRV_VER;
			fw_cmd.hdr.checksum = 0;
			fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd,
								       (FW_HDR_LEN +
									fw_cmd.hdr.buf_len));
			e1000_host_interface_command(hw, (u8 *)&fw_cmd,
						     sizeof(fw_cmd));
			if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS)
				break;
		}
	} else
		dev_warn(pci_dev_to_dev(adapter->pdev),
			 "Unable to get semaphore, firmware init failed.\n");
	hw->mac.ops.release_swfw_sync(hw, mask);
}
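/*
 * Sketch (added for clarity; mirrors the shared e1000 helper and is
 * shown only to explain the zero-then-patch sequence above):
 * e1000_calculate_checksum() returns the byte that makes the buffer
 * sum to zero modulo 256, roughly
 *
 *	u8 sum = 0;
 *	while (length--)
 *		sum += *buffer++;
 *	return (u8)(0 - sum);
 *
 * so the checksum field is cleared first to keep it out of the sum,
 * then patched with the result before the command is sent.
 */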
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 dmac_thr;
	u16 hwm;
	u32 reg;

	if (hw->mac.type > e1000_82580) {
		if (adapter->dmac != IGB_DMAC_DISABLE) {

			/* force threshold to 0. */
			E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);

			/*
			 * DMA Coalescing high water mark needs to be greater
			 * than the Rx threshold. Set hwm to PBA - max frame
			 * size in 16B units, with a floor of PBA - 6KB.
			 */
			hwm = 64 * pba - adapter->max_frame_size / 16;
			if (hwm < 64 * (pba - 6))
				hwm = 64 * (pba - 6);
			reg = E1000_READ_REG(hw, E1000_FCRTC);
			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
				& E1000_FCRTC_RTH_COAL_MASK);
			E1000_WRITE_REG(hw, E1000_FCRTC, reg);

			/*
			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
			 * frame size, with a floor of PBA - 10KB.
			 */
			dmac_thr = pba - adapter->max_frame_size / 512;
			if (dmac_thr < pba - 10)
				dmac_thr = pba - 10;
			reg = E1000_READ_REG(hw, E1000_DMACR);
			reg &= ~E1000_DMACR_DMACTHR_MASK;
			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
				& E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available..*/
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer= msec values in 32usec intervals */
			reg |= ((adapter->dmac) >> 5);
			E1000_WRITE_REG(hw, E1000_DMACR, reg);

			/* no lower threshold to disable coalescing (smart FIFO) - UTRESH=0 */
			E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

			/*
			 * This sets the time to wait before requesting
			 * transition to low power state to the number of
			 * usecs needed to receive one 512-byte frame at
			 * gigabit line rate.
			 */
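			/*
			 * (Added arithmetic for clarity: a 512-byte frame is
			 * 4096 bits, i.e. ~4.1 usec on the wire at 1 Gb/s,
			 * which is presumably where the 0x4 in the DMCTLX
			 * value below comes from.)
			 */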
			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

			E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

			/* free space in tx packet buffer to wake from DMA coal */
			E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
				(IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

			/* make low power state decision controlled by DMA coal */
			reg = E1000_READ_REG(hw, E1000_PCIEMISC);
			reg &= ~E1000_PCIEMISC_LX_DECISION;
			E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
		} /* endif adapter->dmac is not disabled */
	} else if (hw->mac.type == e1000_82580) {
		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);

		E1000_WRITE_REG(hw, E1000_PCIEMISC,
				reg & ~E1000_PCIEMISC_LX_DECISION);
		E1000_WRITE_REG(hw, E1000_DMACR, 0);
	}
}
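/*
 * Worked example for the watermark math above (added, illustrative
 * numbers): with pba = 34 (KB) and a 1522-byte max frame,
 * hwm = 64*34 - 1522/16 = 2176 - 95 = 2081 sixteen-byte units
 * (~32.5 KB), above the 64*(34-6) = 1792 floor, so it is used as-is;
 * dmac_thr = 34 - 1522/512 = 32 (KB), above the 34-10 = 24 floor.
 */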
/**
 * igb_kni_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @lad_dev: used to return the initialized net_device to the caller
 *
 * Returns 0 on success, negative on failure
 *
 * igb_kni_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
int igb_kni_probe(struct pci_dev *pdev,
		  struct net_device **lad_dev)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	u8 pba_str[E1000_PBANUM_LENGTH];
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	int i, err, pci_using_dac = 0;
	static int cards_found;
	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(pci_dev_to_dev(pdev),
					    DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(pci_dev_to_dev(pdev),
						    DMA_BIT_MASK(32));
			if (err) {
				IGB_ERR("No usable DMA configuration, "
					"aborting\n");
				goto err_dma;
			}
		}
	}
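	/*
	 * Note (added for clarity): the adapter prefers 64-bit DMA and
	 * falls back to a 32-bit mask if that fails; pci_using_dac is
	 * remembered so NETIF_F_HIGHDMA can be advertised further down
	 * only when 64-bit addressing is actually usable.
	 */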
#ifndef HAVE_ASPM_QUIRKS
	/* 82575 requires that the pci-e link partner disable the L0s state */
	switch (pdev->device) {
	case E1000_DEV_ID_82575EB_COPPER:
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
		break;
	default:
		break;
	}
#endif /* HAVE_ASPM_QUIRKS */
	err = pci_request_selected_regions(pdev,
					   pci_select_bars(pdev,
							   IORESOURCE_MEM),
					   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	err = -ENOMEM;
#ifdef HAVE_TX_MQ
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
#else
	netdev = alloc_etherdev(sizeof(struct igb_adapter));
#endif /* HAVE_TX_MQ */
	if (!netdev)
		goto err_alloc_etherdev;
	SET_MODULE_OWNER(netdev);
	SET_NETDEV_DEV(netdev, &pdev->dev);

	//pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->port_num = hw->bus.func;
	adapter->msg_enable = (1 << debug) - 1;

	err = pci_save_state(pdev);
	if (err)
		goto err_ioremap;

	err = -EIO;
	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
			      pci_resource_len(pdev, 0));
	if (!hw->hw_addr)
		goto err_ioremap;
#ifdef HAVE_NET_DEVICE_OPS
	netdev->netdev_ops = &igb_netdev_ops;
#else /* HAVE_NET_DEVICE_OPS */
	netdev->open = &igb_open;
	netdev->stop = &igb_close;
	netdev->get_stats = &igb_get_stats;
#ifdef HAVE_SET_RX_MODE
	netdev->set_rx_mode = &igb_set_rx_mode;
#endif
	netdev->set_multicast_list = &igb_set_rx_mode;
	netdev->set_mac_address = &igb_set_mac;
	netdev->change_mtu = &igb_change_mtu;
	netdev->do_ioctl = &igb_ioctl;
#ifdef HAVE_TX_TIMEOUT
	netdev->tx_timeout = &igb_tx_timeout;
#endif
	netdev->vlan_rx_register = igb_vlan_mode;
	netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid;
	netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid;
#ifdef CONFIG_NET_POLL_CONTROLLER
	netdev->poll_controller = igb_netpoll;
#endif
	netdev->hard_start_xmit = &igb_xmit_frame;
#endif /* HAVE_NET_DEVICE_OPS */
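/*
 * Compat note (added for clarity): struct net_device_ops appeared in
 * kernel 2.6.29; on older kernels the #else branch above falls back to
 * the individual callback pointers that used to live directly in
 * struct net_device.
 */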
	igb_set_ethtool_ops(netdev);
#ifdef HAVE_TX_TIMEOUT
	netdev->watchdog_timeo = 5 * HZ;
#endif

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	adapter->bd_number = cards_found;
	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	e1000_get_bus_info(hw);

	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->mac.adaptive_ifs = FALSE;
	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
#ifdef ETH_TP_MDI_X
		hw->phy.mdix = ETH_TP_MDI_INVALID;
#else
		hw->phy.mdix = AUTO_ALL_MODES;
#endif /* ETH_TP_MDI_X */
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (e1000_check_reset_block(hw))
		dev_info(pci_dev_to_dev(pdev),
			 "PHY reset is blocked due to SOL/IDER session.\n");
	/*
	 * features is initialized to 0 in allocation, it might have bits
	 * set by igb_sw_init so we should use an or instead of an
	 * assignment.
	 */
	netdev->features |= NETIF_F_SG |
			    NETIF_F_IP_CSUM |
#ifdef NETIF_F_IPV6_CSUM
			    NETIF_F_IPV6_CSUM |
#endif
#ifdef NETIF_F_TSO
			    NETIF_F_TSO |
#ifdef NETIF_F_TSO6
			    NETIF_F_TSO6 |
#endif
#endif /* NETIF_F_TSO */
#ifdef NETIF_F_RXHASH
			    NETIF_F_RXHASH |
#endif
#ifdef HAVE_NDO_SET_FEATURES
			    NETIF_F_RXCSUM |
#endif
			    NETIF_F_HW_VLAN_RX |
			    NETIF_F_HW_VLAN_TX;

#ifdef HAVE_NDO_SET_FEATURES
	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features;
#ifndef IGB_NO_LRO
	/* give us the option of enabling LRO later */
	netdev->hw_features |= NETIF_F_LRO;
#endif
#else /* HAVE_NDO_SET_FEATURES */
#ifdef NETIF_F_GRO
	/* this is only needed on kernels prior to 2.6.39 */
	netdev->features |= NETIF_F_GRO;
#endif
#endif /* HAVE_NDO_SET_FEATURES */

	/* set this bit last since it cannot be part of hw_features */
	netdev->features |= NETIF_F_HW_VLAN_FILTER;
#ifdef HAVE_NETDEV_VLAN_FEATURES
	netdev->vlan_features |= NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_IP_CSUM |
				 NETIF_F_IPV6_CSUM |
				 NETIF_F_SG;
#endif /* HAVE_NETDEV_VLAN_FEATURES */
	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	if (hw->mac.type >= e1000_82576)
		netdev->features |= NETIF_F_SCTP_CSUM;
	adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state */
	e1000_reset_hw(hw);

	/* make sure the NVM is good */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not"
			" Valid\n");
		err = -EIO;
		goto err_eeprom;
	}
	/* copy the MAC address out of the NVM */
	if (e1000_read_mac_addr(hw))
		dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n");
	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
#ifdef ETHTOOL_GPERMADDR
	memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

	if (!is_valid_ether_addr(netdev->perm_addr)) {
#else
	if (!is_valid_ether_addr(netdev->dev_addr)) {
#endif
		dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len);
	adapter->mac_table[0].queue = adapter->vfs_allocated_count;
	adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT |
				       IGB_MAC_STATE_IN_USE);
	igb_rar_set(adapter, 0);
	/* get firmware version for ethtool -i */
	e1000_read_nvm(&adapter->hw, 5, 1, &adapter->fw_version);

	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
		    (unsigned long) adapter);
	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
		setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer,
			    (unsigned long) adapter);
	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
		    (unsigned long) adapter);

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
		INIT_WORK(&adapter->dma_err_task, igb_dma_err_task);
	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;

	e1000_validate_mdi_setting(hw);
	/* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
	 * enable the ACPI Magic Packet filter
	 */
	if (hw->bus.func == 0)
		e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
	else if (hw->mac.type >= e1000_82580)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
				 &eeprom_data);
	else if (hw->bus.func == 1)
		e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

	if (eeprom_data & IGB_EEPROM_APME)
		adapter->eeprom_wol |= E1000_WUFC_MAG;
	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
	 * lan on a particular port */
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
		adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1)
			adapter->eeprom_wol = 0;
		break;
	case E1000_DEV_ID_82576_QUAD_COPPER:
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->eeprom_wol = 0;
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
	}
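	/*
	 * Clarifying note (added): the PCI functions of a quad-port board
	 * are probed in order, so the first one seen keeps WoL and is
	 * flagged as port A; the static counter wraps every four ports so
	 * the next quad-port adapter in the system starts over.
	 */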
	/* initialize the wol settings based on the eeprom settings */
	adapter->wol = adapter->eeprom_wol;

	device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), adapter->wol);

	/* reset the hardware with the new settings */
	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
	strncpy(netdev->name, "eth%d", IFNAMSIZ);
	err = register_netdev(netdev);
	if (err)
		goto err_register;

#ifdef CONFIG_IGB_VMDQ_NETDEV
	err = igb_init_vmdq_netdevs(adapter);
	if (err)
		goto err_register;
#endif
	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);
#ifdef IGB_DCA
	if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) {
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
		dev_info(pci_dev_to_dev(pdev), "DCA enabled\n");
		igb_setup_dca(adapter);
	}
#endif

#ifdef HAVE_HW_TIME_STAMP
	/* do hw tstamp init after resetting */
	igb_init_hw_timer(adapter);
#endif
	dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n");
	/* print bus type/speed/width info */
	dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ",
		 netdev->name,
		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" :
		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" :
		  "unknown"),
		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4\n" :
		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2\n" :
		  (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1\n" :
		  "unknown"));
	dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name);
	for (i = 0; i < 6; i++)
		printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':');

	ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH);
	if (ret_val)
		strncpy(pba_str, "Unknown", sizeof(pba_str) - 1);
	dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name,
		 pba_str);
	/* Initialize the thermal sensor on i350 devices. */
	if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
		u16 ets_word;

		/*
		 * Read the NVM to determine if this i350 device supports an
		 * external thermal sensor.
		 */
		e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word);
		if (ets_word != 0x0000 && ets_word != 0xFFFF)
			adapter->ets = true;
		else
			adapter->ets = false;
#ifdef IGB_SYSFS
		igb_sysfs_init(adapter);
#else
#ifdef IGB_PROCFS
		igb_procfs_init(adapter);
#endif /* IGB_PROCFS */
#endif /* IGB_SYSFS */
	} else {
		adapter->ets = false;
	}
	switch (hw->mac.type) {
	case e1000_i350:
		/* Enable EEE for internal copper PHY devices */
		if (hw->phy.media_type == e1000_media_type_copper)
			e1000_set_eee_i350(hw);

		/* send driver version info to firmware */
		igb_init_fw(adapter);
		break;
	default:
		break;
	}
#ifndef IGB_NO_LRO
	if (netdev->features & NETIF_F_LRO)
		dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled\n");
	else
		dev_info(pci_dev_to_dev(pdev), "LRO is disabled\n");
#endif
	dev_info(pci_dev_to_dev(pdev),
		 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
		 adapter->msix_entries ? "MSI-X" :
		 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
		 adapter->num_rx_queues, adapter->num_tx_queues);
	cards_found++;

	*lad_dev = netdev;

	pm_runtime_put_noidle(&pdev->dev);

	return 0;
err_register:
	//igb_release_hw_control(adapter);
err_eeprom:
	//if (!e1000_check_reset_block(hw))
	//	e1000_phy_hw_reset(hw);

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
	//igb_clear_interrupt_scheme(adapter);
	//igb_reset_sriov_capability(adapter);
	iounmap(hw->hw_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	//pci_release_selected_regions(pdev,
	//			     pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
	pci_disable_device(pdev);

	return err;
}
void igb_kni_remove(struct pci_dev *pdev)
{
	/*
	 * Note: this relies on drvdata having been set by the caller,
	 * since the pci_set_drvdata() call in igb_kni_probe() above is
	 * commented out.
	 */
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	iounmap(hw->hw_addr);

	if (hw->flash_address)
		iounmap(hw->flash_address);

	pci_disable_device(pdev);
}