1 /******************************************************************************
2
3   Copyright (c) 2001-2011, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35
36 #ifdef HAVE_KERNEL_OPTION_HEADERS
37 #include "opt_device_polling.h"
38 #include "opt_inet.h"
39 #include "opt_altq.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #if __FreeBSD_version >= 800000
45 #include <sys/buf_ring.h>
46 #endif
47 #include <sys/bus.h>
48 #include <sys/endian.h>
49 #include <sys/kernel.h>
50 #include <sys/kthread.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/rman.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/taskqueue.h>
59 #include <sys/eventhandler.h>
60 #include <sys/pcpu.h>
61 #include <sys/smp.h>
62 #include <machine/smp.h>
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65
66 #include <net/bpf.h>
67 #include <net/ethernet.h>
68 #include <net/if.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_media.h>
72
73 #include <net/if_types.h>
74 #include <net/if_vlan_var.h>
75
76 #include <netinet/in_systm.h>
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip6.h>
81 #include <netinet/tcp.h>
82 #include <netinet/tcp_lro.h>
83 #include <netinet/udp.h>
84
85 #include <machine/in_cksum.h>
86 #include <dev/led/led.h>
87 #include <dev/pci/pcivar.h>
88 #include <dev/pci/pcireg.h>
89
90 #include "e1000_api.h"
91 #include "e1000_82575.h"
92 #include "if_igb.h"
93
94 /*********************************************************************
95  *  Set this to one to display debug statistics
96  *********************************************************************/
97 int     igb_display_debug_stats = 0;
98
99 /*********************************************************************
100  *  Driver version:
101  *********************************************************************/
102 char igb_driver_version[] = "version - 2.2.3";
103
104
105 /*********************************************************************
106  *  PCI Device ID Table
107  *
108  *  Used by probe to select the devices the driver loads on
109  *  Last field stores an index into igb_strings
110  *  Last entry must be all 0s
111  *
112  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113  *********************************************************************/
114
115 static igb_vendor_info_t igb_vendor_info_array[] =
116 {
117         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82576_VF,        PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
139                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
140         { 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
141                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
142         { 0x8086, E1000_DEV_ID_DH89XXCC_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
143         { 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,  PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_DH89XXCC_SFP,    PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
146                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_I350_COPPER,     PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_I350_FIBER,      PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_I350_SERDES,     PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_I350_SGMII,      PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_I350_VF,         PCI_ANY_ID, PCI_ANY_ID, 0},
152         /* required last entry */
153         { 0, 0, 0, 0, 0}
154 };
155
156 /*********************************************************************
157  *  Table of branding strings for all supported NICs.
158  *********************************************************************/
159
160 static char *igb_strings[] = {
161         "Intel(R) PRO/1000 Network Connection"
162 };
163
164 /*********************************************************************
165  *  Function prototypes
166  *********************************************************************/
167 static int      igb_probe(device_t);
168 static int      igb_attach(device_t);
169 static int      igb_detach(device_t);
170 static int      igb_shutdown(device_t);
171 static int      igb_suspend(device_t);
172 static int      igb_resume(device_t);
173 static void     igb_start(struct ifnet *);
174 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
175 #if __FreeBSD_version >= 800000
176 static int      igb_mq_start(struct ifnet *, struct mbuf *);
177 static int      igb_mq_start_locked(struct ifnet *,
178                     struct tx_ring *, struct mbuf *);
179 static void     igb_qflush(struct ifnet *);
180 #endif
181 static int      igb_ioctl(struct ifnet *, u_long, caddr_t);
182 static void     igb_init(void *);
183 static void     igb_init_locked(struct adapter *);
184 static void     igb_stop(void *);
185 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
186 static int      igb_media_change(struct ifnet *);
187 static void     igb_identify_hardware(struct adapter *);
188 static int      igb_allocate_pci_resources(struct adapter *);
189 static int      igb_allocate_msix(struct adapter *);
190 static int      igb_allocate_legacy(struct adapter *);
191 static int      igb_setup_msix(struct adapter *);
192 static void     igb_free_pci_resources(struct adapter *);
193 static void     igb_local_timer(void *);
194 static void     igb_reset(struct adapter *);
195 static int      igb_setup_interface(device_t, struct adapter *);
196 static int      igb_allocate_queues(struct adapter *);
197 static void     igb_configure_queues(struct adapter *);
198
199 static int      igb_allocate_transmit_buffers(struct tx_ring *);
200 static void     igb_setup_transmit_structures(struct adapter *);
201 static void     igb_setup_transmit_ring(struct tx_ring *);
202 static void     igb_initialize_transmit_units(struct adapter *);
203 static void     igb_free_transmit_structures(struct adapter *);
204 static void     igb_free_transmit_buffers(struct tx_ring *);
205
206 static int      igb_allocate_receive_buffers(struct rx_ring *);
207 static int      igb_setup_receive_structures(struct adapter *);
208 static int      igb_setup_receive_ring(struct rx_ring *);
209 static void     igb_initialize_receive_units(struct adapter *);
210 static void     igb_free_receive_structures(struct adapter *);
211 static void     igb_free_receive_buffers(struct rx_ring *);
212 static void     igb_free_receive_ring(struct rx_ring *);
213
214 static void     igb_enable_intr(struct adapter *);
215 static void     igb_disable_intr(struct adapter *);
216 static void     igb_update_stats_counters(struct adapter *);
217 static bool     igb_txeof(struct tx_ring *);
218
219 static __inline void igb_rx_discard(struct rx_ring *, int);
220 static __inline void igb_rx_input(struct rx_ring *,
221                     struct ifnet *, struct mbuf *, u32);
222
223 static bool     igb_rxeof(struct igb_queue *, int, int *);
224 static void     igb_rx_checksum(u32, struct mbuf *, u32);
225 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
226 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
227 static void     igb_set_promisc(struct adapter *);
228 static void     igb_disable_promisc(struct adapter *);
229 static void     igb_set_multi(struct adapter *);
230 static void     igb_update_link_status(struct adapter *);
231 static void     igb_refresh_mbufs(struct rx_ring *, int);
232
233 static void     igb_register_vlan(void *, struct ifnet *, u16);
234 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
235 static void     igb_setup_vlan_hw_support(struct adapter *);
236
237 static int      igb_xmit(struct tx_ring *, struct mbuf **);
238 static int      igb_dma_malloc(struct adapter *, bus_size_t,
239                     struct igb_dma_alloc *, int);
240 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
241 static int      igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
242 static void     igb_print_nvm_info(struct adapter *);
243 static int      igb_is_valid_ether_addr(u8 *);
244 static void     igb_add_hw_stats(struct adapter *);
245
246 static void     igb_vf_init_stats(struct adapter *);
247 static void     igb_update_vf_stats_counters(struct adapter *);
248
249 /* Management and WOL Support */
250 static void     igb_init_manageability(struct adapter *);
251 static void     igb_release_manageability(struct adapter *);
252 static void     igb_get_hw_control(struct adapter *);
253 static void     igb_release_hw_control(struct adapter *);
254 static void     igb_enable_wakeup(device_t);
255 static void     igb_led_func(void *, int);
256
257 static int      igb_irq_fast(void *);
258 static void     igb_msix_que(void *);
259 static void     igb_msix_link(void *);
260 static void     igb_handle_que(void *context, int pending);
261 static void     igb_handle_link(void *context, int pending);
262
263 static void     igb_set_sysctl_value(struct adapter *, const char *,
264                     const char *, int *, int);
265 static int      igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
266
267 #ifdef DEVICE_POLLING
268 static poll_handler_t igb_poll;
269 #endif /* DEVICE_POLLING */
270
271 /*********************************************************************
272  *  FreeBSD Device Interface Entry Points
273  *********************************************************************/
274
275 static device_method_t igb_methods[] = {
276         /* Device interface */
277         DEVMETHOD(device_probe, igb_probe),
278         DEVMETHOD(device_attach, igb_attach),
279         DEVMETHOD(device_detach, igb_detach),
280         DEVMETHOD(device_shutdown, igb_shutdown),
281         DEVMETHOD(device_suspend, igb_suspend),
282         DEVMETHOD(device_resume, igb_resume),
283         {0, 0}
284 };
285
286 static driver_t igb_driver = {
287         "igb", igb_methods, sizeof(struct adapter),
288 };
289
290 static devclass_t igb_devclass;
291 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
292 MODULE_DEPEND(igb, pci, 1, 1, 1);
293 MODULE_DEPEND(igb, ether, 1, 1, 1);
294
295 /*********************************************************************
296  *  Tunable default values.
297  *********************************************************************/
298
299 /* Descriptor defaults */
300 static int igb_rxd = IGB_DEFAULT_RXD;
301 static int igb_txd = IGB_DEFAULT_TXD;
302 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
303 TUNABLE_INT("hw.igb.txd", &igb_txd);
304
305 /*
306 ** AIM: Adaptive Interrupt Moderation,
307 ** which means that the interrupt rate
308 ** is varied over time based on the
309 ** traffic for that interrupt vector.
310 */
311 static int igb_enable_aim = TRUE;
312 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
313
314 /*
315  * MSIX should be the default for best performance,
316  * but this allows it to be forced off for testing.
317  */         
318 static int igb_enable_msix = 1;
319 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
320
321 /*
322 ** Tunable interrupt rate
323 */
324 static int igb_max_interrupt_rate = 8000;
325 TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
326
327 /*
328 ** Header split causes the packet header to
329 ** be DMA'd to a separate mbuf from the payload.
330 ** This can have memory alignment benefits. But
331 ** another plus is that small packets often fit
332 ** into the header and thus use no cluster. It's
333 ** a very workload-dependent feature.
334 */
335 static bool igb_header_split = FALSE;
336 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
337
338 /*
339 ** This will autoconfigure based on
340 ** the number of CPUs if left at 0.
341 */
342 static int igb_num_queues = 0;
343 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
344
345 /* How many packets rxeof tries to clean at a time */
346 static int igb_rx_process_limit = 100;
347 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
348
349 /* Flow control setting - default to FULL */
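/* (Values follow enum e1000_fc; typically 0 = none, 1 = rx pause, 2 = tx pause, 3 = full.) */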
350 static int igb_fc_setting = e1000_fc_full;
351 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
352
353 /* Energy Efficient Ethernet - default to off */
354 static int igb_eee_disabled = TRUE;
355 TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);
356
357 /*
358 ** DMA Coalescing, only for i350 - default to off;
359 ** this feature is for power savings.
360 */
361 static int igb_dma_coalesce = FALSE;
362 TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);
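/*
** All of the tunables above are loader tunables and can be set at
** boot time from /boot/loader.conf. A minimal sketch (the values
** shown are illustrative, not recommendations):
**
**   hw.igb.rxd=2048
**   hw.igb.enable_aim=0
**   hw.igb.num_queues=2
*/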
363
364 /*********************************************************************
365  *  Device identification routine
366  *
367  *  igb_probe determines if the driver should be loaded on
368  *  the adapter, based on the PCI vendor/device ID of the adapter.
369  *
370  *  return BUS_PROBE_DEFAULT on success, positive on failure
371  *********************************************************************/
372
373 static int
374 igb_probe(device_t dev)
375 {
376         char            adapter_name[60];
377         uint16_t        pci_vendor_id = 0;
378         uint16_t        pci_device_id = 0;
379         uint16_t        pci_subvendor_id = 0;
380         uint16_t        pci_subdevice_id = 0;
381         igb_vendor_info_t *ent;
382
383         INIT_DEBUGOUT("igb_probe: begin");
384
385         pci_vendor_id = pci_get_vendor(dev);
386         if (pci_vendor_id != IGB_VENDOR_ID)
387                 return (ENXIO);
388
389         pci_device_id = pci_get_device(dev);
390         pci_subvendor_id = pci_get_subvendor(dev);
391         pci_subdevice_id = pci_get_subdevice(dev);
392
393         ent = igb_vendor_info_array;
394         while (ent->vendor_id != 0) {
395                 if ((pci_vendor_id == ent->vendor_id) &&
396                     (pci_device_id == ent->device_id) &&
397
398                     ((pci_subvendor_id == ent->subvendor_id) ||
399                     (ent->subvendor_id == PCI_ANY_ID)) &&
400
401                     ((pci_subdevice_id == ent->subdevice_id) ||
402                     (ent->subdevice_id == PCI_ANY_ID))) {
403                         sprintf(adapter_name, "%s %s",
404                                 igb_strings[ent->index],
405                                 igb_driver_version);
406                         device_set_desc_copy(dev, adapter_name);
407                         return (BUS_PROBE_DEFAULT);
408                 }
409                 ent++;
410         }
411
412         return (ENXIO);
413 }
414
415 /*********************************************************************
416  *  Device initialization routine
417  *
418  *  The attach entry point is called when the driver is being loaded.
419  *  This routine identifies the type of hardware, allocates all resources
420  *  and initializes the hardware.
421  *
422  *  return 0 on success, positive on failure
423  *********************************************************************/
424
425 static int
426 igb_attach(device_t dev)
427 {
428         struct adapter  *adapter;
429         int             error = 0;
430         u16             eeprom_data;
431
432         INIT_DEBUGOUT("igb_attach: begin");
433
434         adapter = device_get_softc(dev);
435         adapter->dev = adapter->osdep.dev = dev;
436         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
437
438         /* SYSCTL stuff */
439         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
440             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
441             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
442             igb_sysctl_nvm_info, "I", "NVM Information");
443
444         SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
445             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
446             OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
447             &igb_enable_aim, 1, "Interrupt Moderation");
448
449         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
450             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
451             OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
452             adapter, 0, igb_set_flowcntl, "I", "Flow Control");
453
454         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
455
456         /* Determine hardware and mac info */
457         igb_identify_hardware(adapter);
458
459         /* Setup PCI resources */
460         if (igb_allocate_pci_resources(adapter)) {
461                 device_printf(dev, "Allocation of PCI resources failed\n");
462                 error = ENXIO;
463                 goto err_pci;
464         }
465
466         /* Do Shared Code initialization */
467         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
468                 device_printf(dev, "Setup of Shared code failed\n");
469                 error = ENXIO;
470                 goto err_pci;
471         }
472
473         e1000_get_bus_info(&adapter->hw);
474
475         /* Sysctl for limiting the amount of work done in the taskqueue */
476         igb_set_sysctl_value(adapter, "rx_processing_limit",
477             "max number of rx packets to process", &adapter->rx_process_limit,
478             igb_rx_process_limit);
479
480         /*
481          * Validate number of transmit and receive descriptors. It
482          * must not exceed the hardware maximum, and must be a multiple
483          * of IGB_DBA_ALIGN.
484          */
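        /*
         * For example, assuming 16-byte descriptors and an IGB_DBA_ALIGN
         * of 128, a ring of 1024 TX descriptors occupies 1024 * 16 =
         * 16384 bytes, a valid multiple of 128.
         */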
485         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
486             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
487                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
488                     IGB_DEFAULT_TXD, igb_txd);
489                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
490         } else
491                 adapter->num_tx_desc = igb_txd;
492         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
493             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
494                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
495                     IGB_DEFAULT_RXD, igb_rxd);
496                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
497         } else
498                 adapter->num_rx_desc = igb_rxd;
499
500         adapter->hw.mac.autoneg = DO_AUTO_NEG;
501         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
502         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
503
504         /* Copper options */
505         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
506                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
507                 adapter->hw.phy.disable_polarity_correction = FALSE;
508                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
509         }
510
511         /*
512          * Set the frame limits assuming
513          * standard ethernet sized frames.
514          */
515         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
516         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
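        /*
         * With the standard 1500-byte MTU this works out to
         * 1500 + 14 (header) + 4 (FCS) = 1518 bytes.
         */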
517
518         /*
519         ** Allocate and Setup Queues
520         */
521         if (igb_allocate_queues(adapter)) {
522                 error = ENOMEM;
523                 goto err_pci;
524         }
525
526         /* Allocate the appropriate stats memory */
527         if (adapter->vf_ifp) {
528                 adapter->stats = (struct e1000_vf_stats *)malloc(
529                     sizeof(struct e1000_vf_stats), M_DEVBUF,
530                     M_NOWAIT | M_ZERO);
531                 igb_vf_init_stats(adapter);
532         } else
533                 adapter->stats = (struct e1000_hw_stats *)malloc(
534                     sizeof(struct e1000_hw_stats), M_DEVBUF,
535                     M_NOWAIT | M_ZERO);
536         if (adapter->stats == NULL) {
537                 device_printf(dev, "Can not allocate stats memory\n");
538                 error = ENOMEM;
539                 goto err_late;
540         }
541
542         /* Allocate multicast array memory. */
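        /* (ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES bytes; assuming the
         * usual default of 128 addresses, that is 6 * 128 = 768 bytes.) */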
543         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
544             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
545         if (adapter->mta == NULL) {
546                 device_printf(dev, "Can not allocate multicast setup array\n");
547                 error = ENOMEM;
548                 goto err_late;
549         }
550
551         /* Some adapter-specific advanced features */
552         if (adapter->hw.mac.type >= e1000_i350) {
553                 igb_set_sysctl_value(adapter, "dma_coalesce",
554                     "configure dma coalesce",
555                     &adapter->dma_coalesce, igb_dma_coalesce);
556                 igb_set_sysctl_value(adapter, "eee_disabled",
557                     "enable Energy Efficient Ethernet",
558                     &adapter->hw.dev_spec._82575.eee_disable,
559                     igb_eee_disabled);
560                 e1000_set_eee_i350(&adapter->hw);
561         }
562
563         /*
564         ** Start from a known state; this is
565         ** important for reading the NVM and
566         ** MAC address from it.
567         */
568         e1000_reset_hw(&adapter->hw);
569
570         /* Make sure we have a good EEPROM before we read from it */
571         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
572                 /*
573                 ** Some PCI-E parts fail the first check due to
574                 ** the link being in a sleep state; call it again.
575                 ** If it fails a second time, it's a real issue.
576                 */
577                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
578                         device_printf(dev,
579                             "The EEPROM Checksum Is Not Valid\n");
580                         error = EIO;
581                         goto err_late;
582                 }
583         }
584
585         /*
586         ** Copy the permanent MAC address out of the EEPROM
587         */
588         if (e1000_read_mac_addr(&adapter->hw) < 0) {
589                 device_printf(dev, "EEPROM read error while reading MAC"
590                     " address\n");
591                 error = EIO;
592                 goto err_late;
593         }
594         /* Check its sanity */
595         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
596                 device_printf(dev, "Invalid MAC address\n");
597                 error = EIO;
598                 goto err_late;
599         }
600
601         /* 
602         ** Configure Interrupts
603         */
604         if ((adapter->msix > 1) && (igb_enable_msix))
605                 error = igb_allocate_msix(adapter);
606         else /* MSI or Legacy */
607                 error = igb_allocate_legacy(adapter);
608         if (error)
609                 goto err_late;
610
611         /* Setup OS specific network interface */
612         if (igb_setup_interface(dev, adapter) != 0)
613                 goto err_late;
614
615         /* Now get a good starting state */
616         igb_reset(adapter);
617
618         /* Initialize statistics */
619         igb_update_stats_counters(adapter);
620
621         adapter->hw.mac.get_link_status = 1;
622         igb_update_link_status(adapter);
623
624         /* Indicate SOL/IDER usage */
625         if (e1000_check_reset_block(&adapter->hw))
626                 device_printf(dev,
627                     "PHY reset is blocked due to SOL/IDER session.\n");
628
629         /* Determine if we have to control management hardware */
630         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
631
632         /*
633          * Setup Wake-on-Lan
634          */
635         /* APME bit in EEPROM is mapped to WUC.APME */
636         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
637         if (eeprom_data)
638                 adapter->wol = E1000_WUFC_MAG;
639
640         /* Register for VLAN events */
641         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
642              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
643         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
644              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
645
646         igb_add_hw_stats(adapter);
647
648         /* Tell the stack that the interface is not active */
649         adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
650
651         adapter->led_dev = led_create(igb_led_func, adapter,
652             device_get_nameunit(dev));
653
654         INIT_DEBUGOUT("igb_attach: end");
655
656         return (0);
657
658 err_late:
659         igb_free_transmit_structures(adapter);
660         igb_free_receive_structures(adapter);
661         igb_release_hw_control(adapter);
662         if (adapter->ifp != NULL)
663                 if_free(adapter->ifp);
664 err_pci:
665         igb_free_pci_resources(adapter);
666         free(adapter->mta, M_DEVBUF);
667         IGB_CORE_LOCK_DESTROY(adapter);
668
669         return (error);
670 }
671
672 /*********************************************************************
673  *  Device removal routine
674  *
675  *  The detach entry point is called when the driver is being removed.
676  *  This routine stops the adapter and deallocates all the resources
677  *  that were allocated for driver operation.
678  *
679  *  return 0 on success, positive on failure
680  *********************************************************************/
681
682 static int
683 igb_detach(device_t dev)
684 {
685         struct adapter  *adapter = device_get_softc(dev);
686         struct ifnet    *ifp = adapter->ifp;
687
688         INIT_DEBUGOUT("igb_detach: begin");
689
690         /* Make sure VLANS are not using driver */
691         if (adapter->ifp->if_vlantrunk != NULL) {
692                 device_printf(dev,"Vlan in use, detach first\n");
693                 return (EBUSY);
694         }
695
696         if (adapter->led_dev != NULL)
697                 led_destroy(adapter->led_dev);
698
699 #ifdef DEVICE_POLLING
700         if (ifp->if_capenable & IFCAP_POLLING)
701                 ether_poll_deregister(ifp);
702 #endif
703
704         IGB_CORE_LOCK(adapter);
705         adapter->in_detach = 1;
706         igb_stop(adapter);
707         IGB_CORE_UNLOCK(adapter);
708
709         e1000_phy_hw_reset(&adapter->hw);
710
711         /* Give control back to firmware */
712         igb_release_manageability(adapter);
713         igb_release_hw_control(adapter);
714
715         if (adapter->wol) {
716                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
717                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
718                 igb_enable_wakeup(dev);
719         }
720
721         /* Unregister VLAN events */
722         if (adapter->vlan_attach != NULL)
723                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
724         if (adapter->vlan_detach != NULL)
725                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
726
727         ether_ifdetach(adapter->ifp);
728
729         callout_drain(&adapter->timer);
730
731         igb_free_pci_resources(adapter);
732         bus_generic_detach(dev);
733         if_free(ifp);
734
735         igb_free_transmit_structures(adapter);
736         igb_free_receive_structures(adapter);
737         free(adapter->mta, M_DEVBUF);
738
739         IGB_CORE_LOCK_DESTROY(adapter);
740
741         return (0);
742 }
743
744 /*********************************************************************
745  *
746  *  Shutdown entry point
747  *
748  **********************************************************************/
749
750 static int
751 igb_shutdown(device_t dev)
752 {
753         return igb_suspend(dev);
754 }
755
756 /*
757  * Suspend/resume device methods.
758  */
759 static int
760 igb_suspend(device_t dev)
761 {
762         struct adapter *adapter = device_get_softc(dev);
763
764         IGB_CORE_LOCK(adapter);
765
766         igb_stop(adapter);
767
768         igb_release_manageability(adapter);
769         igb_release_hw_control(adapter);
770
771         if (adapter->wol) {
772                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
773                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
774                 igb_enable_wakeup(dev);
775         }
776
777         IGB_CORE_UNLOCK(adapter);
778
779         return bus_generic_suspend(dev);
780 }
781
782 static int
783 igb_resume(device_t dev)
784 {
785         struct adapter *adapter = device_get_softc(dev);
786         struct ifnet *ifp = adapter->ifp;
787
788         IGB_CORE_LOCK(adapter);
789         igb_init_locked(adapter);
790         igb_init_manageability(adapter);
791
792         if ((ifp->if_flags & IFF_UP) &&
793             (ifp->if_drv_flags & IFF_DRV_RUNNING))
794                 igb_start(ifp);
795
796         IGB_CORE_UNLOCK(adapter);
797
798         return bus_generic_resume(dev);
799 }
800
801
802 /*********************************************************************
803  *  Transmit entry point
804  *
805  *  igb_start is called by the stack to initiate a transmit.
806  *  The driver will remain in this routine as long as there are
807  *  packets to transmit and transmit resources are available.
808  *  In case resources are not available stack is notified and
809  *  the packet is requeued.
810  **********************************************************************/
811
812 static void
813 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
814 {
815         struct adapter  *adapter = ifp->if_softc;
816         struct mbuf     *m_head;
817
818         IGB_TX_LOCK_ASSERT(txr);
819
820         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
821             IFF_DRV_RUNNING)
822                 return;
823         if (!adapter->link_active)
824                 return;
825
826         /* Call cleanup if number of TX descriptors low */
827         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
828                 igb_txeof(txr);
829
830         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
831                 if (txr->tx_avail <= IGB_MAX_SCATTER) {
832                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
833                         break;
834                 }
835                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
836                 if (m_head == NULL)
837                         break;
838                 /*
839                  *  Encapsulation can modify our pointer, and or make it
840                  *  NULL on failure.  In that event, we can't requeue.
841                  */
842                 if (igb_xmit(txr, &m_head)) {
843                         if (m_head == NULL)
844                                 break;
845                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
846                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
847                         break;
848                 }
849
850                 /* Send a copy of the frame to the BPF listener */
851                 ETHER_BPF_MTAP(ifp, m_head);
852
853                 /* Set watchdog on */
854                 txr->watchdog_time = ticks;
855                 txr->queue_status = IGB_QUEUE_WORKING;
856         }
857 }
858  
859 /*
860  * Legacy TX driver routine, called from the
861  * stack, always uses tx[0], and spins for it.
862  * Should not be used with multiqueue tx
863  */
864 static void
865 igb_start(struct ifnet *ifp)
866 {
867         struct adapter  *adapter = ifp->if_softc;
868         struct tx_ring  *txr = adapter->tx_rings;
869
870         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
871                 IGB_TX_LOCK(txr);
872                 igb_start_locked(txr, ifp);
873                 IGB_TX_UNLOCK(txr);
874         }
875         return;
876 }
877
878 #if __FreeBSD_version >= 800000
879 /*
880 ** Multiqueue Transmit driver
881 **
882 */
883 static int
884 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
885 {
886         struct adapter          *adapter = ifp->if_softc;
887         struct igb_queue        *que;
888         struct tx_ring          *txr;
889         int                     i = 0, err = 0;
890
891         /* Which queue to use */
892         if ((m->m_flags & M_FLOWID) != 0)
893                 i = m->m_pkthdr.flowid % adapter->num_queues;
894
895         txr = &adapter->tx_rings[i];
896         que = &adapter->queues[i];
897
898         if (IGB_TX_TRYLOCK(txr)) {
899                 err = igb_mq_start_locked(ifp, txr, m);
900                 IGB_TX_UNLOCK(txr);
901         } else {
902                 err = drbr_enqueue(ifp, txr->br, m);
903                 taskqueue_enqueue(que->tq, &que->que_task);
904         }
905
906         return (err);
907 }
908
909 static int
910 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
911 {
912         struct adapter  *adapter = txr->adapter;
913         struct mbuf     *next;
914         int             err = 0, enq;
915
916         IGB_TX_LOCK_ASSERT(txr);
917
918         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
919             IFF_DRV_RUNNING || adapter->link_active == 0) {
920                 if (m != NULL)
921                         err = drbr_enqueue(ifp, txr->br, m);
922                 return (err);
923         }
924
925         enq = 0;
926         if (m == NULL) {
927                 next = drbr_dequeue(ifp, txr->br);
928         } else if (drbr_needs_enqueue(ifp, txr->br)) {
929                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
930                         return (err);
931                 next = drbr_dequeue(ifp, txr->br);
932         } else
933                 next = m;
934
935         /* Process the queue */
936         while (next != NULL) {
937                 if ((err = igb_xmit(txr, &next)) != 0) {
938                         if (next != NULL)
939                                 err = drbr_enqueue(ifp, txr->br, next);
940                         break;
941                 }
942                 enq++;
943                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
944                 ETHER_BPF_MTAP(ifp, next);
945                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
946                         break;
947                 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
948                         igb_txeof(txr);
949                 if (txr->tx_avail <= IGB_MAX_SCATTER) {
950                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
951                         break;
952                 }
953                 next = drbr_dequeue(ifp, txr->br);
954         }
955         if (enq > 0) {
956                 /* Set the watchdog */
957                 txr->queue_status = IGB_QUEUE_WORKING;
958                 txr->watchdog_time = ticks;
959         }
960         return (err);
961 }
962
963 /*
964 ** Flush all ring buffers
965 */
966 static void
967 igb_qflush(struct ifnet *ifp)
968 {
969         struct adapter  *adapter = ifp->if_softc;
970         struct tx_ring  *txr = adapter->tx_rings;
971         struct mbuf     *m;
972
973         for (int i = 0; i < adapter->num_queues; i++, txr++) {
974                 IGB_TX_LOCK(txr);
975                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
976                         m_freem(m);
977                 IGB_TX_UNLOCK(txr);
978         }
979         if_qflush(ifp);
980 }
981 #endif /* __FreeBSD_version >= 800000 */
982
983 /*********************************************************************
984  *  Ioctl entry point
985  *
986  *  igb_ioctl is called when the user wants to configure the
987  *  interface.
988  *
989  *  return 0 on success, positive on failure
990  **********************************************************************/
991
992 static int
993 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
994 {
995         struct adapter  *adapter = ifp->if_softc;
996         struct ifreq *ifr = (struct ifreq *)data;
997 #ifdef INET
998         struct ifaddr *ifa = (struct ifaddr *)data;
999 #endif
1000         int error = 0;
1001
1002         if (adapter->in_detach)
1003                 return (error);
1004
1005         switch (command) {
1006         case SIOCSIFADDR:
1007 #ifdef INET
1008                 if (ifa->ifa_addr->sa_family == AF_INET) {
1009                         /*
1010                          * XXX
1011                          * Since resetting hardware takes a very long time
1012                          * and results in link renegotiation, we only
1013                          * initialize the hardware when it is absolutely
1014                          * required.
1015                          */
1016                         ifp->if_flags |= IFF_UP;
1017                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1018                                 IGB_CORE_LOCK(adapter);
1019                                 igb_init_locked(adapter);
1020                                 IGB_CORE_UNLOCK(adapter);
1021                         }
1022                         if (!(ifp->if_flags & IFF_NOARP))
1023                                 arp_ifinit(ifp, ifa);
1024                 } else
1025 #endif
1026                         error = ether_ioctl(ifp, command, data);
1027                 break;
1028         case SIOCSIFMTU:
1029             {
1030                 int max_frame_size;
1031
1032                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1033
1034                 IGB_CORE_LOCK(adapter);
1035                 max_frame_size = 9234;
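                /*
                 * 9234 allows an MTU of up to
                 * 9234 - 14 (header) - 4 (CRC) = 9216 bytes.
                 */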
1036                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1037                     ETHER_CRC_LEN) {
1038                         IGB_CORE_UNLOCK(adapter);
1039                         error = EINVAL;
1040                         break;
1041                 }
1042
1043                 ifp->if_mtu = ifr->ifr_mtu;
1044                 adapter->max_frame_size =
1045                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1046                 igb_init_locked(adapter);
1047                 IGB_CORE_UNLOCK(adapter);
1048                 break;
1049             }
1050         case SIOCSIFFLAGS:
1051                 IOCTL_DEBUGOUT("ioctl rcv'd: "
1052                     "SIOCSIFFLAGS (Set Interface Flags)");
1053                 IGB_CORE_LOCK(adapter);
1054                 if (ifp->if_flags & IFF_UP) {
1055                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1056                                 if ((ifp->if_flags ^ adapter->if_flags) &
1057                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1058                                         igb_disable_promisc(adapter);
1059                                         igb_set_promisc(adapter);
1060                                 }
1061                         } else
1062                                 igb_init_locked(adapter);
1063                 } else
1064                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1065                                 igb_stop(adapter);
1066                 adapter->if_flags = ifp->if_flags;
1067                 IGB_CORE_UNLOCK(adapter);
1068                 break;
1069         case SIOCADDMULTI:
1070         case SIOCDELMULTI:
1071                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1072                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1073                         IGB_CORE_LOCK(adapter);
1074                         igb_disable_intr(adapter);
1075                         igb_set_multi(adapter);
1076 #ifdef DEVICE_POLLING
1077                         if (!(ifp->if_capenable & IFCAP_POLLING))
1078 #endif
1079                                 igb_enable_intr(adapter);
1080                         IGB_CORE_UNLOCK(adapter);
1081                 }
1082                 break;
1083         case SIOCSIFMEDIA:
1084                 /*
1085                 ** As the speed/duplex settings are being
1086                 ** changed, we need to reset the PHY.
1087                 */
1088                 adapter->hw.phy.reset_disable = FALSE;
1089                 /* Check SOL/IDER usage */
1090                 IGB_CORE_LOCK(adapter);
1091                 if (e1000_check_reset_block(&adapter->hw)) {
1092                         IGB_CORE_UNLOCK(adapter);
1093                         device_printf(adapter->dev, "Media change is"
1094                             " blocked due to SOL/IDER session.\n");
1095                         break;
1096                 }
1097                 IGB_CORE_UNLOCK(adapter);
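                /* FALLTHROUGH */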
1098         case SIOCGIFMEDIA:
1099                 IOCTL_DEBUGOUT("ioctl rcv'd: "
1100                     "SIOCxIFMEDIA (Get/Set Interface Media)");
1101                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1102                 break;
1103         case SIOCSIFCAP:
1104             {
1105                 int mask, reinit;
1106
1107                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1108                 reinit = 0;
1109                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1110 #ifdef DEVICE_POLLING
1111                 if (mask & IFCAP_POLLING) {
1112                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1113                                 error = ether_poll_register(igb_poll, ifp);
1114                                 if (error)
1115                                         return (error);
1116                                 IGB_CORE_LOCK(adapter);
1117                                 igb_disable_intr(adapter);
1118                                 ifp->if_capenable |= IFCAP_POLLING;
1119                                 IGB_CORE_UNLOCK(adapter);
1120                         } else {
1121                                 error = ether_poll_deregister(ifp);
1122                                 /* Enable interrupt even in error case */
1123                                 IGB_CORE_LOCK(adapter);
1124                                 igb_enable_intr(adapter);
1125                                 ifp->if_capenable &= ~IFCAP_POLLING;
1126                                 IGB_CORE_UNLOCK(adapter);
1127                         }
1128                 }
1129 #endif
1130                 if (mask & IFCAP_HWCSUM) {
1131                         ifp->if_capenable ^= IFCAP_HWCSUM;
1132                         reinit = 1;
1133                 }
1134                 if (mask & IFCAP_TSO4) {
1135                         ifp->if_capenable ^= IFCAP_TSO4;
1136                         reinit = 1;
1137                 }
1138                 if (mask & IFCAP_VLAN_HWTAGGING) {
1139                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1140                         reinit = 1;
1141                 }
1142                 if (mask & IFCAP_VLAN_HWFILTER) {
1143                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1144                         reinit = 1;
1145                 }
1146                 if (mask & IFCAP_LRO) {
1147                         ifp->if_capenable ^= IFCAP_LRO;
1148                         reinit = 1;
1149                 }
1150                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1151                         igb_init(adapter);
1152                 VLAN_CAPABILITIES(ifp);
1153                 break;
1154             }
1155
1156         default:
1157                 error = ether_ioctl(ifp, command, data);
1158                 break;
1159         }
1160
1161         return (error);
1162 }
1163
1164
1165 /*********************************************************************
1166  *  Init entry point
1167  *
1168  *  This routine is used in two ways. It is used by the stack as
1169  *  init entry point in network interface structure. It is also used
1170  *  by the driver as a hw/sw initialization routine to get to a
1171  *  consistent state.
1172  *
1173  *  return 0 on success, positive on failure
1174  **********************************************************************/
1175
1176 static void
1177 igb_init_locked(struct adapter *adapter)
1178 {
1179         struct ifnet    *ifp = adapter->ifp;
1180         device_t        dev = adapter->dev;
1181
1182         INIT_DEBUGOUT("igb_init: begin");
1183
1184         IGB_CORE_LOCK_ASSERT(adapter);
1185
1186         igb_disable_intr(adapter);
1187         callout_stop(&adapter->timer);
1188
1189         /* Get the latest MAC address; the user can use a LAA */
1190         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1191               ETHER_ADDR_LEN);
1192
1193         /* Put the address into the Receive Address Array */
1194         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1195
1196         igb_reset(adapter);
1197         igb_update_link_status(adapter);
1198
1199         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1200
1201         /* Set hardware offload abilities */
1202         ifp->if_hwassist = 0;
1203         if (ifp->if_capenable & IFCAP_TXCSUM) {
1204                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1205 #if __FreeBSD_version >= 800000
1206                 if (adapter->hw.mac.type == e1000_82576)
1207                         ifp->if_hwassist |= CSUM_SCTP;
1208 #endif
1209         }
1210
1211         if (ifp->if_capenable & IFCAP_TSO4)
1212                 ifp->if_hwassist |= CSUM_TSO;
1213
1214         /* Configure for OS presence */
1215         igb_init_manageability(adapter);
1216
1217         /* Prepare transmit descriptors and buffers */
1218         igb_setup_transmit_structures(adapter);
1219         igb_initialize_transmit_units(adapter);
1220
1221         /* Setup Multicast table */
1222         igb_set_multi(adapter);
1223
1224         /*
1225         ** Figure out the desired mbuf pool
1226         ** for doing jumbo/packetsplit
1227         */
1228         if (adapter->max_frame_size <= 2048)
1229                 adapter->rx_mbuf_sz = MCLBYTES;
1230         else if (adapter->max_frame_size <= 4096)
1231                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1232         else
1233                 adapter->rx_mbuf_sz = MJUM9BYTES;
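        /*
        ** E.g. the default 1518-byte max_frame_size selects a standard
        ** 2K cluster (MCLBYTES); a 9000-byte MTU (9018-byte frame)
        ** selects a 9K cluster (MJUM9BYTES).
        */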
1234
1235         /* Prepare receive descriptors and buffers */
1236         if (igb_setup_receive_structures(adapter)) {
1237                 device_printf(dev, "Could not setup receive structures\n");
1238                 return;
1239         }
1240         igb_initialize_receive_units(adapter);
1241
1242         /* Enable VLAN support */
1243         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1244                 igb_setup_vlan_hw_support(adapter);
1245                                 
1246         /* Don't lose promiscuous settings */
1247         igb_set_promisc(adapter);
1248
1249         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1250         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1251
1252         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1253         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1254
1255         if (adapter->msix > 1) /* Set up queue routing */
1256                 igb_configure_queues(adapter);
1257
1258         /* this clears any pending interrupts */
1259         E1000_READ_REG(&adapter->hw, E1000_ICR);
1260 #ifdef DEVICE_POLLING
1261         /*
1262          * Only enable interrupts if we are not polling; make sure
1263          * they are off otherwise.
1264          */
1265         if (ifp->if_capenable & IFCAP_POLLING)
1266                 igb_disable_intr(adapter);
1267         else
1268 #endif /* DEVICE_POLLING */
1269         {
1270                 igb_enable_intr(adapter);
1271                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1272         }
1273
1274         /* Set Energy Efficient Ethernet */
1275         e1000_set_eee_i350(&adapter->hw);
1276
1277         /* Don't reset the phy next time init gets called */
1278         adapter->hw.phy.reset_disable = TRUE;
1279 }
1280
1281 static void
1282 igb_init(void *arg)
1283 {
1284         struct adapter *adapter = arg;
1285
1286         IGB_CORE_LOCK(adapter);
1287         igb_init_locked(adapter);
1288         IGB_CORE_UNLOCK(adapter);
1289 }
1290
1291
1292 static void
1293 igb_handle_que(void *context, int pending)
1294 {
1295         struct igb_queue *que = context;
1296         struct adapter *adapter = que->adapter;
1297         struct tx_ring *txr = que->txr;
1298         struct ifnet    *ifp = adapter->ifp;
1299
1300         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1301                 bool    more;
1302
1303                 more = igb_rxeof(que, -1, NULL);
1304
1305                 IGB_TX_LOCK(txr);
1306                 if (igb_txeof(txr))
1307                         more = TRUE;
1308 #if __FreeBSD_version >= 800000
1309                 if (!drbr_empty(ifp, txr->br))
1310                         igb_mq_start_locked(ifp, txr, NULL);
1311 #else
1312                 igb_start_locked(txr, ifp);
1313 #endif
1314                 IGB_TX_UNLOCK(txr);
1315                 if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1316                         taskqueue_enqueue(que->tq, &que->que_task);
1317                         return;
1318                 }
1319         }
1320
1321 #ifdef DEVICE_POLLING
1322         if (ifp->if_capenable & IFCAP_POLLING)
1323                 return;
1324 #endif
1325         /* Reenable this interrupt */
1326         if (que->eims)
1327                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1328         else
1329                 igb_enable_intr(adapter);
1330 }
1331
1332 /* Deal with link in a sleepable context */
1333 static void
1334 igb_handle_link(void *context, int pending)
1335 {
1336         struct adapter *adapter = context;
1337
1338         adapter->hw.mac.get_link_status = 1;
1339         igb_update_link_status(adapter);
1340 }
1341
1342 /*********************************************************************
1343  *
1344  *  MSI/Legacy Deferred
1345  *  Interrupt Service routine  
1346  *
1347  *********************************************************************/
1348 static int
1349 igb_irq_fast(void *arg)
1350 {
1351         struct adapter          *adapter = arg;
1352         struct igb_queue        *que = adapter->queues;
1353         u32                     reg_icr;
1354
1355
1356         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1357
1358         /* Hot eject?  */
1359         if (reg_icr == 0xffffffff)
1360                 return FILTER_STRAY;
1361
1362         /* Definitely not our interrupt.  */
1363         if (reg_icr == 0x0)
1364                 return FILTER_STRAY;
1365
1366         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1367                 return FILTER_STRAY;
1368
1369         /*
1370          * Mask interrupts until the taskqueue is finished running.  This is
1371          * cheap, just assume that it is needed.  This also works around the
1372          * MSI message reordering errata on certain systems.
1373          */
1374         igb_disable_intr(adapter);
1375         taskqueue_enqueue(que->tq, &que->que_task);
1376
1377         /* Link status change */
1378         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1379                 taskqueue_enqueue(que->tq, &adapter->link_task);
1380
1381         if (reg_icr & E1000_ICR_RXO)
1382                 adapter->rx_overruns++;
1383         return FILTER_HANDLED;
1384 }
1385
1386 #ifdef DEVICE_POLLING
1387 /*********************************************************************
1388  *
1389  *  Legacy polling routine : if using this code you MUST be sure that
1390  *  multiqueue is not defined, ie, set igb_num_queues to 1.
1391  *
1392  *********************************************************************/
1393 #if __FreeBSD_version >= 800000
1394 #define POLL_RETURN_COUNT(a) (a)
1395 static int
1396 #else
1397 #define POLL_RETURN_COUNT(a)
1398 static void
1399 #endif
1400 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1401 {
1402         struct adapter          *adapter = ifp->if_softc;
1403         struct igb_queue        *que = adapter->queues;
1404         struct tx_ring          *txr = adapter->tx_rings;
1405         u32                     reg_icr, rx_done = 0;
1406         u32                     loop = IGB_MAX_LOOP;
1407         bool                    more;
1408
1409         IGB_CORE_LOCK(adapter);
1410         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1411                 IGB_CORE_UNLOCK(adapter);
1412                 return POLL_RETURN_COUNT(rx_done);
1413         }
1414
1415         if (cmd == POLL_AND_CHECK_STATUS) {
1416                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1417                 /* Link status change */
1418                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1419                         igb_handle_link(adapter, 0);
1420
1421                 if (reg_icr & E1000_ICR_RXO)
1422                         adapter->rx_overruns++;
1423         }
1424         IGB_CORE_UNLOCK(adapter);
1425
1426         igb_rxeof(que, count, &rx_done);
1427
1428         IGB_TX_LOCK(txr);
1429         do {
1430                 more = igb_txeof(txr);
1431         } while (loop-- && more);
1432 #if __FreeBSD_version >= 800000
1433         if (!drbr_empty(ifp, txr->br))
1434                 igb_mq_start_locked(ifp, txr, NULL);
1435 #else
1436         igb_start_locked(txr, ifp);
1437 #endif
1438         IGB_TX_UNLOCK(txr);
1439         return POLL_RETURN_COUNT(rx_done);
1440 }
1441 #endif /* DEVICE_POLLING */
1442
1443 /*********************************************************************
1444  *
 *  MSIX Queue Interrupt Service routine (services both TX and RX)
1446  *
1447  **********************************************************************/
1448 static void
1449 igb_msix_que(void *arg)
1450 {
1451         struct igb_queue *que = arg;
1452         struct adapter *adapter = que->adapter;
1453         struct tx_ring *txr = que->txr;
1454         struct rx_ring *rxr = que->rxr;
1455         u32             newitr = 0;
1456         bool            more_tx, more_rx;
1457
1458         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1459         ++que->irqs;
1460
1461         IGB_TX_LOCK(txr);
1462         more_tx = igb_txeof(txr);
1463         IGB_TX_UNLOCK(txr);
1464
1465         more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1466
1467         if (igb_enable_aim == FALSE)
1468                 goto no_calc;
1469         /*
1470         ** Do Adaptive Interrupt Moderation:
1471         **  - Write out last calculated setting
1472         **  - Calculate based on average size over
1473         **    the last interval.
1474         */
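        /*
        ** A sketch of the arithmetic below: with an average of 1500
        ** bytes/packet at gigabit speed, newitr = 1500 + 24 = 1524;
        ** that is above the 300-1200 midrange band, so it is halved
        ** to 762 and then masked down to a valid register value.
        */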
1475         if (que->eitr_setting)
1476                 E1000_WRITE_REG(&adapter->hw,
1477                     E1000_EITR(que->msix), que->eitr_setting);
1478  
1479         que->eitr_setting = 0;
1480
1481         /* Idle, do nothing */
1482         if ((txr->bytes == 0) && (rxr->bytes == 0))
1483                 goto no_calc;
1484                                 
        /* Use half the default if the link is below gigabit */
1486         if (adapter->link_speed != 1000)
1487                 newitr = IGB_DEFAULT_ITR / 2;
1488         else {
1489                 if ((txr->bytes) && (txr->packets))
1490                         newitr = txr->bytes/txr->packets;
1491                 if ((rxr->bytes) && (rxr->packets))
1492                         newitr = max(newitr,
1493                             (rxr->bytes / rxr->packets));
                newitr += 24; /* account for hardware framing and CRC */
1495                 /* set an upper boundary */
1496                 newitr = min(newitr, 3000);
1497                 /* Be nice to the mid range */
1498                 if ((newitr > 300) && (newitr < 1200))
1499                         newitr = (newitr / 3);
1500                 else
1501                         newitr = (newitr / 2);
1502         }
1503         newitr &= 0x7FFC;  /* Mask invalid bits */
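        /*
        ** The 82575 expects the interval mirrored into the upper half
        ** of EITR; later MACs instead take the CNT_IGNR flag, which (as
        ** we read it) keeps the write from resetting the internal
        ** interrupt counters.
        */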
1504         if (adapter->hw.mac.type == e1000_82575)
1505                 newitr |= newitr << 16;
1506         else
1507                 newitr |= E1000_EITR_CNT_IGNR;
1508                  
1509         /* save for next interrupt */
1510         que->eitr_setting = newitr;
1511
1512         /* Reset state */
1513         txr->bytes = 0;
1514         txr->packets = 0;
1515         rxr->bytes = 0;
1516         rxr->packets = 0;
1517
1518 no_calc:
        /* Schedule a clean task if needed */
1520         if (more_tx || more_rx ||
1521             (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
1522                 taskqueue_enqueue(que->tq, &que->que_task);
1523         else
1524                 /* Reenable this interrupt */
1525                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1526         return;
1527 }
1528
1529
1530 /*********************************************************************
1531  *
1532  *  MSIX Link Interrupt Service routine
1533  *
1534  **********************************************************************/
1535
1536 static void
1537 igb_msix_link(void *arg)
1538 {
1539         struct adapter  *adapter = arg;
1540         u32             icr;
1541
1542         ++adapter->link_irq;
1543         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1544         if (!(icr & E1000_ICR_LSC))
1545                 goto spurious;
1546         igb_handle_link(adapter, 0);
1547
1548 spurious:
1549         /* Rearm */
1550         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1551         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1552         return;
1553 }
1554
1555
1556 /*********************************************************************
1557  *
1558  *  Media Ioctl callback
1559  *
1560  *  This routine is called whenever the user queries the status of
1561  *  the interface using ifconfig.
1562  *
1563  **********************************************************************/
1564 static void
1565 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1566 {
1567         struct adapter *adapter = ifp->if_softc;
1568         u_char fiber_type = IFM_1000_SX;
1569
1570         INIT_DEBUGOUT("igb_media_status: begin");
1571
1572         IGB_CORE_LOCK(adapter);
1573         igb_update_link_status(adapter);
1574
1575         ifmr->ifm_status = IFM_AVALID;
1576         ifmr->ifm_active = IFM_ETHER;
1577
1578         if (!adapter->link_active) {
1579                 IGB_CORE_UNLOCK(adapter);
1580                 return;
1581         }
1582
1583         ifmr->ifm_status |= IFM_ACTIVE;
1584
1585         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1586             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1587                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1588         else {
1589                 switch (adapter->link_speed) {
1590                 case 10:
1591                         ifmr->ifm_active |= IFM_10_T;
1592                         break;
1593                 case 100:
1594                         ifmr->ifm_active |= IFM_100_TX;
1595                         break;
1596                 case 1000:
1597                         ifmr->ifm_active |= IFM_1000_T;
1598                         break;
1599                 }
1600                 if (adapter->link_duplex == FULL_DUPLEX)
1601                         ifmr->ifm_active |= IFM_FDX;
1602                 else
1603                         ifmr->ifm_active |= IFM_HDX;
1604         }
1605         IGB_CORE_UNLOCK(adapter);
1606 }
1607
1608 /*********************************************************************
1609  *
1610  *  Media Ioctl callback
1611  *
1612  *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt options with ifconfig.
1614  *
1615  **********************************************************************/
1616 static int
1617 igb_media_change(struct ifnet *ifp)
1618 {
1619         struct adapter *adapter = ifp->if_softc;
1620         struct ifmedia  *ifm = &adapter->media;
1621
1622         INIT_DEBUGOUT("igb_media_change: begin");
1623
1624         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1625                 return (EINVAL);
1626
1627         IGB_CORE_LOCK(adapter);
1628         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1629         case IFM_AUTO:
1630                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1631                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1632                 break;
1633         case IFM_1000_LX:
1634         case IFM_1000_SX:
1635         case IFM_1000_T:
1636                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1637                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1638                 break;
1639         case IFM_100_TX:
1640                 adapter->hw.mac.autoneg = FALSE;
1641                 adapter->hw.phy.autoneg_advertised = 0;
1642                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1643                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1644                 else
1645                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1646                 break;
1647         case IFM_10_T:
1648                 adapter->hw.mac.autoneg = FALSE;
1649                 adapter->hw.phy.autoneg_advertised = 0;
1650                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1651                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1652                 else
1653                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1654                 break;
1655         default:
1656                 device_printf(adapter->dev, "Unsupported media type\n");
1657         }
1658
1659         igb_init_locked(adapter);
1660         IGB_CORE_UNLOCK(adapter);
1661
1662         return (0);
1663 }
1664
1665
1666 /*********************************************************************
1667  *
 *  This routine maps the mbufs to the Advanced TX descriptors
 *  used by the 82575-class adapters.
1670  *  
1671  **********************************************************************/
1672
1673 static int
1674 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1675 {
1676         struct adapter          *adapter = txr->adapter;
1677         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1678         bus_dmamap_t            map;
1679         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1680         union e1000_adv_tx_desc *txd = NULL;
1681         struct mbuf             *m_head;
1682         u32                     olinfo_status = 0, cmd_type_len = 0;
1683         int                     nsegs, i, j, error, first, last = 0;
1684         u32                     hdrlen = 0;
1685
1686         m_head = *m_headp;
1687
1688
1689         /* Set basic descriptor constants */
1690         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1691         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1692         if (m_head->m_flags & M_VLANTAG)
1693                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1694
1695         /*
1696          * Map the packet for DMA.
1697          *
1698          * Capture the first descriptor index,
1699          * this descriptor will have the index
1700          * of the EOP which is the only one that
1701          * now gets a DONE bit writeback.
1702          */
1703         first = txr->next_avail_desc;
1704         tx_buffer = &txr->tx_buffers[first];
1705         tx_buffer_mapped = tx_buffer;
1706         map = tx_buffer->map;
1707
1708         error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1709             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1710
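        /*
         * EFBIG from the load above means the chain has more segments
         * than the map allows; m_defrag() compacts the mbuf chain into
         * fewer clusters so the load can be retried once before the
         * frame is dropped.
         */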
1711         if (error == EFBIG) {
1712                 struct mbuf *m;
1713
1714                 m = m_defrag(*m_headp, M_DONTWAIT);
1715                 if (m == NULL) {
1716                         adapter->mbuf_defrag_failed++;
1717                         m_freem(*m_headp);
1718                         *m_headp = NULL;
1719                         return (ENOBUFS);
1720                 }
1721                 *m_headp = m;
1722
1723                 /* Try it again */
1724                 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1725                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1726
1727                 if (error == ENOMEM) {
1728                         adapter->no_tx_dma_setup++;
1729                         return (error);
1730                 } else if (error != 0) {
1731                         adapter->no_tx_dma_setup++;
1732                         m_freem(*m_headp);
1733                         *m_headp = NULL;
1734                         return (error);
1735                 }
1736         } else if (error == ENOMEM) {
1737                 adapter->no_tx_dma_setup++;
1738                 return (error);
1739         } else if (error != 0) {
1740                 adapter->no_tx_dma_setup++;
1741                 m_freem(*m_headp);
1742                 *m_headp = NULL;
1743                 return (error);
1744         }
1745
1746         /* Check again to be sure we have enough descriptors */
1747         if (nsegs > (txr->tx_avail - 2)) {
1748                 txr->no_desc_avail++;
1749                 bus_dmamap_unload(txr->txtag, map);
1750                 return (ENOBUFS);
1751         }
1752         m_head = *m_headp;
1753
1754         /*
1755          * Set up the context descriptor:
1756          * used when any hardware offload is done.
1757          * This includes CSUM, VLAN, and TSO. It
1758          * will use the first descriptor.
1759          */
1760         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1761                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1762                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1763                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1764                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1765                 } else
1766                         return (ENXIO); 
1767         } else if (igb_tx_ctx_setup(txr, m_head))
1768                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1769
1770         /* Calculate payload length */
1771         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1772             << E1000_ADVTXD_PAYLEN_SHIFT);
1773
1774         /* 82575 needs the queue index added */
1775         if (adapter->hw.mac.type == e1000_82575)
1776                 olinfo_status |= txr->me << 4;
1777
1778         /* Set up our transmit descriptors */
1779         i = txr->next_avail_desc;
1780         for (j = 0; j < nsegs; j++) {
1781                 bus_size_t seg_len;
1782                 bus_addr_t seg_addr;
1783
1784                 tx_buffer = &txr->tx_buffers[i];
1785                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1786                 seg_addr = segs[j].ds_addr;
1787                 seg_len  = segs[j].ds_len;
1788
1789                 txd->read.buffer_addr = htole64(seg_addr);
1790                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1791                 txd->read.olinfo_status = htole32(olinfo_status);
1792                 last = i;
1793                 if (++i == adapter->num_tx_desc)
1794                         i = 0;
1795                 tx_buffer->m_head = NULL;
1796                 tx_buffer->next_eop = -1;
1797         }
1798
1799         txr->next_avail_desc = i;
1800         txr->tx_avail -= nsegs;
1801
1802         tx_buffer->m_head = m_head;
1803         tx_buffer_mapped->map = tx_buffer->map;
1804         tx_buffer->map = map;
1805         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1806
1807         /*
1808          * Last Descriptor of Packet
1809          * needs End Of Packet (EOP)
1810          * and Report Status (RS)
1811          */
1812         txd->read.cmd_type_len |=
1813             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1814         /*
1815          * Keep track in the first buffer which
1816          * descriptor will be written back
1817          */
1818         tx_buffer = &txr->tx_buffers[first];
1819         tx_buffer->next_eop = last;
1820         txr->watchdog_time = ticks;
1821
1822         /*
         * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1824          * that this frame is available to transmit.
1825          */
1826         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1827             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1828         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1829         ++txr->tx_packets;
1830
1831         return (0);
1832
1833 }
1834
1835 static void
1836 igb_set_promisc(struct adapter *adapter)
1837 {
1838         struct ifnet    *ifp = adapter->ifp;
1839         struct e1000_hw *hw = &adapter->hw;
1840         u32             reg;
1841
1842         if (adapter->vf_ifp) {
1843                 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1844                 return;
1845         }
1846
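        /* RCTL.UPE accepts all unicast frames, RCTL.MPE all multicast */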
1847         reg = E1000_READ_REG(hw, E1000_RCTL);
1848         if (ifp->if_flags & IFF_PROMISC) {
1849                 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1850                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1851         } else if (ifp->if_flags & IFF_ALLMULTI) {
1852                 reg |= E1000_RCTL_MPE;
1853                 reg &= ~E1000_RCTL_UPE;
1854                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1855         }
1856 }
1857
1858 static void
1859 igb_disable_promisc(struct adapter *adapter)
1860 {
1861         struct e1000_hw *hw = &adapter->hw;
1862         u32             reg;
1863
1864         if (adapter->vf_ifp) {
1865                 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1866                 return;
1867         }
1868         reg = E1000_READ_REG(hw, E1000_RCTL);
1869         reg &=  (~E1000_RCTL_UPE);
1870         reg &=  (~E1000_RCTL_MPE);
1871         E1000_WRITE_REG(hw, E1000_RCTL, reg);
1872 }
1873
1874
1875 /*********************************************************************
1876  *  Multicast Update
1877  *
1878  *  This routine is called whenever multicast address list is updated.
1879  *
1880  **********************************************************************/
1881
1882 static void
1883 igb_set_multi(struct adapter *adapter)
1884 {
1885         struct ifnet    *ifp = adapter->ifp;
1886         struct ifmultiaddr *ifma;
1887         u32 reg_rctl = 0;
1888         u8  *mta;
1889
1890         int mcnt = 0;
1891
1892         IOCTL_DEBUGOUT("igb_set_multi: begin");
1893
1894         mta = adapter->mta;
1895         bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1896             MAX_NUM_MULTICAST_ADDRESSES);
1897
1898 #if __FreeBSD_version < 800000
1899         IF_ADDR_LOCK(ifp);
1900 #else
1901         if_maddr_rlock(ifp);
1902 #endif
1903         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1904                 if (ifma->ifma_addr->sa_family != AF_LINK)
1905                         continue;
1906
1907                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1908                         break;
1909
1910                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1911                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1912                 mcnt++;
1913         }
1914 #if __FreeBSD_version < 800000
1915         IF_ADDR_UNLOCK(ifp);
1916 #else
1917         if_maddr_runlock(ifp);
1918 #endif
1919
1920         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1921                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1922                 reg_rctl |= E1000_RCTL_MPE;
1923                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1924         } else
1925                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1926 }
1927
1928
1929 /*********************************************************************
1930  *  Timer routine:
1931  *      This routine checks for link status,
1932  *      updates statistics, and does the watchdog.
1933  *
1934  **********************************************************************/
1935
1936 static void
1937 igb_local_timer(void *arg)
1938 {
1939         struct adapter          *adapter = arg;
1940         device_t                dev = adapter->dev;
1941         struct tx_ring          *txr = adapter->tx_rings;
1942
1943
1944         IGB_CORE_LOCK_ASSERT(adapter);
1945
1946         igb_update_link_status(adapter);
1947         igb_update_stats_counters(adapter);
1948
1949         /* 
        ** If flow control has paused us since the last check,
        ** it invalidates the watchdog timing, so don't run it.
1952         */
1953         if (adapter->pause_frames) {
1954                 adapter->pause_frames = 0;
1955                 goto out;
1956         }
1957
1958         /*
1959         ** Watchdog: check for time since any descriptor was cleaned
1960         */
1961         for (int i = 0; i < adapter->num_queues; i++, txr++)
1962                 if (txr->queue_status == IGB_QUEUE_HUNG) 
1963                         goto timeout;
1964 out:
1965         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1966 #ifndef DEVICE_POLLING
1967         /* Schedule all queue interrupts - deadlock protection */
1968         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
1969 #endif
1970         return;
1971
1972 timeout:
        device_printf(dev, "Watchdog timeout -- resetting\n");
        device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1975             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1976             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
        device_printf(dev, "TX(%d) desc avail = %d, "
            "Next TX to Clean = %d\n",
1979             txr->me, txr->tx_avail, txr->next_to_clean);
1980         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1981         adapter->watchdog_events++;
1982         igb_init_locked(adapter);
1983 }
1984
1985 static void
1986 igb_update_link_status(struct adapter *adapter)
1987 {
1988         struct e1000_hw *hw = &adapter->hw;
1989         struct ifnet *ifp = adapter->ifp;
1990         device_t dev = adapter->dev;
1991         struct tx_ring *txr = adapter->tx_rings;
1992         u32 link_check, thstat, ctrl;
1993
1994         link_check = thstat = ctrl = 0;
1995
1996         /* Get the cached link value or read for real */
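        /*
        ** For copper, hw->mac.get_link_status is raised whenever the
        ** cached state may be stale; once e1000_check_for_link() has
        ** refreshed it, a cleared flag means the link is up, which is
        ** what the logic below relies on.
        */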
1997         switch (hw->phy.media_type) {
1998         case e1000_media_type_copper:
1999                 if (hw->mac.get_link_status) {
2000                         /* Do the work to read phy */
2001                         e1000_check_for_link(hw);
2002                         link_check = !hw->mac.get_link_status;
2003                 } else
2004                         link_check = TRUE;
2005                 break;
2006         case e1000_media_type_fiber:
2007                 e1000_check_for_link(hw);
2008                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2009                                  E1000_STATUS_LU);
2010                 break;
2011         case e1000_media_type_internal_serdes:
2012                 e1000_check_for_link(hw);
2013                 link_check = adapter->hw.mac.serdes_has_link;
2014                 break;
2015         /* VF device is type_unknown */
2016         case e1000_media_type_unknown:
2017                 e1000_check_for_link(hw);
2018                 link_check = !hw->mac.get_link_status;
2019                 /* Fall thru */
2020         default:
2021                 break;
2022         }
2023
2024         /* Check for thermal downshift or shutdown */
2025         if (hw->mac.type == e1000_i350) {
2026                 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2027                 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2028         }
2029
2030         /* Now we check if a transition has happened */
2031         if (link_check && (adapter->link_active == 0)) {
2032                 e1000_get_speed_and_duplex(&adapter->hw, 
2033                     &adapter->link_speed, &adapter->link_duplex);
2034                 if (bootverbose)
2035                         device_printf(dev, "Link is up %d Mbps %s\n",
2036                             adapter->link_speed,
2037                             ((adapter->link_duplex == FULL_DUPLEX) ?
2038                             "Full Duplex" : "Half Duplex"));
2039                 adapter->link_active = 1;
2040                 ifp->if_baudrate = adapter->link_speed * 1000000;
2041                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2042                     (thstat & E1000_THSTAT_LINK_THROTTLE))
2043                         device_printf(dev, "Link: thermal downshift\n");
2044                 /* This can sleep */
2045                 if_link_state_change(ifp, LINK_STATE_UP);
2046         } else if (!link_check && (adapter->link_active == 1)) {
2047                 ifp->if_baudrate = adapter->link_speed = 0;
2048                 adapter->link_duplex = 0;
2049                 if (bootverbose)
2050                         device_printf(dev, "Link is Down\n");
2051                 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2052                     (thstat & E1000_THSTAT_PWR_DOWN))
2053                         device_printf(dev, "Link: thermal shutdown\n");
2054                 adapter->link_active = 0;
2055                 /* This can sleep */
2056                 if_link_state_change(ifp, LINK_STATE_DOWN);
2057                 /* Turn off watchdogs */
2058                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2059                         txr->queue_status = IGB_QUEUE_IDLE;
2060         }
2061 }
2062
2063 /*********************************************************************
2064  *
2065  *  This routine disables all traffic on the adapter by issuing a
2066  *  global reset on the MAC and deallocates TX/RX buffers.
2067  *
2068  **********************************************************************/
2069
2070 static void
2071 igb_stop(void *arg)
2072 {
2073         struct adapter  *adapter = arg;
2074         struct ifnet    *ifp = adapter->ifp;
2075         struct tx_ring *txr = adapter->tx_rings;
2076
2077         IGB_CORE_LOCK_ASSERT(adapter);
2078
2079         INIT_DEBUGOUT("igb_stop: begin");
2080
2081         igb_disable_intr(adapter);
2082
2083         callout_stop(&adapter->timer);
2084
2085         /* Tell the stack that the interface is no longer active */
2086         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2087
2088         /* Unarm watchdog timer. */
2089         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2090                 IGB_TX_LOCK(txr);
2091                 txr->queue_status = IGB_QUEUE_IDLE;
2092                 IGB_TX_UNLOCK(txr);
2093         }
2094
2095         e1000_reset_hw(&adapter->hw);
2096         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2097
2098         e1000_led_off(&adapter->hw);
2099         e1000_cleanup_led(&adapter->hw);
2100 }
2101
2102
2103 /*********************************************************************
2104  *
2105  *  Determine hardware revision.
2106  *
2107  **********************************************************************/
2108 static void
2109 igb_identify_hardware(struct adapter *adapter)
2110 {
2111         device_t dev = adapter->dev;
2112
2113         /* Make sure our PCI config space has the necessary stuff set */
2114         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2115         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2116             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2117                 INIT_DEBUGOUT("Memory Access and/or Bus Master "
2118                     "bits were not set!\n");
                adapter->hw.bus.pci_cmd_word |=
                    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2121                 pci_write_config(dev, PCIR_COMMAND,
2122                     adapter->hw.bus.pci_cmd_word, 2);
2123         }
2124
2125         /* Save off the information about this board */
2126         adapter->hw.vendor_id = pci_get_vendor(dev);
2127         adapter->hw.device_id = pci_get_device(dev);
2128         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2129         adapter->hw.subsystem_vendor_id =
2130             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2131         adapter->hw.subsystem_device_id =
2132             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2133
2134         /* Set MAC type early for PCI setup */
2135         e1000_set_mac_type(&adapter->hw);
2136
2137         /* Are we a VF device? */
2138         if ((adapter->hw.mac.type == e1000_vfadapt) ||
2139             (adapter->hw.mac.type == e1000_vfadapt_i350))
2140                 adapter->vf_ifp = 1;
2141         else
2142                 adapter->vf_ifp = 0;
2143 }
2144
2145 static int
2146 igb_allocate_pci_resources(struct adapter *adapter)
2147 {
2148         device_t        dev = adapter->dev;
2149         int             rid;
2150
2151         rid = PCIR_BAR(0);
2152         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2153             &rid, RF_ACTIVE);
2154         if (adapter->pci_mem == NULL) {
2155                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2156                 return (ENXIO);
2157         }
2158         adapter->osdep.mem_bus_space_tag =
2159             rman_get_bustag(adapter->pci_mem);
2160         adapter->osdep.mem_bus_space_handle =
2161             rman_get_bushandle(adapter->pci_mem);
2162         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
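        /*
        ** Note: hw_addr is pointed at the bus-space handle itself; the
        ** shared-code register macros reach the device through the
        ** osdep tag/handle pair rather than dereferencing this pointer.
        */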
2163
2164         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2165
2166         /* This will setup either MSI/X or MSI */
2167         adapter->msix = igb_setup_msix(adapter);
2168         adapter->hw.back = &adapter->osdep;
2169
2170         return (0);
2171 }
2172
2173 /*********************************************************************
2174  *
2175  *  Setup the Legacy or MSI Interrupt handler
2176  *
2177  **********************************************************************/
2178 static int
2179 igb_allocate_legacy(struct adapter *adapter)
2180 {
2181         device_t                dev = adapter->dev;
2182         struct igb_queue        *que = adapter->queues;
2183         int                     error, rid = 0;
2184
2185         /* Turn off all interrupts */
2186         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2187
2188         /* MSI RID is 1 */
2189         if (adapter->msix == 1)
2190                 rid = 1;
2191
2192         /* We allocate a single interrupt resource */
2193         adapter->res = bus_alloc_resource_any(dev,
2194             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2195         if (adapter->res == NULL) {
2196                 device_printf(dev, "Unable to allocate bus resource: "
2197                     "interrupt\n");
2198                 return (ENXIO);
2199         }
2200
2201         /*
2202          * Try allocating a fast interrupt and the associated deferred
2203          * processing contexts.
2204          */
2205         TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2206         /* Make tasklet for deferred link handling */
2207         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2208         que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2209             taskqueue_thread_enqueue, &que->tq);
2210         taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2211             device_get_nameunit(adapter->dev));
2212         if ((error = bus_setup_intr(dev, adapter->res,
2213             INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2214             adapter, &adapter->tag)) != 0) {
2215                 device_printf(dev, "Failed to register fast interrupt "
2216                             "handler: %d\n", error);
2217                 taskqueue_free(que->tq);
2218                 que->tq = NULL;
2219                 return (error);
2220         }
2221
2222         return (0);
2223 }
2224
2225
2226 /*********************************************************************
2227  *
 *  Setup the MSIX Queue Interrupt handlers
2229  *
2230  **********************************************************************/
2231 static int
2232 igb_allocate_msix(struct adapter *adapter)
2233 {
2234         device_t                dev = adapter->dev;
2235         struct igb_queue        *que = adapter->queues;
2236         int                     error, rid, vector = 0;
2237
2238
2239         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
                rid = vector + 1;
2241                 que->res = bus_alloc_resource_any(dev,
2242                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2243                 if (que->res == NULL) {
2244                         device_printf(dev,
2245                             "Unable to allocate bus resource: "
2246                             "MSIX Queue Interrupt\n");
2247                         return (ENXIO);
2248                 }
2249                 error = bus_setup_intr(dev, que->res,
2250                     INTR_TYPE_NET | INTR_MPSAFE, NULL,
2251                     igb_msix_que, que, &que->tag);
2252                 if (error) {
2253                         que->res = NULL;
                        device_printf(dev, "Failed to register Queue handler\n");
2255                         return (error);
2256                 }
2257 #if __FreeBSD_version >= 800504
2258                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2259 #endif
2260                 que->msix = vector;
2261                 if (adapter->hw.mac.type == e1000_82575)
2262                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2263                 else
2264                         que->eims = 1 << vector;
2265                 /*
2266                 ** Bind the msix vector, and thus the
2267                 ** rings to the corresponding cpu.
2268                 */
2269                 if (adapter->num_queues > 1)
2270                         bus_bind_intr(dev, que->res, i);
2271                 /* Make tasklet for deferred handling */
2272                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2273                 que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2274                     taskqueue_thread_enqueue, &que->tq);
2275                 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2276                     device_get_nameunit(adapter->dev));
2277         }
2278
2279         /* And Link */
2280         rid = vector + 1;
2281         adapter->res = bus_alloc_resource_any(dev,
2282             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2283         if (adapter->res == NULL) {
2284                 device_printf(dev,
2285                     "Unable to allocate bus resource: "
2286                     "MSIX Link Interrupt\n");
2287                 return (ENXIO);
2288         }
2289         if ((error = bus_setup_intr(dev, adapter->res,
2290             INTR_TYPE_NET | INTR_MPSAFE, NULL,
2291             igb_msix_link, adapter, &adapter->tag)) != 0) {
                device_printf(dev, "Failed to register Link handler\n");
2293                 return (error);
2294         }
2295 #if __FreeBSD_version >= 800504
2296         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2297 #endif
2298         adapter->linkvec = vector;
2299
2300         return (0);
2301 }
2302
2303
2304 static void
2305 igb_configure_queues(struct adapter *adapter)
2306 {
2307         struct  e1000_hw        *hw = &adapter->hw;
2308         struct  igb_queue       *que;
2309         u32                     tmp, ivar = 0, newitr = 0;
2310
2311         /* First turn on RSS capability */
2312         if (adapter->hw.mac.type != e1000_82575)
2313                 E1000_WRITE_REG(hw, E1000_GPIE,
2314                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2315                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2316
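        /*
        ** Sketch of the IVAR layout programmed below (82580/i350 case):
        ** each 32-bit IVAR register carries four 8-bit entries covering
        ** the RX and TX causes of two queues; an entry is the MSI-X
        ** vector number OR'd with E1000_IVAR_VALID.
        */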
2317         /* Turn on MSIX */
2318         switch (adapter->hw.mac.type) {
2319         case e1000_82580:
2320         case e1000_i350:
2321         case e1000_vfadapt:
2322         case e1000_vfadapt_i350:
2323                 /* RX entries */
2324                 for (int i = 0; i < adapter->num_queues; i++) {
2325                         u32 index = i >> 1;
2326                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2327                         que = &adapter->queues[i];
2328                         if (i & 1) {
2329                                 ivar &= 0xFF00FFFF;
2330                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2331                         } else {
2332                                 ivar &= 0xFFFFFF00;
2333                                 ivar |= que->msix | E1000_IVAR_VALID;
2334                         }
2335                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2336                 }
2337                 /* TX entries */
2338                 for (int i = 0; i < adapter->num_queues; i++) {
2339                         u32 index = i >> 1;
2340                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2341                         que = &adapter->queues[i];
2342                         if (i & 1) {
2343                                 ivar &= 0x00FFFFFF;
2344                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2345                         } else {
2346                                 ivar &= 0xFFFF00FF;
2347                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2348                         }
2349                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2350                         adapter->que_mask |= que->eims;
2351                 }
2352
2353                 /* And for the link interrupt */
2354                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2355                 adapter->link_mask = 1 << adapter->linkvec;
2356                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2357                 break;
2358         case e1000_82576:
2359                 /* RX entries */
2360                 for (int i = 0; i < adapter->num_queues; i++) {
2361                         u32 index = i & 0x7; /* Each IVAR has two entries */
2362                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2363                         que = &adapter->queues[i];
2364                         if (i < 8) {
2365                                 ivar &= 0xFFFFFF00;
2366                                 ivar |= que->msix | E1000_IVAR_VALID;
2367                         } else {
2368                                 ivar &= 0xFF00FFFF;
2369                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2370                         }
2371                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2372                         adapter->que_mask |= que->eims;
2373                 }
2374                 /* TX entries */
2375                 for (int i = 0; i < adapter->num_queues; i++) {
2376                         u32 index = i & 0x7; /* Each IVAR has two entries */
2377                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2378                         que = &adapter->queues[i];
2379                         if (i < 8) {
2380                                 ivar &= 0xFFFF00FF;
2381                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2382                         } else {
2383                                 ivar &= 0x00FFFFFF;
2384                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2385                         }
2386                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2387                         adapter->que_mask |= que->eims;
2388                 }
2389
2390                 /* And for the link interrupt */
2391                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2392                 adapter->link_mask = 1 << adapter->linkvec;
2393                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2394                 break;
2395
2396         case e1000_82575:
                /* Enable MSI-X support */
2398                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2399                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2400                 /* Auto-Mask interrupts upon ICR read. */
2401                 tmp |= E1000_CTRL_EXT_EIAME;
2402                 tmp |= E1000_CTRL_EXT_IRCA;
2403                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2404
2405                 /* Queues */
2406                 for (int i = 0; i < adapter->num_queues; i++) {
2407                         que = &adapter->queues[i];
2408                         tmp = E1000_EICR_RX_QUEUE0 << i;
2409                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2410                         que->eims = tmp;
2411                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2412                             i, que->eims);
2413                         adapter->que_mask |= que->eims;
2414                 }
2415
2416                 /* Link */
2417                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2418                     E1000_EIMS_OTHER);
                adapter->link_mask |= E1000_EIMS_OTHER;
                break;
        default:
2421                 break;
2422         }
2423
2424         /* Set the starting interrupt rate */
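        /*
        ** The EITR interval appears to count in 0.25-usec units, hence
        ** the 4000000 scale factor: a ceiling of e.g. 8000 interrupts/sec
        ** becomes 4000000 / 8000 = 500 counts (125 usec per interrupt).
        */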
2425         if (igb_max_interrupt_rate > 0)
2426                 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2427
2428         if (hw->mac.type == e1000_82575)
2429                 newitr |= newitr << 16;
2430         else
2431                 newitr |= E1000_EITR_CNT_IGNR;
2432
2433         for (int i = 0; i < adapter->num_queues; i++) {
2434                 que = &adapter->queues[i];
2435                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2436         }
2437
2438         return;
2439 }
2440
2441
2442 static void
2443 igb_free_pci_resources(struct adapter *adapter)
2444 {
2445         struct          igb_queue *que = adapter->queues;
2446         device_t        dev = adapter->dev;
2447         int             rid;
2448
2449         /*
2450         ** There is a slight possibility of a failure mode
2451         ** in attach that will result in entering this function
2452         ** before interrupt resources have been initialized, and
        ** in that case we do not want to execute the loops below.
2454         ** We can detect this reliably by the state of the adapter
2455         ** res pointer.
2456         */
2457         if (adapter->res == NULL)
2458                 goto mem;
2459
2460         /*
2461          * First release all the interrupt resources:
2462          */
2463         for (int i = 0; i < adapter->num_queues; i++, que++) {
2464                 rid = que->msix + 1;
2465                 if (que->tag != NULL) {
2466                         bus_teardown_intr(dev, que->res, que->tag);
2467                         que->tag = NULL;
2468                 }
2469                 if (que->res != NULL)
2470                         bus_release_resource(dev,
2471                             SYS_RES_IRQ, rid, que->res);
2472         }
2473
2474         /* Clean the Legacy or Link interrupt last */
2475         if (adapter->linkvec) /* we are doing MSIX */
2476                 rid = adapter->linkvec + 1;
2477         else
                rid = (adapter->msix != 0) ? 1 : 0;
2479
2480         if (adapter->tag != NULL) {
2481                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2482                 adapter->tag = NULL;
2483         }
2484         if (adapter->res != NULL)
2485                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2486
2487 mem:
2488         if (adapter->msix)
2489                 pci_release_msi(dev);
2490
2491         if (adapter->msix_mem != NULL)
2492                 bus_release_resource(dev, SYS_RES_MEMORY,
2493                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2494
2495         if (adapter->pci_mem != NULL)
2496                 bus_release_resource(dev, SYS_RES_MEMORY,
2497                     PCIR_BAR(0), adapter->pci_mem);
2498
2499 }
2500
2501 /*
2502  * Setup Either MSI/X or MSI
2503  */
2504 static int
2505 igb_setup_msix(struct adapter *adapter)
2506 {
2507         device_t dev = adapter->dev;
2508         int rid, want, queues, msgs;
2509
2510         /* tuneable override */
2511         if (igb_enable_msix == 0)
2512                 goto msi;
2513
2514         /* First try MSI/X */
2515         rid = PCIR_BAR(IGB_MSIX_BAR);
2516         adapter->msix_mem = bus_alloc_resource_any(dev,
2517             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2518         if (!adapter->msix_mem) {
2519                 /* May not be enabled */
2520                 device_printf(adapter->dev,
                    "Unable to map MSIX table\n");
2522                 goto msi;
2523         }
2524
2525         msgs = pci_msix_count(dev); 
2526         if (msgs == 0) { /* system has msix disabled */
2527                 bus_release_resource(dev, SYS_RES_MEMORY,
2528                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2529                 adapter->msix_mem = NULL;
2530                 goto msi;
2531         }
2532
2533         /* Figure out a reasonable auto config value */
2534         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2535
2536         /* Manual override */
2537         if (igb_num_queues != 0)
2538                 queues = igb_num_queues;
2539         if (queues > 8)  /* max queues */
2540                 queues = 8;
2541
2542         /* Can have max of 4 queues on 82575 */
2543         if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2544                 queues = 4;
2545
2546         /* Limit the VF devices to one queue */
2547         if (adapter->vf_ifp)
2548                 queues = 1;
2549
2550         /*
2551         ** One vector (RX/TX pair) per queue
2552         ** plus an additional for Link interrupt
2553         */
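        /*
        ** For example, a 4-core machine with enough MSI-X messages gets
        ** queues = 4 and want = 5: four queue vectors plus one link
        ** vector.
        */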
2554         want = queues + 1;
2555         if (msgs >= want)
2556                 msgs = want;
2557         else {
2558                 device_printf(adapter->dev,
2559                     "MSIX Configuration Problem, "
2560                     "%d vectors configured, but %d queues wanted!\n",
2561                     msgs, want);
2562                 return (ENXIO);
2563         }
2564         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2565                 device_printf(adapter->dev,
2566                     "Using MSIX interrupts with %d vectors\n", msgs);
2567                 adapter->num_queues = queues;
2568                 return (msgs);
2569         }
2570 msi:
2571         msgs = pci_msi_count(dev);
2572         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2573                 device_printf(adapter->dev,"Using MSI interrupt\n");
2574         return (msgs);
2575 }
2576
2577 /*********************************************************************
2578  *
 *  Set up a fresh starting state
2580  *
2581  **********************************************************************/
2582 static void
2583 igb_reset(struct adapter *adapter)
2584 {
2585         device_t        dev = adapter->dev;
2586         struct e1000_hw *hw = &adapter->hw;
2587         struct e1000_fc_info *fc = &hw->fc;
2588         struct ifnet    *ifp = adapter->ifp;
2589         u32             pba = 0;
2590         u16             hwm;
2591
2592         INIT_DEBUGOUT("igb_reset: begin");
2593
2594         /* Let the firmware know the OS is in control */
2595         igb_get_hw_control(adapter);
2596
2597         /*
2598          * Packet Buffer Allocation (PBA)
         * Writing PBA sets the receive portion of the buffer;
         * the remainder is used for the transmit buffer.
2601          */
2602         switch (hw->mac.type) {
2603         case e1000_82575:
2604                 pba = E1000_PBA_32K;
2605                 break;
2606         case e1000_82576:
2607         case e1000_vfadapt:
2608                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2609                 pba &= E1000_RXPBS_SIZE_MASK_82576;
2610                 break;
2611         case e1000_82580:
2612         case e1000_i350:
2613         case e1000_vfadapt_i350:
2614                 pba = E1000_READ_REG(hw, E1000_RXPBS);
2615                 pba = e1000_rxpbs_adjust_82580(pba);
                break;
        default:
                pba = E1000_PBA_35K;
                break;
2620         }
2621
2622         /* Special needs in case of Jumbo frames */
2623         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2624                 u32 tx_space, min_tx, min_rx;
2625                 pba = E1000_READ_REG(hw, E1000_PBA);
2626                 tx_space = pba >> 16;
2627                 pba &= 0xffff;
2628                 min_tx = (adapter->max_frame_size +
2629                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2630                 min_tx = roundup2(min_tx, 1024);
2631                 min_tx >>= 10;
2632                 min_rx = adapter->max_frame_size;
2633                 min_rx = roundup2(min_rx, 1024);
2634                 min_rx >>= 10;
2635                 if (tx_space < min_tx &&
2636                     ((min_tx - tx_space) < pba)) {
2637                         pba = pba - (min_tx - tx_space);
2638                         /*
2639                          * if short on rx space, rx wins
2640                          * and must trump tx adjustment
2641                          */
2642                         if (pba < min_rx)
2643                                 pba = min_rx;
2644                 }
2645                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2646         }
2647
        INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
2649
2650         /*
2651          * These parameters control the automatic generation (Tx) and
2652          * response (Rx) to Ethernet PAUSE frames.
2653          * - High water mark should allow for at least two frames to be
2654          *   received after sending an XOFF.
2655          * - Low water mark works best when it is very near the high water mark.
2656          *   This allows the receiver to restart by sending XON when it has
2657          *   drained a bit.
2658          */
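        /*
         * Worked example, assuming a 32 KB RX packet buffer (the 82575
         * E1000_PBA_32K case) and a 1522-byte max frame:
         * hwm = min(32768 * 9 / 10, 32768 - 2 * 1522)
         *     = min(29491, 29724) = 29491 bytes,
         * leaving room for two more frames after an XOFF is sent.
         */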
2659         hwm = min(((pba << 10) * 9 / 10),
2660             ((pba << 10) - 2 * adapter->max_frame_size));
2661
2662         if (hw->mac.type < e1000_82576) {
2663                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2664                 fc->low_water = fc->high_water - 8;
2665         } else {
2666                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2667                 fc->low_water = fc->high_water - 16;
2668         }
2669
2670         fc->pause_time = IGB_FC_PAUSE_TIME;
2671         fc->send_xon = TRUE;
2672
2673         /* Issue a global reset */
2674         e1000_reset_hw(hw);
2675         E1000_WRITE_REG(hw, E1000_WUC, 0);
2676
2677         if (e1000_init_hw(hw) < 0)
2678                 device_printf(dev, "Hardware Initialization Failed\n");
2679
2680         /* Setup DMA Coalescing */
2681         if ((hw->mac.type == e1000_i350) &&
2682             (adapter->dma_coalesce == TRUE)) {
2683                 u32 reg;
2684
2685                 hwm = (pba - 4) << 10;
2686                 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2687                     & E1000_DMACR_DMACTHR_MASK);
2688
                /* Allow transition to L0s or L1 if available */
2690                 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2691
                /* Timer: roughly 1000 usec, in 32-usec intervals */
2693                 reg |= (1000 >> 5);
2694                 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2695
2696                 /* No lower threshold */
2697                 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2698
2699                 /* set hwm to PBA -  2 * max frame size */
2700                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2701
2702                 /* Set the interval before transition */
2703                 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2704                 reg |= 0x800000FF; /* 255 usec */
2705                 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2706
2707                 /* free space in tx packet buffer to wake from DMA coal */
2708                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2709                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2710
2711                 /* make low power state decision controlled by DMA coal */
2712                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2713                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2714                     reg | E1000_PCIEMISC_LX_DECISION);
2715                 device_printf(dev, "DMA Coalescing enabled\n");
2716         }
2717
2718         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2719         e1000_get_phy_info(hw);
2720         e1000_check_for_link(hw);
2721         return;
2722 }
2723
2724 /*********************************************************************
2725  *
2726  *  Setup networking device structure and register an interface.
2727  *
2728  **********************************************************************/
2729 static int
2730 igb_setup_interface(device_t dev, struct adapter *adapter)
2731 {
2732         struct ifnet   *ifp;
2733
2734         INIT_DEBUGOUT("igb_setup_interface: begin");
2735
2736         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2737         if (ifp == NULL) {
2738                 device_printf(dev, "can not allocate ifnet structure\n");
2739                 return (-1);
2740         }
2741         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2742         ifp->if_mtu = ETHERMTU;
2743         ifp->if_init =  igb_init;
2744         ifp->if_softc = adapter;
2745         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2746         ifp->if_ioctl = igb_ioctl;
2747         ifp->if_start = igb_start;
2748 #if __FreeBSD_version >= 800000
2749         ifp->if_transmit = igb_mq_start;
2750         ifp->if_qflush = igb_qflush;
2751 #endif
2752         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2753         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2754         IFQ_SET_READY(&ifp->if_snd);
2755
2756         ether_ifattach(ifp, adapter->hw.mac.addr);
2757
2758         ifp->if_capabilities = ifp->if_capenable = 0;
2759
2760         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2761         ifp->if_capabilities |= IFCAP_TSO4;
2762         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2763         ifp->if_capenable = ifp->if_capabilities;
2764
        /* Advertise LRO capability, but leave it disabled by default */
2766         ifp->if_capabilities |= IFCAP_LRO;
2767
2768 #ifdef DEVICE_POLLING
2769         ifp->if_capabilities |= IFCAP_POLLING;
2770 #endif
2771
2772         /*
2773          * Tell the upper layer(s) we
2774          * support full VLAN capability.
2775          */
2776         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2777         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2778         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2779
2780         /*
        ** Don't turn this on by default: if vlans are
        ** created on another pseudo device (e.g. lagg)
        ** then vlan events are not passed through, breaking
        ** operation, but with HW FILTER off it works. If
        ** you use vlans directly on this driver you can
        ** enable this and get full hardware tag filtering.
2787         */
2788         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2789
2790         /*
2791          * Specify the media types supported by this adapter and register
2792          * callbacks to update media and link information
2793          */
2794         ifmedia_init(&adapter->media, IFM_IMASK,
2795             igb_media_change, igb_media_status);
2796         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2797             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2798                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2799                             0, NULL);
2800                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2801         } else {
2802                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2803                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2804                             0, NULL);
2805                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2806                             0, NULL);
2807                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2808                             0, NULL);
2809                 if (adapter->hw.phy.type != e1000_phy_ife) {
2810                         ifmedia_add(&adapter->media,
2811                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2812                         ifmedia_add(&adapter->media,
2813                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2814                 }
2815         }
2816         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2817         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2818         return (0);
2819 }
2820
2821
2822 /*
2823  * Manage DMA'able memory.
2824  */
2825 static void
2826 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2827 {
2828         if (error)
2829                 return;
2830         *(bus_addr_t *) arg = segs[0].ds_addr;
2831 }
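
/*
 * The callback above is the usual busdma idiom for single-segment
 * loads: bus_dmamap_load() reports the physical address through a
 * callback rather than a return value, since a load may be deferred.
 */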
2832
2833 static int
2834 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2835         struct igb_dma_alloc *dma, int mapflags)
2836 {
2837         int error;
2838
2839         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2840                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2841                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2842                                 BUS_SPACE_MAXADDR,      /* highaddr */
2843                                 NULL, NULL,             /* filter, filterarg */
2844                                 size,                   /* maxsize */
2845                                 1,                      /* nsegments */
2846                                 size,                   /* maxsegsize */
2847                                 0,                      /* flags */
2848                                 NULL,                   /* lockfunc */
2849                                 NULL,                   /* lockarg */
2850                                 &dma->dma_tag);
2851         if (error) {
2852                 device_printf(adapter->dev,
2853                     "%s: bus_dma_tag_create failed: %d\n",
2854                     __func__, error);
2855                 goto fail_0;
2856         }
2857
2858         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2859             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2860         if (error) {
2861                 device_printf(adapter->dev,
2862                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2863                     __func__, (uintmax_t)size, error);
2864                 goto fail_2;
2865         }
2866
2867         dma->dma_paddr = 0;
2868         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2869             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2870         if (error || dma->dma_paddr == 0) {
2871                 device_printf(adapter->dev,
2872                     "%s: bus_dmamap_load failed: %d\n",
2873                     __func__, error);
2874                 goto fail_3;
2875         }
2876
2877         return (0);
2878
2879 fail_3:
2880         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2881 fail_2:
2882         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2883         bus_dma_tag_destroy(dma->dma_tag);
2884 fail_0:
2885         dma->dma_map = NULL;
2886         dma->dma_tag = NULL;
2887
2888         return (error);
2889 }
2890
2891 static void
2892 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2893 {
2894         if (dma->dma_tag == NULL)
2895                 return;
2896         if (dma->dma_map != NULL) {
2897                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2898                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2899                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2900                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2901                 dma->dma_map = NULL;
2902         }
2903         bus_dma_tag_destroy(dma->dma_tag);
2904         dma->dma_tag = NULL;
2905 }
2906
2907
2908 /*********************************************************************
2909  *
2910  *  Allocate memory for the transmit and receive rings, and then
2911  *  the descriptors associated with each, called only once at attach.
2912  *
2913  **********************************************************************/
2914 static int
2915 igb_allocate_queues(struct adapter *adapter)
2916 {
2917         device_t dev = adapter->dev;
2918         struct igb_queue        *que = NULL;
2919         struct tx_ring          *txr = NULL;
2920         struct rx_ring          *rxr = NULL;
2921         int rsize, tsize, error = E1000_SUCCESS;
2922         int txconf = 0, rxconf = 0;
2923
2924         /* First allocate the top level queue structs */
2925         if (!(adapter->queues =
2926             (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2927             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2928                 device_printf(dev, "Unable to allocate queue memory\n");
2929                 error = ENOMEM;
2930                 goto fail;
2931         }
2932
2933         /* Next allocate the TX ring struct memory */
2934         if (!(adapter->tx_rings =
2935             (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2936             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2937                 device_printf(dev, "Unable to allocate TX ring memory\n");
2938                 error = ENOMEM;
2939                 goto tx_fail;
2940         }
2941
2942         /* Now allocate the RX */
2943         if (!(adapter->rx_rings =
2944             (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2945             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2946                 device_printf(dev, "Unable to allocate RX ring memory\n");
2947                 error = ENOMEM;
2948                 goto rx_fail;
2949         }
2950
2951         tsize = roundup2(adapter->num_tx_desc *
2952             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
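        /*
         * tsize is rounded up to IGB_DBA_ALIGN, the descriptor
         * base/length alignment the hardware requires (128 bytes
         * in this driver), so the value later written to TDLEN
         * stays legal.
         */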
2953         /*
2954          * Now set up the TX queues; txconf is needed to handle the
2955          * possibility that things fail midcourse and we need to
2956          * undo the memory allocations gracefully.
2957          */
2958         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2959                 /* Set up some basics */
2960                 txr = &adapter->tx_rings[i];
2961                 txr->adapter = adapter;
2962                 txr->me = i;
2963
2964                 /* Initialize the TX lock */
2965                 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2966                     device_get_nameunit(dev), txr->me);
2967                 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2968
2969                 if (igb_dma_malloc(adapter, tsize,
2970                         &txr->txdma, BUS_DMA_NOWAIT)) {
2971                         device_printf(dev,
2972                             "Unable to allocate TX Descriptor memory\n");
2973                         error = ENOMEM;
2974                         goto err_tx_desc;
2975                 }
2976                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2977                 bzero((void *)txr->tx_base, tsize);
2978
2979                 /* Now allocate transmit buffers for the ring */
2980                 if (igb_allocate_transmit_buffers(txr)) {
2981                         device_printf(dev,
2982                             "Critical Failure setting up transmit buffers\n");
2983                         error = ENOMEM;
2984                         goto err_tx_desc;
2985                 }
2986 #if __FreeBSD_version >= 800000
2987                 /* Allocate a buf ring */
2988                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2989                     M_WAITOK, &txr->tx_mtx);
2990 #endif
2991         }
2992
2993         /*
2994          * Next the RX queues...
2995          */ 
2996         rsize = roundup2(adapter->num_rx_desc *
2997             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2998         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2999                 rxr = &adapter->rx_rings[i];
3000                 rxr->adapter = adapter;
3001                 rxr->me = i;
3002
3003                 /* Initialize the RX lock */
3004                 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3005                     device_get_nameunit(dev), rxr->me);
3006                 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3007
3008                 if (igb_dma_malloc(adapter, rsize,
3009                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
3010                         device_printf(dev,
3011                             "Unable to allocate RX Descriptor memory\n");
3012                         error = ENOMEM;
3013                         goto err_rx_desc;
3014                 }
3015                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3016                 bzero((void *)rxr->rx_base, rsize);
3017
3018                 /* Allocate receive buffers for the ring */
3019                 if (igb_allocate_receive_buffers(rxr)) {
3020                         device_printf(dev,
3021                             "Critical Failure setting up receive buffers\n");
3022                         error = ENOMEM;
3023                         goto err_rx_desc;
3024                 }
3025         }
3026
3027         /*
3028         ** Finally set up the queue holding structs
3029         */
3030         for (int i = 0; i < adapter->num_queues; i++) {
3031                 que = &adapter->queues[i];
3032                 que->adapter = adapter;
3033                 que->txr = &adapter->tx_rings[i];
3034                 que->rxr = &adapter->rx_rings[i];
3035         }
3036
3037         return (0);
3038
3039 err_rx_desc:
3040         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3041                 igb_dma_free(adapter, &rxr->rxdma);
3042 err_tx_desc:
3043         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3044                 igb_dma_free(adapter, &txr->txdma);
3045         free(adapter->rx_rings, M_DEVBUF);
3046 rx_fail:
3047 #if __FreeBSD_version >= 800000
3048         buf_ring_free(txr->br, M_DEVBUF);
3049 #endif
3050         free(adapter->tx_rings, M_DEVBUF);
3051 tx_fail:
3052         free(adapter->queues, M_DEVBUF);
3053 fail:
3054         return (error);
3055 }
3056
3057 /*********************************************************************
3058  *
3059  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3060  *  the information needed to transmit a packet on the wire. This is
3061  *  called only once at attach; setup is done on every reset.
3062  *
3063  **********************************************************************/
3064 static int
3065 igb_allocate_transmit_buffers(struct tx_ring *txr)
3066 {
3067         struct adapter *adapter = txr->adapter;
3068         device_t dev = adapter->dev;
3069         struct igb_tx_buffer *txbuf;
3070         int error, i;
3071
3072         /*
3073          * Setup DMA descriptor areas.
3074          */
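        /*
         * The transmit tag is sized for the worst case: a full TSO
         * payload (IGB_TSO_SIZE) spread across up to IGB_MAX_SCATTER
         * segments, with no single segment larger than PAGE_SIZE.
         */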
3075         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3076                                1, 0,                    /* alignment, bounds */
3077                                BUS_SPACE_MAXADDR,       /* lowaddr */
3078                                BUS_SPACE_MAXADDR,       /* highaddr */
3079                                NULL, NULL,              /* filter, filterarg */
3080                                IGB_TSO_SIZE,            /* maxsize */
3081                                IGB_MAX_SCATTER,         /* nsegments */
3082                                PAGE_SIZE,               /* maxsegsize */
3083                                0,                       /* flags */
3084                                NULL,                    /* lockfunc */
3085                                NULL,                    /* lockfuncarg */
3086                                &txr->txtag))) {
3087                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3088                 goto fail;
3089         }
3090
3091         if (!(txr->tx_buffers =
3092             (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3093             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3094                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3095                 error = ENOMEM;
3096                 goto fail;
3097         }
3098
3099         /* Create the descriptor buffer dma maps */
3100         txbuf = txr->tx_buffers;
3101         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3102                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3103                 if (error != 0) {
3104                         device_printf(dev, "Unable to create TX DMA map\n");
3105                         goto fail;
3106                 }
3107         }
3108
3109         return 0;
3110 fail:
3111         /* We free everything; this handles the case where we failed partway */
3112         igb_free_transmit_structures(adapter);
3113         return (error);
3114 }
3115
3116 /*********************************************************************
3117  *
3118  *  Initialize a transmit ring.
3119  *
3120  **********************************************************************/
3121 static void
3122 igb_setup_transmit_ring(struct tx_ring *txr)
3123 {
3124         struct adapter *adapter = txr->adapter;
3125         struct igb_tx_buffer *txbuf;
3126         int i;
3127
3128         /* Clear the old descriptor contents */
3129         IGB_TX_LOCK(txr);
3130         bzero((void *)txr->tx_base,
3131               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3132         /* Reset indices */
3133         txr->next_avail_desc = 0;
3134         txr->next_to_clean = 0;
3135
3136         /* Free any existing tx buffers. */
3137         txbuf = txr->tx_buffers;
3138         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3139                 if (txbuf->m_head != NULL) {
3140                         bus_dmamap_sync(txr->txtag, txbuf->map,
3141                             BUS_DMASYNC_POSTWRITE);
3142                         bus_dmamap_unload(txr->txtag, txbuf->map);
3143                         m_freem(txbuf->m_head);
3144                         txbuf->m_head = NULL;
3145                 }
3146                 /* clear the watch index */
3147                 txbuf->next_eop = -1;
3148         }
3149
3150         /* Set number of descriptors available */
3151         txr->tx_avail = adapter->num_tx_desc;
3152
3153         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3154             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3155         IGB_TX_UNLOCK(txr);
3156 }
3157
3158 /*********************************************************************
3159  *
3160  *  Initialize all transmit rings.
3161  *
3162  **********************************************************************/
3163 static void
3164 igb_setup_transmit_structures(struct adapter *adapter)
3165 {
3166         struct tx_ring *txr = adapter->tx_rings;
3167
3168         for (int i = 0; i < adapter->num_queues; i++, txr++)
3169                 igb_setup_transmit_ring(txr);
3170
3171         return;
3172 }
3173
3174 /*********************************************************************
3175  *
3176  *  Enable transmit unit.
3177  *
3178  **********************************************************************/
3179 static void
3180 igb_initialize_transmit_units(struct adapter *adapter)
3181 {
3182         struct tx_ring  *txr = adapter->tx_rings;
3183         struct e1000_hw *hw = &adapter->hw;
3184         u32             tctl, txdctl;
3185
3186         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3187         tctl = txdctl = 0;
3188
3189         /* Setup the Tx Descriptor Rings */
3190         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3191                 u64 bus_addr = txr->txdma.dma_paddr;
3192
3193                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3194                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3195                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3196                     (uint32_t)(bus_addr >> 32));
3197                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3198                     (uint32_t)bus_addr);
3199
3200                 /* Setup the HW Tx Head and Tail descriptor pointers */
3201                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3202                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3203
3204                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3205                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3206                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3207
3208                 txr->queue_status = IGB_QUEUE_IDLE;
3209
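                /*
                 * Pack the prefetch, host and write-back thresholds
                 * into TXDCTL: PTHRESH occupies the low bits, HTHRESH
                 * sits at bit 8 and WTHRESH at bit 16, matching the
                 * shifts below.
                 */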
3210                 txdctl |= IGB_TX_PTHRESH;
3211                 txdctl |= IGB_TX_HTHRESH << 8;
3212                 txdctl |= IGB_TX_WTHRESH << 16;
3213                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3214                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3215         }
3216
3217         if (adapter->vf_ifp)
3218                 return;
3219
3220         e1000_config_collision_dist(hw);
3221
3222         /* Program the Transmit Control Register */
3223         tctl = E1000_READ_REG(hw, E1000_TCTL);
3224         tctl &= ~E1000_TCTL_CT;
3225         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3226                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3227
3228         /* This write will effectively turn on the transmit unit. */
3229         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3230 }
3231
3232 /*********************************************************************
3233  *
3234  *  Free all transmit rings.
3235  *
3236  **********************************************************************/
3237 static void
3238 igb_free_transmit_structures(struct adapter *adapter)
3239 {
3240         struct tx_ring *txr = adapter->tx_rings;
3241
3242         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3243                 IGB_TX_LOCK(txr);
3244                 igb_free_transmit_buffers(txr);
3245                 igb_dma_free(adapter, &txr->txdma);
3246                 IGB_TX_UNLOCK(txr);
3247                 IGB_TX_LOCK_DESTROY(txr);
3248         }
3249         free(adapter->tx_rings, M_DEVBUF);
3250 }
3251
3252 /*********************************************************************
3253  *
3254  *  Free transmit ring related data structures.
3255  *
3256  **********************************************************************/
3257 static void
3258 igb_free_transmit_buffers(struct tx_ring *txr)
3259 {
3260         struct adapter *adapter = txr->adapter;
3261         struct igb_tx_buffer *tx_buffer;
3262         int             i;
3263
3264         INIT_DEBUGOUT("free_transmit_buffers: begin");
3265
3266         if (txr->tx_buffers == NULL)
3267                 return;
3268
3269         tx_buffer = txr->tx_buffers;
3270         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3271                 if (tx_buffer->m_head != NULL) {
3272                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3273                             BUS_DMASYNC_POSTWRITE);
3274                         bus_dmamap_unload(txr->txtag,
3275                             tx_buffer->map);
3276                         m_freem(tx_buffer->m_head);
3277                         tx_buffer->m_head = NULL;
3278                         if (tx_buffer->map != NULL) {
3279                                 bus_dmamap_destroy(txr->txtag,
3280                                     tx_buffer->map);
3281                                 tx_buffer->map = NULL;
3282                         }
3283                 } else if (tx_buffer->map != NULL) {
3284                         bus_dmamap_unload(txr->txtag,
3285                             tx_buffer->map);
3286                         bus_dmamap_destroy(txr->txtag,
3287                             tx_buffer->map);
3288                         tx_buffer->map = NULL;
3289                 }
3290         }
3291 #if __FreeBSD_version >= 800000
3292         if (txr->br != NULL)
3293                 buf_ring_free(txr->br, M_DEVBUF);
3294 #endif
3295         if (txr->tx_buffers != NULL) {
3296                 free(txr->tx_buffers, M_DEVBUF);
3297                 txr->tx_buffers = NULL;
3298         }
3299         if (txr->txtag != NULL) {
3300                 bus_dma_tag_destroy(txr->txtag);
3301                 txr->txtag = NULL;
3302         }
3303         return;
3304 }
3305
3306 /**********************************************************************
3307  *
3308  *  Setup work for hardware segmentation offload (TSO)
3309  *
3310  **********************************************************************/
3311 static boolean_t
3312 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3313 {
3314         struct adapter *adapter = txr->adapter;
3315         struct e1000_adv_tx_context_desc *TXD;
3316         struct igb_tx_buffer        *tx_buffer;
3317         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3318         u32 mss_l4len_idx = 0;
3319         u16 vtag = 0;
3320         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3321         struct ether_vlan_header *eh;
3322         struct ip *ip;
3323         struct tcphdr *th;
3324
3325
3326         /*
3327          * Determine where frame payload starts.
3328          * Jump over vlan headers if already present
3329          */
3330         eh = mtod(mp, struct ether_vlan_header *);
3331         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3332                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3333         else
3334                 ehdrlen = ETHER_HDR_LEN;
3335
3336         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3337         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3338                 return FALSE;
3339
3340         /* Only supports IPV4 for now */
3341         ctxd = txr->next_avail_desc;
3342         tx_buffer = &txr->tx_buffers[ctxd];
3343         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3344
3345         ip = (struct ip *)(mp->m_data + ehdrlen);
3346         if (ip->ip_p != IPPROTO_TCP)
3347                 return FALSE;   /* not TCP, cannot TSO */
3348         ip->ip_sum = 0;
3349         ip_hlen = ip->ip_hl << 2;
3350         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
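        /*
         * For TSO the hardware expects the TCP checksum field to be
         * seeded with the pseudo-header checksum (addresses and
         * protocol, no length); it then fixes up each segment.
         */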
3351         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3352             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3353         tcp_hlen = th->th_off << 2;
3354         /*
3355          * Calculate total header length; this is used
3356          * in the transmit descriptor in igb_xmit
3357          */
3358         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3359
3360         /* VLAN MACLEN IPLEN */
3361         if (mp->m_flags & M_VLANTAG) {
3362                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3363                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3364         }
3365
3366         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3367         vlan_macip_lens |= ip_hlen;
3368         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3369
3370         /* ADV DTYPE TUCMD */
3371         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3372         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3373         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3374         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3375
3376         /* MSS L4LEN IDX */
3377         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3378         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3379         /* 82575 needs the queue index added */
3380         if (adapter->hw.mac.type == e1000_82575)
3381                 mss_l4len_idx |= txr->me << 4;
3382         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3383
3384         TXD->seqnum_seed = htole32(0);
3385         tx_buffer->m_head = NULL;
3386         tx_buffer->next_eop = -1;
3387
3388         if (++ctxd == adapter->num_tx_desc)
3389                 ctxd = 0;
3390
3391         txr->tx_avail--;
3392         txr->next_avail_desc = ctxd;
3393         return TRUE;
3394 }
3395
3396
3397 /*********************************************************************
3398  *
3399  *  Context Descriptor setup for VLAN or CSUM
3400  *
3401  **********************************************************************/
3402
3403 static bool
3404 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3405 {
3406         struct adapter *adapter = txr->adapter;
3407         struct e1000_adv_tx_context_desc *TXD;
3408         struct igb_tx_buffer        *tx_buffer;
3409         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3410         struct ether_vlan_header *eh;
3411         struct ip *ip = NULL;
3412         struct ip6_hdr *ip6;
3413         int  ehdrlen, ctxd, ip_hlen = 0;
3414         u16     etype, vtag = 0;
3415         u8      ipproto = 0;
3416         bool    offload = TRUE;
3417
3418         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3419                 offload = FALSE;
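        /*
         * CSUM_OFFLOAD is the mask of checksum-assist flags this
         * driver handles; when none are set, a context descriptor
         * is only worth writing for VLAN tag insertion, which is
         * checked just below.
         */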
3420
3421         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3422         ctxd = txr->next_avail_desc;
3423         tx_buffer = &txr->tx_buffers[ctxd];
3424         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3425
3426         /*
3427         ** In advanced descriptors the vlan tag must 
3428         ** be placed into the context descriptor, thus
3429         ** we need to be here just for that setup.
3430         */
3431         if (mp->m_flags & M_VLANTAG) {
3432                 vtag = htole16(mp->m_pkthdr.ether_vtag);
3433                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3434         } else if (offload == FALSE)
3435                 return FALSE;
3436
3437         /*
3438          * Determine where frame payload starts.
3439          * Jump over vlan headers if already present,
3440          * helpful for QinQ too.
3441          */
3442         eh = mtod(mp, struct ether_vlan_header *);
3443         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3444                 etype = ntohs(eh->evl_proto);
3445                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3446         } else {
3447                 etype = ntohs(eh->evl_encap_proto);
3448                 ehdrlen = ETHER_HDR_LEN;
3449         }
3450
3451         /* Set the ether header length */
3452         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3453
3454         switch (etype) {
3455                 case ETHERTYPE_IP:
3456                         ip = (struct ip *)(mp->m_data + ehdrlen);
3457                         ip_hlen = ip->ip_hl << 2;
3458                         if (mp->m_len < ehdrlen + ip_hlen) {
3459                                 offload = FALSE;
3460                                 break;
3461                         }
3462                         ipproto = ip->ip_p;
3463                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3464                         break;
3465                 case ETHERTYPE_IPV6:
3466                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3467                         ip_hlen = sizeof(struct ip6_hdr);
3468                         ipproto = ip6->ip6_nxt;
3469                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3470                         break;
3471                 default:
3472                         offload = FALSE;
3473                         break;
3474         }
3475
3476         vlan_macip_lens |= ip_hlen;
3477         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3478
3479         switch (ipproto) {
3480                 case IPPROTO_TCP:
3481                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3482                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3483                         break;
3484                 case IPPROTO_UDP:
3485                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3486                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3487                         break;
3488 #if __FreeBSD_version >= 800000
3489                 case IPPROTO_SCTP:
3490                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3491                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3492                         break;
3493 #endif
3494                 default:
3495                         offload = FALSE;
3496                         break;
3497         }
3498
3499         /* 82575 needs the queue index added */
3500         if (adapter->hw.mac.type == e1000_82575)
3501                 mss_l4len_idx = txr->me << 4;
3502
3503         /* Now copy bits into descriptor */
3504         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3505         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3506         TXD->seqnum_seed = htole32(0);
3507         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3508
3509         tx_buffer->m_head = NULL;
3510         tx_buffer->next_eop = -1;
3511
3512         /* We've consumed the first desc, adjust counters */
3513         if (++ctxd == adapter->num_tx_desc)
3514                 ctxd = 0;
3515         txr->next_avail_desc = ctxd;
3516         --txr->tx_avail;
3517
3518         return (offload);
3519 }
3520
3521
3522 /**********************************************************************
3523  *
3524  *  Examine each tx_buffer in the used queue. If the hardware is done
3525  *  processing the packet then free associated resources. The
3526  *  tx_buffer is put back on the free queue.
3527  *
3528  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3529  **********************************************************************/
3530 static bool
3531 igb_txeof(struct tx_ring *txr)
3532 {
3533         struct adapter  *adapter = txr->adapter;
3534         int first, last, done, processed;
3535         struct igb_tx_buffer *tx_buffer;
3536         struct e1000_tx_desc   *tx_desc, *eop_desc;
3537         struct ifnet   *ifp = adapter->ifp;
3538
3539         IGB_TX_LOCK_ASSERT(txr);
3540
3541         if (txr->tx_avail == adapter->num_tx_desc) {
3542                 txr->queue_status = IGB_QUEUE_IDLE;
3543                 return FALSE;
3544         }
3545
3546         processed = 0;
3547         first = txr->next_to_clean;
3548         tx_desc = &txr->tx_base[first];
3549         tx_buffer = &txr->tx_buffers[first];
3550         last = tx_buffer->next_eop;
3551         eop_desc = &txr->tx_base[last];
3552
3553         /*
3554          * What this does is get the index of the
3555          * first descriptor AFTER the EOP of the 
3556          * first packet, that way we can do the
3557          * simple comparison on the inner while loop.
3558          */
3559         if (++last == adapter->num_tx_desc)
3560                 last = 0;
3561         done = last;
3562
3563         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3564             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3565
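        /*
         * The hardware sets the DD (descriptor done) bit in the
         * write-back status once it has finished with a descriptor,
         * so testing the EOP descriptor tells us whether the whole
         * packet has been sent.
         */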
3566         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3567                 /* We clean the range of the packet */
3568                 while (first != done) {
3569                         tx_desc->upper.data = 0;
3570                         tx_desc->lower.data = 0;
3571                         tx_desc->buffer_addr = 0;
3572                         ++txr->tx_avail;
3573                         ++processed;
3574
3575                         if (tx_buffer->m_head) {
3576                                 txr->bytes +=
3577                                     tx_buffer->m_head->m_pkthdr.len;
3578                                 bus_dmamap_sync(txr->txtag,
3579                                     tx_buffer->map,
3580                                     BUS_DMASYNC_POSTWRITE);
3581                                 bus_dmamap_unload(txr->txtag,
3582                                     tx_buffer->map);
3583
3584                                 m_freem(tx_buffer->m_head);
3585                                 tx_buffer->m_head = NULL;
3586                         }
3587                         tx_buffer->next_eop = -1;
3588                         txr->watchdog_time = ticks;
3589
3590                         if (++first == adapter->num_tx_desc)
3591                                 first = 0;
3592
3593                         tx_buffer = &txr->tx_buffers[first];
3594                         tx_desc = &txr->tx_base[first];
3595                 }
3596                 ++txr->packets;
3597                 ++ifp->if_opackets;
3598                 /* See if we can continue to the next packet */
3599                 last = tx_buffer->next_eop;
3600                 if (last != -1) {
3601                         eop_desc = &txr->tx_base[last];
3602                         /* Get new done point */
3603                         if (++last == adapter->num_tx_desc) last = 0;
3604                         done = last;
3605                 } else
3606                         break;
3607         }
3608         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3609             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3610
3611         txr->next_to_clean = first;
3612
3613         /*
3614         ** Watchdog calculation: we know there's
3615         ** work outstanding or the first return
3616         ** would have been taken, so nothing processed
3617         ** for too long indicates a hang.
3618         */
3619         if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3620                 txr->queue_status = IGB_QUEUE_HUNG;
3621
3622         /*
3623          * If we have a minimum free, clear IFF_DRV_OACTIVE
3624          * to tell the stack that it is OK to send packets.
3625          */
3626         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3627                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3628                 /* All clean, turn off the watchdog */
3629                 if (txr->tx_avail == adapter->num_tx_desc) {
3630                         txr->queue_status = IGB_QUEUE_IDLE;
3631                         return (FALSE);
3632                 }
3633         }
3634         return (TRUE);
3635 }
3636
3637 /*********************************************************************
3638  *
3639  *  Refresh mbuf buffers for RX descriptor rings
3640  *   - now keeps its own state so discards due to resource
3641  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3642  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3643  *     it just returns, keeping its placeholder, so it can simply
3644  *
3645  **********************************************************************/
3646 static void
3647 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3648 {
3649         struct adapter          *adapter = rxr->adapter;
3650         bus_dma_segment_t       hseg[1];
3651         bus_dma_segment_t       pseg[1];
3652         struct igb_rx_buf       *rxbuf;
3653         struct mbuf             *mh, *mp;
3654         int                     i, j, nsegs, error;
3655         bool                    refreshed = FALSE;
3656
3657         i = j = rxr->next_to_refresh;
3658         /*
3659         ** Get one descriptor beyond
3660         ** our work mark to control
3661         ** the loop.
3662         */
3663         if (++j == adapter->num_rx_desc)
3664                 j = 0;
3665
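        /*
         * Loop invariant: 'i' is the slot being refreshed and 'j'
         * always runs one ahead, so the walk stops before 'limit'
         * and the ring never wraps onto itself.
         */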
3666         while (j != limit) {
3667                 rxbuf = &rxr->rx_buffers[i];
3668                 /* No hdr mbuf used with header split off */
3669                 if (rxr->hdr_split == FALSE)
3670                         goto no_split;
3671                 if (rxbuf->m_head == NULL) {
3672                         mh = m_gethdr(M_DONTWAIT, MT_DATA);
3673                         if (mh == NULL)
3674                                 goto update;
3675                 } else
3676                         mh = rxbuf->m_head;
3677
3678                 mh->m_pkthdr.len = MHLEN;
3679                 mh->m_len = MHLEN;
3680                 mh->m_flags |= M_PKTHDR;
3681                 /* Get the memory mapping */
3682                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3683                     rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3684                 if (error != 0) {
3685                         printf("Refresh mbufs: hdr dmamap load"
3686                             " failure - %d\n", error);
3687                         m_free(mh);
3688                         rxbuf->m_head = NULL;
3689                         goto update;
3690                 }
3691                 rxbuf->m_head = mh;
3692                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3693                     BUS_DMASYNC_PREREAD);
3694                 rxr->rx_base[i].read.hdr_addr =
3695                     htole64(hseg[0].ds_addr);
3696 no_split:
3697                 if (rxbuf->m_pack == NULL) {
3698                         mp = m_getjcl(M_DONTWAIT, MT_DATA,
3699                             M_PKTHDR, adapter->rx_mbuf_sz);
3700                         if (mp == NULL)
3701                                 goto update;
3702                 } else
3703                         mp = rxbuf->m_pack;
3704
3705                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3706                 /* Get the memory mapping */
3707                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3708                     rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3709                 if (error != 0) {
3710                         printf("Refresh mbufs: payload dmamap load"
3711                             " failure - %d\n", error);
3712                         m_free(mp);
3713                         rxbuf->m_pack = NULL;
3714                         goto update;
3715                 }
3716                 rxbuf->m_pack = mp;
3717                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3718                     BUS_DMASYNC_PREREAD);
3719                 rxr->rx_base[i].read.pkt_addr =
3720                     htole64(pseg[0].ds_addr);
3721                 refreshed = TRUE; /* I feel wefreshed :) */
3722
3723                 i = j; /* our next is precalculated */
3724                 rxr->next_to_refresh = i;
3725                 if (++j == adapter->num_rx_desc)
3726                         j = 0;
3727         }
3728 update:
3729         if (refreshed) /* update tail */
3730                 E1000_WRITE_REG(&adapter->hw,
3731                     E1000_RDT(rxr->me), rxr->next_to_refresh);
3732         return;
3733 }
3734
3735
3736 /*********************************************************************
3737  *
3738  *  Allocate memory for rx_buffer structures. Since we use one
3739  *  rx_buffer per received packet, the maximum number of rx_buffer's
3740  *  that we'll need is equal to the number of receive descriptors
3741  *  that we've allocated.
3742  *
3743  **********************************************************************/
3744 static int
3745 igb_allocate_receive_buffers(struct rx_ring *rxr)
3746 {
3747         struct  adapter         *adapter = rxr->adapter;
3748         device_t                dev = adapter->dev;
3749         struct igb_rx_buf       *rxbuf;
3750         int                     i, bsize, error;
3751
3752         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3753         if (!(rxr->rx_buffers =
3754             (struct igb_rx_buf *) malloc(bsize,
3755             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3756                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3757                 error = ENOMEM;
3758                 goto fail;
3759         }
3760
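        /*
         * Two tags serve each RX ring: 'htag' maps the small header
         * mbufs (at most MSIZE) used when header split is enabled,
         * and 'ptag' maps payload clusters of up to MJUM9BYTES.
         */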
3761         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3762                                    1, 0,                /* alignment, bounds */
3763                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3764                                    BUS_SPACE_MAXADDR,   /* highaddr */
3765                                    NULL, NULL,          /* filter, filterarg */
3766                                    MSIZE,               /* maxsize */
3767                                    1,                   /* nsegments */
3768                                    MSIZE,               /* maxsegsize */
3769                                    0,                   /* flags */
3770                                    NULL,                /* lockfunc */
3771                                    NULL,                /* lockfuncarg */
3772                                    &rxr->htag))) {
3773                 device_printf(dev, "Unable to create RX DMA tag\n");
3774                 goto fail;
3775         }
3776
3777         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3778                                    1, 0,                /* alignment, bounds */
3779                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3780                                    BUS_SPACE_MAXADDR,   /* highaddr */
3781                                    NULL, NULL,          /* filter, filterarg */
3782                                    MJUM9BYTES,          /* maxsize */
3783                                    1,                   /* nsegments */
3784                                    MJUM9BYTES,          /* maxsegsize */
3785                                    0,                   /* flags */
3786                                    NULL,                /* lockfunc */
3787                                    NULL,                /* lockfuncarg */
3788                                    &rxr->ptag))) {
3789                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3790                 goto fail;
3791         }
3792
3793         for (i = 0; i < adapter->num_rx_desc; i++) {
3794                 rxbuf = &rxr->rx_buffers[i];
3795                 error = bus_dmamap_create(rxr->htag,
3796                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3797                 if (error) {
3798                         device_printf(dev,
3799                             "Unable to create RX head DMA maps\n");
3800                         goto fail;
3801                 }
3802                 error = bus_dmamap_create(rxr->ptag,
3803                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3804                 if (error) {
3805                         device_printf(dev,
3806                             "Unable to create RX packet DMA maps\n");
3807                         goto fail;
3808                 }
3809         }
3810
3811         return (0);
3812
3813 fail:
3814         /* Frees all, but can handle partial completion */
3815         igb_free_receive_structures(adapter);
3816         return (error);
3817 }
3818
3819
3820 static void
3821 igb_free_receive_ring(struct rx_ring *rxr)
3822 {
3823         struct  adapter         *adapter = rxr->adapter;
3824         struct igb_rx_buf       *rxbuf;
3825
3826
3827         for (int i = 0; i < adapter->num_rx_desc; i++) {
3828                 rxbuf = &rxr->rx_buffers[i];
3829                 if (rxbuf->m_head != NULL) {
3830                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3831                             BUS_DMASYNC_POSTREAD);
3832                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3833                         rxbuf->m_head->m_flags |= M_PKTHDR;
3834                         m_freem(rxbuf->m_head);
3835                 }
3836                 if (rxbuf->m_pack != NULL) {
3837                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3838                             BUS_DMASYNC_POSTREAD);
3839                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3840                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3841                         m_freem(rxbuf->m_pack);
3842                 }
3843                 rxbuf->m_head = NULL;
3844                 rxbuf->m_pack = NULL;
3845         }
3846 }
3847
3848
3849 /*********************************************************************
3850  *
3851  *  Initialize a receive ring and its buffers.
3852  *
3853  **********************************************************************/
3854 static int
3855 igb_setup_receive_ring(struct rx_ring *rxr)
3856 {
3857         struct  adapter         *adapter;
3858         struct  ifnet           *ifp;
3859         device_t                dev;
3860         struct igb_rx_buf       *rxbuf;
3861         bus_dma_segment_t       pseg[1], hseg[1];
3862         struct lro_ctrl         *lro = &rxr->lro;
3863         int                     rsize, nsegs, error = 0;
3864
3865         adapter = rxr->adapter;
3866         dev = adapter->dev;
3867         ifp = adapter->ifp;
3868
3869         /* Clear the ring contents */
3870         IGB_RX_LOCK(rxr);
3871         rsize = roundup2(adapter->num_rx_desc *
3872             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3873         bzero((void *)rxr->rx_base, rsize);
3874
3875         /*
3876         ** Free current RX buffer structures and their mbufs
3877         */
3878         igb_free_receive_ring(rxr);
3879
3880         /* Configure for header split? */
3881         if (igb_header_split)
3882                 rxr->hdr_split = TRUE;
3883
3884         /* Now replenish the ring mbufs */
3885         for (int j = 0; j < adapter->num_rx_desc; ++j) {
3886                 struct mbuf     *mh, *mp;
3887
3888                 rxbuf = &rxr->rx_buffers[j];
3889                 if (rxr->hdr_split == FALSE)
3890                         goto skip_head;
3891
3892                 /* First the header */
3893                 rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3894                 if (rxbuf->m_head == NULL) {
3895                         error = ENOBUFS;
3896                         goto fail;
3897                 }
3898                 m_adj(rxbuf->m_head, ETHER_ALIGN);
3899                 mh = rxbuf->m_head;
3900                 mh->m_len = mh->m_pkthdr.len = MHLEN;
3901                 mh->m_flags |= M_PKTHDR;
3902                 /* Get the memory mapping */
3903                 error = bus_dmamap_load_mbuf_sg(rxr->htag,
3904                     rxbuf->hmap, rxbuf->m_head, hseg,
3905                     &nsegs, BUS_DMA_NOWAIT);
3906                 if (error != 0) /* Nothing elegant to do here */
3907                         goto fail;
3908                 bus_dmamap_sync(rxr->htag,
3909                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
3910                 /* Update descriptor */
3911                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3912
3913 skip_head:
3914                 /* Now the payload cluster */
3915                 rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3916                     M_PKTHDR, adapter->rx_mbuf_sz);
3917                 if (rxbuf->m_pack == NULL) {
3918                         error = ENOBUFS;
3919                         goto fail;
3920                 }
3921                 mp = rxbuf->m_pack;
3922                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3923                 /* Get the memory mapping */
3924                 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3925                     rxbuf->pmap, mp, pseg,
3926                     &nsegs, BUS_DMA_NOWAIT);
3927                 if (error != 0)
3928                         goto fail;
3929                 bus_dmamap_sync(rxr->ptag,
3930                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
3931                 /* Update descriptor */
3932                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3933         }
3934
3935         /* Setup our descriptor indices */
3936         rxr->next_to_check = 0;
3937         rxr->next_to_refresh = adapter->num_rx_desc - 1;
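        /*
         * next_to_check is where the cleanup routine resumes scanning
         * for completed descriptors; next_to_refresh marks the last
         * slot owning a buffer and becomes the tail pointer when the
         * receive unit is initialized.
         */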
3938         rxr->lro_enabled = FALSE;
3939         rxr->rx_split_packets = 0;
3940         rxr->rx_bytes = 0;
3941
3942         rxr->fmp = NULL;
3943         rxr->lmp = NULL;
3944         rxr->discard = FALSE;
3945
3946         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3947             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3948
3949         /*
3950         ** Now set up the LRO interface; we
3951         ** also only do header split when LRO
3952         ** is enabled, since the two are often
3953         ** undesirable in the same setups.
3954         */
3955         if (ifp->if_capenable & IFCAP_LRO) {
3956                 error = tcp_lro_init(lro);
3957                 if (error) {
3958                         device_printf(dev, "LRO Initialization failed!\n");
3959                         goto fail;
3960                 }
3961                 INIT_DEBUGOUT("RX LRO Initialized\n");
3962                 rxr->lro_enabled = TRUE;
3963                 lro->ifp = adapter->ifp;
3964         }
3965
3966         IGB_RX_UNLOCK(rxr);
3967         return (0);
3968
3969 fail:
3970         igb_free_receive_ring(rxr);
3971         IGB_RX_UNLOCK(rxr);
3972         return (error);
3973 }
3974
3975
3976 /*********************************************************************
3977  *
3978  *  Initialize all receive rings.
3979  *
3980  **********************************************************************/
3981 static int
3982 igb_setup_receive_structures(struct adapter *adapter)
3983 {
3984         struct rx_ring *rxr = adapter->rx_rings;
3985         int i;
3986
3987         for (i = 0; i < adapter->num_queues; i++, rxr++)
3988                 if (igb_setup_receive_ring(rxr))
3989                         goto fail;
3990
3991         return (0);
3992 fail:
3993         /*
3994          * Free RX buffers allocated so far; we will only handle
3995          * the rings that completed, since the failing case will have
3996          * cleaned up after itself. 'i' is the endpoint.
3997          */
3998         for (int j = 0; j < i; ++j) {
3999                 rxr = &adapter->rx_rings[j];
4000                 IGB_RX_LOCK(rxr);
4001                 igb_free_receive_ring(rxr);
4002                 IGB_RX_UNLOCK(rxr);
4003         }
4004
4005         return (ENOBUFS);
4006 }
4007
4008 /*********************************************************************
4009  *
4010  *  Enable receive unit.
4011  *
4012  **********************************************************************/
4013 static void
4014 igb_initialize_receive_units(struct adapter *adapter)
4015 {
4016         struct rx_ring  *rxr = adapter->rx_rings;
4017         struct ifnet    *ifp = adapter->ifp;
4018         struct e1000_hw *hw = &adapter->hw;
4019         u32             rctl, rxcsum, psize, srrctl = 0;
4020
4021         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4022
4023         /*
4024          * Make sure receives are disabled while setting
4025          * up the descriptor ring
4026          */
4027         rctl = E1000_READ_REG(hw, E1000_RCTL);
4028         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4029
4030         /*
4031         ** Set up for header split
4032         */
4033         if (igb_header_split) {
4034                 /* Use a standard mbuf for the header */
4035                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4036                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4037         } else
4038                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4039
4040         /*
4041         ** Set up for jumbo frames
4042         */
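        /*
        ** SRRCTL expresses the packet buffer size in 1KB units,
        ** which is why the sizes below are shifted right by
        ** E1000_SRRCTL_BSIZEPKT_SHIFT before being OR'd in.
        */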
4043         if (ifp->if_mtu > ETHERMTU) {
4044                 rctl |= E1000_RCTL_LPE;
4045                 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4046                         srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4047                         rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4048                 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4049                         srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4050                         rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4051                 }
4052                 /* Set maximum packet len */
4053                 psize = adapter->max_frame_size;
4054                 /* are we on a vlan? */
4055                 if (adapter->ifp->if_vlantrunk != NULL)
4056                         psize += VLAN_TAG_SIZE;
4057                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4058         } else {
4059                 rctl &= ~E1000_RCTL_LPE;
4060                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4061                 rctl |= E1000_RCTL_SZ_2048;
4062         }
4063
4064         /* Setup the Base and Length of the Rx Descriptor Rings */
4065         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4066                 u64 bus_addr = rxr->rxdma.dma_paddr;
4067                 u32 rxdctl;
4068
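                /*
                 * Legacy and advanced RX descriptors are both 16 bytes,
                 * so sizing RDLEN with struct e1000_rx_desc here is
                 * equivalent to using union e1000_adv_rx_desc.
                 */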
4069                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4070                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4071                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4072                     (uint32_t)(bus_addr >> 32));
4073                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4074                     (uint32_t)bus_addr);
4075                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4076                 /* Enable this Queue */
4077                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4078                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4079                 rxdctl &= 0xFFF00000;
4080                 rxdctl |= IGB_RX_PTHRESH;
4081                 rxdctl |= IGB_RX_HTHRESH << 8;
4082                 rxdctl |= IGB_RX_WTHRESH << 16;
4083                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4084         }
4085
4086         /*
4087         ** Setup for RX MultiQueue
4088         */
4089         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4090         if (adapter->num_queues > 1) {
4091                 u32 random[10], mrqc, shift = 0;
4092                 union igb_reta {
4093                         u32 dword;
4094                         u8  bytes[4];
4095                 } reta;
4096
4097                 arc4rand(&random, sizeof(random), 0);
4098                 if (adapter->hw.mac.type == e1000_82575)
4099                         shift = 6;
4100                 /* Fill the 128-entry RSS redirection table, 4 entries per register */
4101                 for (int i = 0; i < 128; i++) {
4102                         reta.bytes[i & 3] =
4103                             (i % adapter->num_queues) << shift;
4104                         if ((i & 3) == 3)
4105                                 E1000_WRITE_REG(hw,
4106                                     E1000_RETA(i >> 2), reta.dword);
4107                 }
4108                 /* Now fill in hash table */
4109                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4110                 for (int i = 0; i < 10; i++)
4111                         E1000_WRITE_REG_ARRAY(hw,
4112                             E1000_RSSRK(0), i, random[i]);
4113
4114                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4115                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4116                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4117                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4118                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4119                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4120                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4121                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4122
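                /*
                ** MRQC now enables 4-queue RSS and selects which
                ** header fields (IPv4/IPv6 addresses plus TCP/UDP
                ** ports) feed the hash that spreads packets across
                ** the queues.
                */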
4123                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4124
4125                 /*
4126                 ** NOTE: Receive Full-Packet Checksum Offload
4127                 ** is mutually exclusive with Multiqueue. This
4128                 ** is not the same as the TCP/IP checksum offloads,
4129                 ** which still work.
4130                 */
4131                 rxcsum |= E1000_RXCSUM_PCSD;
4132 #if __FreeBSD_version >= 800000
4133                 /* For SCTP Offload */
4134                 if ((hw->mac.type == e1000_82576)
4135                     && (ifp->if_capenable & IFCAP_RXCSUM))
4136                         rxcsum |= E1000_RXCSUM_CRCOFL;
4137 #endif
4138         } else {
4139                 /* Non RSS setup */
4140                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4141                         rxcsum |= E1000_RXCSUM_IPPCSE;
4142 #if __FreeBSD_version >= 800000
4143                         if (adapter->hw.mac.type == e1000_82576)
4144                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4145 #endif
4146                 } else
4147                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4148         }
4149         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4150
4151         /* Setup the Receive Control Register */
4152         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4153         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4154                    E1000_RCTL_RDMTS_HALF |
4155                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4156         /* Strip CRC bytes. */
4157         rctl |= E1000_RCTL_SECRC;
4158         /* Make sure VLAN Filters are off */
4159         rctl &= ~E1000_RCTL_VFE;
4160         /* Don't store bad packets */
4161         rctl &= ~E1000_RCTL_SBP;
4162
4163         /* Enable Receives */
4164         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4165
4166         /*
4167          * Setup the HW Rx Head and Tail Descriptor Pointers
4168          *   - needs to be after enable
4169          */
4170         for (int i = 0; i < adapter->num_queues; i++) {
4171                 rxr = &adapter->rx_rings[i];
4172                 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4173                 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4174         }
4175         return;
4176 }
4177
4178 /*********************************************************************
4179  *
4180  *  Free receive rings.
4181  *
4182  **********************************************************************/
4183 static void
4184 igb_free_receive_structures(struct adapter *adapter)
4185 {
4186         struct rx_ring *rxr = adapter->rx_rings;
4187
4188         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4189                 struct lro_ctrl *lro = &rxr->lro;
4190                 igb_free_receive_buffers(rxr);
4191                 tcp_lro_free(lro);
4192                 igb_dma_free(adapter, &rxr->rxdma);
4193         }
4194
4195         free(adapter->rx_rings, M_DEVBUF);
4196 }
4197
4198 /*********************************************************************
4199  *
4200  *  Free receive ring data structures.
4201  *
4202  **********************************************************************/
4203 static void
4204 igb_free_receive_buffers(struct rx_ring *rxr)
4205 {
4206         struct adapter          *adapter = rxr->adapter;
4207         struct igb_rx_buf       *rxbuf;
4208         int i;
4209
4210         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4211
4212         /* Cleanup any existing buffers */
4213         if (rxr->rx_buffers != NULL) {
4214                 for (i = 0; i < adapter->num_rx_desc; i++) {
4215                         rxbuf = &rxr->rx_buffers[i];
4216                         if (rxbuf->m_head != NULL) {
4217                                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4218                                     BUS_DMASYNC_POSTREAD);
4219                                 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4220                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4221                                 m_freem(rxbuf->m_head);
4222                         }
4223                         if (rxbuf->m_pack != NULL) {
4224                                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4225                                     BUS_DMASYNC_POSTREAD);
4226                                 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4227                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4228                                 m_freem(rxbuf->m_pack);
4229                         }
4230                         rxbuf->m_head = NULL;
4231                         rxbuf->m_pack = NULL;
4232                         if (rxbuf->hmap != NULL) {
4233                                 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4234                                 rxbuf->hmap = NULL;
4235                         }
4236                         if (rxbuf->pmap != NULL) {
4237                                 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4238                                 rxbuf->pmap = NULL;
4239                         }
4240                 }
4241                 free(rxr->rx_buffers, M_DEVBUF);
4242                 rxr->rx_buffers = NULL;
4245         }
4246
4247         if (rxr->htag != NULL) {
4248                 bus_dma_tag_destroy(rxr->htag);
4249                 rxr->htag = NULL;
4250         }
4251         if (rxr->ptag != NULL) {
4252                 bus_dma_tag_destroy(rxr->ptag);
4253                 rxr->ptag = NULL;
4254         }
4255 }
4256
4257 static __inline void
4258 igb_rx_discard(struct rx_ring *rxr, int i)
4259 {
4260         struct igb_rx_buf       *rbuf;
4261
4262         rbuf = &rxr->rx_buffers[i];
4263
4264         /* Partially received? Free the chain */
4265         if (rxr->fmp != NULL) {
4266                 rxr->fmp->m_flags |= M_PKTHDR;
4267                 m_freem(rxr->fmp);
4268                 rxr->fmp = NULL;
4269                 rxr->lmp = NULL;
4270         }
4271
4272                 /*
4273                 ** With advanced descriptors the writeback
4274                 ** clobbers the buffer addresses, so it's easier
4275                 ** to just free the existing mbufs and take
4276                 ** the normal refresh path to get new buffers
4277                 ** and mappings.
4278                 */
4279         if (rbuf->m_head) {
4280                 m_free(rbuf->m_head);
4281                 rbuf->m_head = NULL;
4282         }
4283
4284         if (rbuf->m_pack) {
4285                 m_free(rbuf->m_pack);
4286                 rbuf->m_pack = NULL;
4287         }
4288
4289         return;
4290 }
4291
4292 static __inline void
4293 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4294 {
4295
4296         /*
4297          * At the moment LRO is applied only to IPv4/TCP packets whose TCP
4298          * checksum has been verified by hardware, and which carry no VLAN
4299          * tag in the Ethernet header.
4300          */
4301         if (rxr->lro_enabled &&
4302             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4303             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4304             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4305             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4306             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4307             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4308                 /*
4309                  * Send to the stack if:
4310                  *  - LRO not enabled, or
4311                  *  - no LRO resources, or
4312                  *  - LRO enqueue fails
4313                  */
4314                 if (rxr->lro.lro_cnt != 0)
4315                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4316                                 return;
4317         }
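        /*
        ** The RX lock is dropped around the stack call below: input
        ** processing may re-enter the driver (e.g. to transmit an
        ** ACK), and holding the ring lock across it invites deadlock.
        */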
4318         IGB_RX_UNLOCK(rxr);
4319         (*ifp->if_input)(ifp, m);
4320         IGB_RX_LOCK(rxr);
4321 }
4322
4323 /*********************************************************************
4324  *
4325  *  This routine executes in interrupt context. It replenishes
4326  *  the mbufs in the descriptor ring and passes data which has
4327  *  been DMA'd into host memory up to the stack.
4328  *
4329  *  We loop at most count times if count is > 0, or until done if
4330  *  count < 0.
4331  *
4332  *  Return TRUE if more to clean, FALSE otherwise
4333  *********************************************************************/
4334 static bool
4335 igb_rxeof(struct igb_queue *que, int count, int *done)
4336 {
4337         struct adapter          *adapter = que->adapter;
4338         struct rx_ring          *rxr = que->rxr;
4339         struct ifnet            *ifp = adapter->ifp;
4340         struct lro_ctrl         *lro = &rxr->lro;
4341         struct lro_entry        *queued;
4342         int                     i, processed = 0, rxdone = 0;
4343         u32                     ptype, staterr = 0;
4344         union e1000_adv_rx_desc *cur;
4345
4346         IGB_RX_LOCK(rxr);
4347         /* Sync the ring. */
4348         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4349             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4350
4351         /* Main clean loop */
4352         for (i = rxr->next_to_check; count != 0;) {
4353                 struct mbuf             *sendmp, *mh, *mp;
4354                 struct igb_rx_buf       *rxbuf;
4355                 u16                     hlen, plen, hdr, vtag;
4356                 bool                    eop = FALSE;
4357  
4358                 cur = &rxr->rx_base[i];
4359                 staterr = le32toh(cur->wb.upper.status_error);
4360                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4361                         break;
4362                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4363                         break;
4364                 count--;
4365                 sendmp = mh = mp = NULL;
4366                 cur->wb.upper.status_error = 0;
4367                 rxbuf = &rxr->rx_buffers[i];
4368                 plen = le16toh(cur->wb.upper.length);
4369                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
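                /*
                ** (On i350, frames looped back locally carry the
                **  VLAN tag in big-endian order, hence the
                **  byte-order split below.)
                */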
4370                 if ((adapter->hw.mac.type == e1000_i350) &&
4371                     (staterr & E1000_RXDEXT_STATERR_LB))
4372                         vtag = be16toh(cur->wb.upper.vlan);
4373                 else
4374                         vtag = le16toh(cur->wb.upper.vlan);
4375                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4376                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4377
4378                 /* Make sure all segments of a bad packet are discarded */
4379                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4380                     (rxr->discard)) {
4381                         ifp->if_ierrors++;
4382                         ++rxr->rx_discarded;
4383                         if (!eop) /* Catch subsequent segs */
4384                                 rxr->discard = TRUE;
4385                         else
4386                                 rxr->discard = FALSE;
4387                         igb_rx_discard(rxr, i);
4388                         goto next_desc;
4389                 }
4390
4391                 /*
4392                 ** The way the hardware is configured to
4393                 ** split, it will ONLY use the header buffer
4394                 ** when header split is enabled; otherwise we
4395                 ** get normal behavior, i.e., both header and
4396                 ** payload are DMA'd into the payload buffer.
4397                 **
4398                 ** The fmp test catches the case where a
4399                 ** packet spans multiple descriptors; in that
4400                 ** case only the first header is valid.
4401                 */
4402                 if (rxr->hdr_split && rxr->fmp == NULL) {
4403                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4404                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4405                         if (hlen > IGB_HDR_BUF)
4406                                 hlen = IGB_HDR_BUF;
4407                         mh = rxr->rx_buffers[i].m_head;
4408                         mh->m_len = hlen;
4409                         /* clear buf pointer for refresh */
4410                         rxbuf->m_head = NULL;
4411                         /*
4412                         ** Get the payload length; this
4413                         ** could be zero if it's a small
4414                         ** packet.
4415                         */
4416                         if (plen > 0) {
4417                                 mp = rxr->rx_buffers[i].m_pack;
4418                                 mp->m_len = plen;
4419                                 mh->m_next = mp;
4420                                 /* clear buf pointer */
4421                                 rxbuf->m_pack = NULL;
4422                                 rxr->rx_split_packets++;
4423                         }
4424                 } else {
4425                         /*
4426                         ** Either no header split, or a
4427                         ** secondary piece of a fragmented
4428                         ** split packet.
4429                         */
4430                         mh = rxr->rx_buffers[i].m_pack;
4431                         mh->m_len = plen;
4432                         /* clear buf info for refresh */
4433                         rxbuf->m_pack = NULL;
4434                 }
4435
4436                 ++processed; /* So we know when to refresh */
4437
4438                 /* Initial frame - setup */
4439                 if (rxr->fmp == NULL) {
4440                         mh->m_pkthdr.len = mh->m_len;
4441                         /* Save the head of the chain */
4442                         rxr->fmp = mh;
4443                         rxr->lmp = mh;
4444                         if (mp != NULL) {
4445                                 /* Add payload if split */
4446                                 mh->m_pkthdr.len += mp->m_len;
4447                                 rxr->lmp = mh->m_next;
4448                         }
4449                 } else {
4450                         /* Chain mbuf's together */
4451                         rxr->lmp->m_next = mh;
4452                         rxr->lmp = rxr->lmp->m_next;
4453                         rxr->fmp->m_pkthdr.len += mh->m_len;
4454                 }
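                /*
                ** (fmp/lmp track the first and last mbuf of the frame
                **  being assembled; pkthdr.len accumulates per segment
                **  so the header is complete by the time EOP is seen.)
                */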
4455
4456                 if (eop) {
4457                         rxr->fmp->m_pkthdr.rcvif = ifp;
4458                         ifp->if_ipackets++;
4459                         rxr->rx_packets++;
4460                         /* capture data for AIM */
4461                         rxr->packets++;
4462                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4463                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4464
4465                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4466                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4467
4468                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4469                             (staterr & E1000_RXD_STAT_VP) != 0) {
4470                                 rxr->fmp->m_pkthdr.ether_vtag = vtag;
4471                                 rxr->fmp->m_flags |= M_VLANTAG;
4472                         }
4473 #if __FreeBSD_version >= 800000
4474                         rxr->fmp->m_pkthdr.flowid = que->msix;
4475                         rxr->fmp->m_flags |= M_FLOWID;
4476 #endif
4477                         sendmp = rxr->fmp;
4478                         /* Make sure to set M_PKTHDR. */
4479                         sendmp->m_flags |= M_PKTHDR;
4480                         rxr->fmp = NULL;
4481                         rxr->lmp = NULL;
4482                 }
4483
4484 next_desc:
4485                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4486                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4487
4488                 /* Advance our pointers to the next descriptor. */
4489                 if (++i == adapter->num_rx_desc)
4490                         i = 0;
4491                 /*
4492                 ** Send to the stack or LRO
4493                 */
4494                 if (sendmp != NULL) {
4495                         rxr->next_to_check = i;
4496                         igb_rx_input(rxr, ifp, sendmp, ptype);
4497                         i = rxr->next_to_check;
4498                         rxdone++;
4499                 }
4500
4501                 /* Every 8 descriptors we go to refresh mbufs */
4502                 if (processed == 8) {
4503                         igb_refresh_mbufs(rxr, i);
4504                         processed = 0;
4505                 }
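                /*
                ** (Batching the refresh amortizes the RDT doorbell
                **  write over several descriptors; 8 is presumably a
                **  balance between register traffic and keeping the
                **  ring stocked.)
                */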
4506         }
4507
4508         /* Catch any remainders */
4509         if (igb_rx_unrefreshed(rxr))
4510                 igb_refresh_mbufs(rxr, i);
4511
4512         rxr->next_to_check = i;
4513
4514         /*
4515          * Flush any outstanding LRO work
4516          */
4517         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4518                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4519                 tcp_lro_flush(lro, queued);
4520         }
4521
4522         if (done != NULL)
4523                 *done = rxdone;
4524
4525         IGB_RX_UNLOCK(rxr);
4526         return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4527 }
4528
4529 /*********************************************************************
4530  *
4531  *  Verify that the hardware indicated that the checksum is valid.
4532  *  Inform the stack about the status of the checksum so that the
4533  *  stack doesn't spend time verifying it.
4534  *
4535  *********************************************************************/
4536 static void
4537 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4538 {
4539         u16 status = (u16)staterr;
4540         u8  errors = (u8) (staterr >> 24);
4541         int sctp;
4542
4543         /* If the Ignore Checksum bit is set, report nothing */
4544         if (status & E1000_RXD_STAT_IXSM) {
4545                 mp->m_pkthdr.csum_flags = 0;
4546                 return;
4547         }
4548
4549         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4550             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4551                 sctp = 1;
4552         else
4553                 sctp = 0;
4554         if (status & E1000_RXD_STAT_IPCS) {
4555                 /* Did it pass? */
4556                 if (!(errors & E1000_RXD_ERR_IPE)) {
4557                         /* IP Checksum Good */
4558                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4559                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4560                 } else
4561                         mp->m_pkthdr.csum_flags = 0;
4562         }
4563
4564         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4565                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4566 #if __FreeBSD_version >= 800000
4567                 if (sctp) /* reassign */
4568                         type = CSUM_SCTP_VALID;
4569 #endif
4570                 /* Did it pass? */
4571                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4572                         mp->m_pkthdr.csum_flags |= type;
4573                         if (sctp == 0)
4574                                 mp->m_pkthdr.csum_data = htons(0xffff);
4575                 }
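                /*
                ** (A csum_data of 0xffff together with CSUM_PSEUDO_HDR
                **  is the stack's convention for "hardware verified the
                **  full TCP/UDP checksum, nothing left to compute".)
                */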
4576         }
4577         return;
4578 }
4579
4580 /*
4581  * This routine is run via a vlan
4582  * config EVENT
4583  */
4584 static void
4585 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4586 {
4587         struct adapter  *adapter = ifp->if_softc;
4588         u32             index, bit;
4589
4590         if (ifp->if_softc != arg)       /* Not our event */
4591                 return;
4592
4593         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4594                 return;
4595
4596         IGB_CORE_LOCK(adapter);
4597         index = (vtag >> 5) & 0x7F;
4598         bit = vtag & 0x1F;
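        /*
        ** e.g. vtag 100: index = 100 >> 5 = 3, bit = 100 & 0x1F = 4,
        ** so VLAN 100 occupies bit 4 of shadow_vfta[3]; 128 words of
        ** 32 bits give one bit per possible VLAN ID.
        */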
4599         adapter->shadow_vfta[index] |= (1 << bit);
4600         ++adapter->num_vlans;
4601         /* Change hw filter setting */
4602         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4603                 igb_setup_vlan_hw_support(adapter);
4604         IGB_CORE_UNLOCK(adapter);
4605 }
4606
4607 /*
4608  * This routine is run via a vlan
4609  * unconfig EVENT
4610  */
4611 static void
4612 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4613 {
4614         struct adapter  *adapter = ifp->if_softc;
4615         u32             index, bit;
4616
4617         if (ifp->if_softc != arg)       /* Not our event */
4618                 return;
4619
4620         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4621                 return;
4622
4623         IGB_CORE_LOCK(adapter);
4624         index = (vtag >> 5) & 0x7F;
4625         bit = vtag & 0x1F;
4626         adapter->shadow_vfta[index] &= ~(1 << bit);
4627         --adapter->num_vlans;
4628         /* Change hw filter setting */
4629         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4630                 igb_setup_vlan_hw_support(adapter);
4631         IGB_CORE_UNLOCK(adapter);
4632 }
4633
4634 static void
4635 igb_setup_vlan_hw_support(struct adapter *adapter)
4636 {
4637         struct e1000_hw *hw = &adapter->hw;
4638         struct ifnet    *ifp = adapter->ifp;
4639         u32             reg;
4640
4641         if (adapter->vf_ifp) {
4642                 e1000_rlpml_set_vf(hw,
4643                     adapter->max_frame_size + VLAN_TAG_SIZE);
4644                 return;
4645         }
4646
4647         reg = E1000_READ_REG(hw, E1000_CTRL);
4648         reg |= E1000_CTRL_VME;
4649         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4650
4651         /* Enable the Filter Table */
4652         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4653                 reg = E1000_READ_REG(hw, E1000_RCTL);
4654                 reg &= ~E1000_RCTL_CFIEN;
4655                 reg |= E1000_RCTL_VFE;
4656                 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4657         }
4658
4659         /* Update the frame size */
4660         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4661             adapter->max_frame_size + VLAN_TAG_SIZE);
4662
4663         /* Don't bother with table if no vlans */
4664         if ((adapter->num_vlans == 0) ||
4665             ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4666                 return;
4667         /*
4668         ** A soft reset zeroes out the VFTA, so
4669         ** we need to repopulate it now.
4670         */
4671         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4672                 if (adapter->shadow_vfta[i] != 0) {
4673                         if (adapter->vf_ifp)
4674                                 e1000_vfta_set_vf(hw,
4675                                     adapter->shadow_vfta[i], TRUE);
4676                         else
4677                                 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4678                                  i, adapter->shadow_vfta[i]);
4679                 }
4680 }
4681
4682 static void
4683 igb_enable_intr(struct adapter *adapter)
4684 {
4685         /* With RSS set up what to auto clear */
4686         if (adapter->msix_mem) {
4687                 u32 mask = (adapter->que_mask | adapter->link_mask);
4688                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4689                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4690                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4691                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4692                     E1000_IMS_LSC);
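                /*
                ** (Roughly: EIAC lets the queue-vector cause bits
                **  clear themselves when the MSIX vector fires, and
                **  EIAM arms the matching auto-mask, so the fast path
                **  need not write EICR/EIMS on every interrupt;
                **  link-state changes still arrive via the legacy
                **  IMS/LSC path.)
                */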
4693         } else {
4694                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4695                     IMS_ENABLE_MASK);
4696         }
4697         E1000_WRITE_FLUSH(&adapter->hw);
4698
4699         return;
4700 }
4701
4702 static void
4703 igb_disable_intr(struct adapter *adapter)
4704 {
4705         if (adapter->msix_mem) {
4706                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4707                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4708         } 
4709         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4710         E1000_WRITE_FLUSH(&adapter->hw);
4711         return;
4712 }
4713
4714 /*
4715  * Bit of a misnomer: what this really means is
4716  * to enable OS management of the system, i.e.
4717  * to disable the special hardware management features.
4718  */
4719 static void
4720 igb_init_manageability(struct adapter *adapter)
4721 {
4722         if (adapter->has_manage) {
4723                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4724                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4725
4726                 /* disable hardware interception of ARP */
4727                 manc &= ~(E1000_MANC_ARP_EN);
4728
4729                 /* enable receiving management packets to the host */
4730                 manc |= E1000_MANC_EN_MNG2HOST;
4731                 manc2h |= 1 << 5;  /* Mng Port 623 */
4732                 manc2h |= 1 << 6;  /* Mng Port 664 */
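                /* (623 and 664 are the standard ASF/RMCP management
                **  ports, plain and secure respectively.) */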
4733                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4734                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4735         }
4736 }
4737
4738 /*
4739  * Give control back to hardware management
4740  * controller if there is one.
4741  */
4742 static void
4743 igb_release_manageability(struct adapter *adapter)
4744 {
4745         if (adapter->has_manage) {
4746                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4747
4748                 /* re-enable hardware interception of ARP */
4749                 manc |= E1000_MANC_ARP_EN;
4750                 manc &= ~E1000_MANC_EN_MNG2HOST;
4751
4752                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4753         }
4754 }
4755
4756 /*
4757  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4758  * For ASF and Pass Through versions of f/w this means that
4759  * the driver is loaded. 
4760  *
4761  */
4762 static void
4763 igb_get_hw_control(struct adapter *adapter)
4764 {
4765         u32 ctrl_ext;
4766
4767         if (adapter->vf_ifp)
4768                 return;
4769
4770         /* Let firmware know the driver has taken over */
4771         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4772         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4773             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4774 }
4775
4776 /*
4777  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4778  * For ASF and Pass Through versions of f/w this means that the
4779  * driver is no longer loaded.
4780  *
4781  */
4782 static void
4783 igb_release_hw_control(struct adapter *adapter)
4784 {
4785         u32 ctrl_ext;
4786
4787         if (adapter->vf_ifp)
4788                 return;
4789
4790         /* Let firmware take over control of h/w */
4791         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4792         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4793             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4794 }
4795
4796 static int
4797 igb_is_valid_ether_addr(uint8_t *addr)
4798 {
4799         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4800
4801         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4802                 return (FALSE);
4803         }
4804
4805         return (TRUE);
4806 }
4807
4808
4809 /*
4810  * Enable PCI Wake On Lan capability
4811  */
4812 static void
4813 igb_enable_wakeup(device_t dev)
4814 {
4815         u16     cap, status;
4816         u8      id;
4817
4818         /* First find the capabilities pointer */
4819         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4820         /* Read the PM Capabilities */
4821         id = pci_read_config(dev, cap, 1);
4822         if (id != PCIY_PMG)     /* Something wrong */
4823                 return;
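        /*
        ** (Note: only the first capability in the list is examined;
        **  the PM capability is assumed to sit at its head on these
        **  adapters, otherwise we quietly give up.)
        */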
4824         /* OK, we have the power capabilities, so
4825            now get the status register */
4826         cap += PCIR_POWER_STATUS;
4827         status = pci_read_config(dev, cap, 2);
4828         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4829         pci_write_config(dev, cap, status, 2);
4830         return;
4831 }
4832
4833 static void
4834 igb_led_func(void *arg, int onoff)
4835 {
4836         struct adapter  *adapter = arg;
4837
4838         IGB_CORE_LOCK(adapter);
4839         if (onoff) {
4840                 e1000_setup_led(&adapter->hw);
4841                 e1000_led_on(&adapter->hw);
4842         } else {
4843                 e1000_led_off(&adapter->hw);
4844                 e1000_cleanup_led(&adapter->hw);
4845         }
4846         IGB_CORE_UNLOCK(adapter);
4847 }
4848
4849 /**********************************************************************
4850  *
4851  *  Update the board statistics counters.
4852  *
4853  **********************************************************************/
4854 static void
4855 igb_update_stats_counters(struct adapter *adapter)
4856 {
4857         struct ifnet            *ifp;
4858         struct e1000_hw         *hw = &adapter->hw;
4859         struct e1000_hw_stats   *stats;
4860
4861         /*
4862         ** The virtual function adapter has only a
4863         ** small, controlled set of stats, so update
4864         ** only those and return.
4865         */
4866         if (adapter->vf_ifp) {
4867                 igb_update_vf_stats_counters(adapter);
4868                 return;
4869         }
4870
4871         stats = (struct e1000_hw_stats  *)adapter->stats;
4872
4873         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4874            (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4875                 stats->symerrs +=
4876                     E1000_READ_REG(hw, E1000_SYMERRS);
4877                 stats->sec += E1000_READ_REG(hw, E1000_SEC);
4878         }
4879
4880         stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4881         stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4882         stats->scc += E1000_READ_REG(hw, E1000_SCC);
4883         stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4884
4885         stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4886         stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4887         stats->colc += E1000_READ_REG(hw, E1000_COLC);
4888         stats->dc += E1000_READ_REG(hw, E1000_DC);
4889         stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4890         stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4891         stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4892         /*
4893         ** For watchdog management we need to know if we have been
4894         ** paused during the last interval, so capture that here.
4895         */ 
4896         adapter->pause_frames = E1000_READ_REG(hw, E1000_XOFFRXC);
4897         stats->xoffrxc += adapter->pause_frames;
4898         stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4899         stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4900         stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4901         stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4902         stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4903         stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4904         stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4905         stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4906         stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4907         stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4908         stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4909         stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4910
4911         /* For the 64-bit byte counters the low dword must be read first. */
4912         /* Both registers clear on the read of the high dword */
4913
4914         stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4915             ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4916         stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4917             ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
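        /*
        ** (NB: this relies on the low-dword read being evaluated
        **  first; C leaves the evaluation order of '+' operands
        **  unspecified, so strictly the two reads should be
        **  sequenced as separate statements.)
        */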
4918
4919         stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4920         stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4921         stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4922         stats->roc += E1000_READ_REG(hw, E1000_ROC);
4923         stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4924
4925         stats->tor += E1000_READ_REG(hw, E1000_TORH);
4926         stats->tot += E1000_READ_REG(hw, E1000_TOTH);
4927
4928         stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4929         stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4930         stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4931         stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4932         stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4933         stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4934         stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4935         stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4936         stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4937         stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4938
4939         /* Interrupt Counts */
4940
4941         stats->iac += E1000_READ_REG(hw, E1000_IAC);
4942         stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4943         stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4944         stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4945         stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4946         stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4947         stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4948         stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4949         stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4950
4951         /* Host to Card Statistics */
4952
4953         stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4954         stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4955         stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4956         stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4957         stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4958         stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4959         stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4960         stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4961             ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4962         stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4963             ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4964         stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4965         stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4966         stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4967
4968         stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4969         stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4970         stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4971         stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4972         stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4973         stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4974
4975         ifp = adapter->ifp;
4976         ifp->if_collisions = stats->colc;
4977
4978         /* Rx Errors */
4979         ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4980             stats->crcerrs + stats->algnerrc +
4981             stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4982
4983         /* Tx Errors */
4984         ifp->if_oerrors = stats->ecol +
4985             stats->latecol + adapter->watchdog_events;
4986
4987         /* Driver specific counters */
4988         adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4989         adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4990         adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4991         adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4992         adapter->packet_buf_alloc_tx =
4993             ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4994         adapter->packet_buf_alloc_rx =
4995             (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4996 }
4997
4998
4999 /**********************************************************************
5000  *
5001  *  Initialize the VF board statistics counters.
5002  *
5003  **********************************************************************/
5004 static void
5005 igb_vf_init_stats(struct adapter *adapter)
5006 {
5007         struct e1000_hw *hw = &adapter->hw;
5008         struct e1000_vf_stats   *stats;
5009
5010         stats = (struct e1000_vf_stats  *)adapter->stats;
5011         if (stats == NULL)
5012                 return;
5013         stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5014         stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5015         stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5016         stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5017         stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5018 }
5019  
5020 /**********************************************************************
5021  *
5022  *  Update the VF board statistics counters.
5023  *
5024  **********************************************************************/
5025 static void
5026 igb_update_vf_stats_counters(struct adapter *adapter)
5027 {
5028         struct e1000_hw *hw = &adapter->hw;
5029         struct e1000_vf_stats   *stats;
5030
5031         if (adapter->link_speed == 0)
5032                 return;
5033
5034         stats = (struct e1000_vf_stats  *)adapter->stats;
5035
5036         UPDATE_VF_REG(E1000_VFGPRC,
5037             stats->last_gprc, stats->gprc);
5038         UPDATE_VF_REG(E1000_VFGORC,
5039             stats->last_gorc, stats->gorc);
5040         UPDATE_VF_REG(E1000_VFGPTC,
5041             stats->last_gptc, stats->gptc);
5042         UPDATE_VF_REG(E1000_VFGOTC,
5043             stats->last_gotc, stats->gotc);
5044         UPDATE_VF_REG(E1000_VFMPRC,
5045             stats->last_mprc, stats->mprc);
5046 }
5047
5048 /* Export a single 32-bit register via a read-only sysctl. */
5049 static int
5050 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5051 {
5052         struct adapter *adapter;
5053         u_int val;
5054
5055         adapter = oidp->oid_arg1;
5056         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5057         return (sysctl_handle_int(oidp, &val, 0, req));
5058 }
5059
5060 /*
5061 **  Tuneable interrupt rate handler
5062 */
5063 static int
5064 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5065 {
5066         struct igb_queue        *que = ((struct igb_queue *)oidp->oid_arg1);
5067         int                     error;
5068         u32                     reg, usec, rate;
5069                         
5070         reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5071         usec = ((reg & 0x7FFC) >> 2);
5072         if (usec > 0)
5073                 rate = 1000000 / usec;
5074         else
5075                 rate = 0;
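        /*
        ** e.g. an interval field of 125 decodes to usec = 125, so the
        ** reported rate is 1000000 / 125 = 8000 interrupts/sec. Note
        ** that a value written here is accepted but never applied to
        ** EITR, so the sysctl is effectively read-only.
        */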
5076         error = sysctl_handle_int(oidp, &rate, 0, req);
5077         if (error || !req->newptr)
5078                 return (error);
5079         return (0);
5080 }
5081
5082 /*
5083  * Add sysctl variables, one per statistic, to the system.
5084  */
5085 static void
5086 igb_add_hw_stats(struct adapter *adapter)
5087 {
5088         device_t dev = adapter->dev;
5089
5090         struct tx_ring *txr = adapter->tx_rings;
5091         struct rx_ring *rxr = adapter->rx_rings;
5092
5093         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5094         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5095         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5096         struct e1000_hw_stats *stats = adapter->stats;
5097
5098         struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5099         struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5100
5101 #define QUEUE_NAME_LEN 32
5102         char namebuf[QUEUE_NAME_LEN];
5103
5104         /* Driver Statistics */
5105         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq", 
5106                         CTLFLAG_RD, &adapter->link_irq, 0,
5107                         "Link MSIX IRQ Handled");
5108         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
5109                         CTLFLAG_RD, &adapter->dropped_pkts,
5110                         "Driver dropped packets");
5111         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
5112                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
5113                         "Driver tx dma failure in xmit");
5114         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5115                         CTLFLAG_RD, &adapter->rx_overruns,
5116                         "RX overruns");
5117         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5118                         CTLFLAG_RD, &adapter->watchdog_events,
5119                         "Watchdog timeouts");
5120
5121         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control", 
5122                         CTLFLAG_RD, &adapter->device_control,
5123                         "Device Control Register");
5124         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control", 
5125                         CTLFLAG_RD, &adapter->rx_control,
5126                         "Receiver Control Register");
5127         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask", 
5128                         CTLFLAG_RD, &adapter->int_mask,
5129                         "Interrupt Mask");
5130         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask", 
5131                         CTLFLAG_RD, &adapter->eint_mask,
5132                         "Extended Interrupt Mask");
5133         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc", 
5134                         CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5135                         "Transmit Buffer Packet Allocation");
5136         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc", 
5137                         CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5138                         "Receive Buffer Packet Allocation");
5139         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5140                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5141                         "Flow Control High Watermark");
5142         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
5143                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5144                         "Flow Control Low Watermark");
5145
5146         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5147                 struct lro_ctrl *lro = &rxr->lro;
5148
5149                 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5150                 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5151                                             CTLFLAG_RD, NULL, "Queue Name");
5152                 queue_list = SYSCTL_CHILDREN(queue_node);
5153
5154                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", 
5155                                 CTLFLAG_RD, &adapter->queues[i],
5156                                 sizeof(&adapter->queues[i]),
5157                                 igb_sysctl_interrupt_rate_handler,
5158                                 "IU", "Interrupt Rate");
5159
5160                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
5161                                 CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5162                                 igb_sysctl_reg_handler, "IU",
5163                                 "Transmit Descriptor Head");
5164                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
5165                                 CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5166                                 igb_sysctl_reg_handler, "IU",
5167                                 "Transmit Descriptor Tail");
5168                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", 
5169                                 CTLFLAG_RD, &txr->no_desc_avail,
5170                                 "Queue No Descriptor Available");
5171                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5172                                 CTLFLAG_RD, &txr->tx_packets,
5173                                 "Queue Packets Transmitted");
5174
5175                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
5176                                 CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5177                                 igb_sysctl_reg_handler, "IU",
5178                                 "Receive Descriptor Head");
5179                 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
5180                                 CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5181                                 igb_sysctl_reg_handler, "IU",
5182                                 "Receive Descriptor Tail");
5183                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5184                                 CTLFLAG_RD, &rxr->rx_packets,
5185                                 "Queue Packets Received");
5186                 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5187                                 CTLFLAG_RD, &rxr->rx_bytes,
5188                                 "Queue Bytes Received");
5189                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5190                                 CTLFLAG_RD, &lro->lro_queued, 0,
5191                                 "LRO Queued");
5192                 SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5193                                 CTLFLAG_RD, &lro->lro_flushed, 0,
5194                                 "LRO Flushed");
5195         }
5196
5197         /* MAC stats get their own sub node */
5198
5199         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
5200                                     CTLFLAG_RD, NULL, "MAC Statistics");
5201         stat_list = SYSCTL_CHILDREN(stat_node);
5202
5203         /*
5204         ** VF adapter has a very limited set of stats
5205         ** since it's not managing the metal, so to speak.
5206         */
5207         if (adapter->vf_ifp) {
5208                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5209                                 CTLFLAG_RD, &stats->gprc,
5210                                 "Good Packets Received");
5211                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5212                                 CTLFLAG_RD, &stats->gptc,
5213                                 "Good Packets Transmitted");
5214                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5215                                 CTLFLAG_RD, &stats->gorc,
5216                                 "Good Octets Received");
5217                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5218                                 CTLFLAG_RD, &stats->gotc,
5219                                 "Good Octets Transmitted");
5220                 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5221                                 CTLFLAG_RD, &stats->mprc,
5222                                 "Multicast Packets Received");
5223                 return;
5224         }
5225
5226         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll", 
5227                         CTLFLAG_RD, &stats->ecol,
5228                         "Excessive collisions");
5229         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll", 
5230                         CTLFLAG_RD, &stats->scc,
5231                         "Single collisions");
5232         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll", 
5233                         CTLFLAG_RD, &stats->mcc,
5234                         "Multiple collisions");
5235         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll", 
5236                         CTLFLAG_RD, &stats->latecol,
5237                         "Late collisions");
5238         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count", 
5239                         CTLFLAG_RD, &stats->colc,
5240                         "Collision Count");
5241         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5242                         CTLFLAG_RD, &stats->symerrs,
5243                         "Symbol Errors");
5244         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5245                         CTLFLAG_RD, &stats->sec,
5246                         "Sequence Errors");
5247         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5248                         CTLFLAG_RD, &stats->dc,
5249                         "Defer Count");
5250         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5251                         CTLFLAG_RD, &stats->mpc,
5252                         "Missed Packets");
5253         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5254                         CTLFLAG_RD, &stats->rnbc,
5255                         "Receive No Buffers");
5256         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5257                         CTLFLAG_RD, &stats->ruc,
5258                         "Receive Undersize");
5259         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5260                         CTLFLAG_RD, &stats->rfc,
5261                         "Fragmented Packets Received");
5262         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5263                         CTLFLAG_RD, &stats->roc,
5264                         "Oversized Packets Received");
5265         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5266                         CTLFLAG_RD, &stats->rjc,
5267                         "Received Jabber");
5268         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5269                         CTLFLAG_RD, &stats->rxerrc,
5270                         "Receive Errors");
5271         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5272                         CTLFLAG_RD, &stats->crcerrs,
5273                         "CRC errors");
5274         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5275                         CTLFLAG_RD, &stats->algnerrc,
5276                         "Alignment Errors");
5277         /* On 82575 these are collision counts */
5278         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5279                         CTLFLAG_RD, &stats->cexterr,
5280                         "Collision/Carrier extension errors");
5281         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5282                         CTLFLAG_RD, &stats->xonrxc,
5283                         "XON Received");
5284         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5285                         CTLFLAG_RD, &stats->xontxc,
5286                         "XON Transmitted");
5287         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5288                         CTLFLAG_RD, &stats->xoffrxc,
5289                         "XOFF Received");
5290         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5291                         CTLFLAG_RD, &stats->xofftxc,
5292                         "XOFF Transmitted");
5293         /* Packet Reception Stats */
5294         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5295                         CTLFLAG_RD, &stats->tpr,
5296                         "Total Packets Received ");
5297         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5298                         CTLFLAG_RD, &stats->gprc,
5299                         "Good Packets Received");
5300         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5301                         CTLFLAG_RD, &stats->bprc,
5302                         "Broadcast Packets Received");
5303         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5304                         CTLFLAG_RD, &stats->mprc,
5305                         "Multicast Packets Received");
5306         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5307                         CTLFLAG_RD, &stats->prc64,
5308                         "64 byte frames received ");
5309         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5310                         CTLFLAG_RD, &stats->prc127,
5311                         "65-127 byte frames received");
5312         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5313                         CTLFLAG_RD, &stats->prc255,
5314                         "128-255 byte frames received");
5315         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5316                         CTLFLAG_RD, &stats->prc511,
5317                         "256-511 byte frames received");
5318         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5319                         CTLFLAG_RD, &stats->prc1023,
5320                         "512-1023 byte frames received");
5321         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5322                         CTLFLAG_RD, &stats->prc1522,
5323                         "1024-1522 byte frames received");
5324         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", 
5325                         CTLFLAG_RD, &stats->gorc, 
5326                         "Good Octets Received"); 
5327
5328         /* Packet Transmission Stats */
5329         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", 
5330                         CTLFLAG_RD, &stats->gotc, 
5331                         "Good Octets Transmitted"); 
5332         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5333                         CTLFLAG_RD, &stats->tpt,
5334                         "Total Packets Transmitted");
5335         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5336                         CTLFLAG_RD, &stats->gptc,
5337                         "Good Packets Transmitted");
5338         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5339                         CTLFLAG_RD, &stats->bptc,
5340                         "Broadcast Packets Transmitted");
5341         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5342                         CTLFLAG_RD, &stats->mptc,
5343                         "Multicast Packets Transmitted");
5344         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5345                         CTLFLAG_RD, &stats->ptc64,
5346                         "64 byte frames transmitted ");
5347         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5348                         CTLFLAG_RD, &stats->ptc127,
5349                         "65-127 byte frames transmitted");
5350         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5351                         CTLFLAG_RD, &stats->ptc255,
5352                         "128-255 byte frames transmitted");
5353         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5354                         CTLFLAG_RD, &stats->ptc511,
5355                         "256-511 byte frames transmitted");
5356         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5357                         CTLFLAG_RD, &stats->ptc1023,
5358                         "512-1023 byte frames transmitted");
5359         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5360                         CTLFLAG_RD, &stats->ptc1522,
5361                         "1024-1522 byte frames transmitted");
5362         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5363                         CTLFLAG_RD, &stats->tsctc,
5364                         "TSO Contexts Transmitted");
5365         SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5366                         CTLFLAG_RD, &stats->tsctfc,
5367                         "TSO Contexts Failed");
5368
5369
5370         /* Interrupt Stats */
5371
5372         int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
5373                                     CTLFLAG_RD, NULL, "Interrupt Statistics");
5374         int_list = SYSCTL_CHILDREN(int_node);
5375
5376         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5377                         CTLFLAG_RD, &stats->iac,
5378                         "Interrupt Assertion Count");
5379
5380         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5381                         CTLFLAG_RD, &stats->icrxptc,
5382                         "Interrupt Cause Rx Pkt Timer Expire Count");
5383
5384         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5385                         CTLFLAG_RD, &stats->icrxatc,
5386                         "Interrupt Cause Rx Abs Timer Expire Count");
5387
5388         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5389                         CTLFLAG_RD, &stats->ictxptc,
5390                         "Interrupt Cause Tx Pkt Timer Expire Count");
5391
5392         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5393                         CTLFLAG_RD, &stats->ictxatc,
5394                         "Interrupt Cause Tx Abs Timer Expire Count");
5395
5396         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5397                         CTLFLAG_RD, &stats->ictxqec,
5398                         "Interrupt Cause Tx Queue Empty Count");
5399
5400         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5401                         CTLFLAG_RD, &stats->ictxqmtc,
5402                         "Interrupt Cause Tx Queue Min Thresh Count");
5403
5404         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5405                         CTLFLAG_RD, &stats->icrxdmtc,
5406                         "Interrupt Cause Rx Desc Min Thresh Count");
5407
5408         SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5409                         CTLFLAG_RD, &stats->icrxoc,
5410                         "Interrupt Cause Receiver Overrun Count");
5411
        /* Host to Card Stats */

        host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
                                    CTLFLAG_RD, NULL,
                                    "Host to Card Statistics");

        host_list = SYSCTL_CHILDREN(host_node);

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
                        CTLFLAG_RD, &stats->cbtmpc,
                        "Circuit Breaker Tx Packet Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
                        CTLFLAG_RD, &stats->htdpmc,
                        "Host Transmit Discarded Packets");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
                        CTLFLAG_RD, &stats->rpthc,
                        "Rx Packets To Host");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
                        CTLFLAG_RD, &stats->cbrmpc,
                        "Circuit Breaker Rx Packet Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
                        CTLFLAG_RD, &stats->cbrdpc,
                        "Circuit Breaker Rx Dropped Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
                        CTLFLAG_RD, &stats->hgptc,
                        "Host Good Packets Tx Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
                        CTLFLAG_RD, &stats->htcbdpc,
                        "Host Tx Circuit Breaker Dropped Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
                        CTLFLAG_RD, &stats->hgorc,
                        "Host Good Octets Received Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
                        CTLFLAG_RD, &stats->hgotc,
                        "Host Good Octets Transmit Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
                        CTLFLAG_RD, &stats->lenerrs,
                        "Length Errors");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
                        CTLFLAG_RD, &stats->scvpc,
                        "SerDes/SGMII Code Violation Pkt Count");

        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
                        CTLFLAG_RD, &stats->hrmpc,
                        "Header Redirection Missed Packet Count");
}
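
/*
 * Example (illustrative only, not part of the driver): the quad
 * counters registered above can be read from userland with
 * sysctlbyname(3).  The OID below assumes unit 0 and the "host"
 * node created above hanging off the device's sysctl tree;
 * adjust both for the actual interface.
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <stdint.h>
 *      #include <stdio.h>
 *
 *      int
 *      main(void)
 *      {
 *              uint64_t octets;
 *              size_t len = sizeof(octets);
 *
 *              // "rx_good_bytes" maps to stats->hgorc above
 *              if (sysctlbyname("dev.igb.0.host.rx_good_bytes",
 *                  &octets, &len, NULL, 0) == -1) {
 *                      perror("sysctlbyname");
 *                      return (1);
 *              }
 *              printf("host good octets received: %ju\n",
 *                  (uintmax_t)octets);
 *              return (0);
 *      }
 */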


/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter EEPROM,
 *  often a useful debug/service tool.  Only the first 32 words
 *  are dumped; everything that matters lives in that range.
 *
 **********************************************************************/
static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        /*
         * This value will cause a hex dump of the
         * first 32 16-bit words of the EEPROM to
         * the screen.
         */
        if (result == 1) {
                adapter = (struct adapter *)arg1;
                igb_print_nvm_info(adapter);
        }

        return (error);
}

static void
igb_print_nvm_info(struct adapter *adapter)
{
        u16     eeprom_data;
        int     i, j, row = 0;

        /* It's a bit crude, but it gets the job done */
        printf("\nInterface EEPROM Dump:\n");
        printf("Offset\n0x0000  ");
        for (i = 0, j = 0; i < 32; i++, j++) {
                if (j == 8) { /* Make the offset block */
                        j = 0; ++row;
                        printf("\n0x00%x0  ", row);
                }
                e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
                printf("%04x ", eeprom_data);
        }
        printf("\n");
}
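
/*
 * Example (illustrative only): the dump above runs when 1 is
 * written to the sysctl that igb_sysctl_nvm_info() is attached
 * to.  The OID name "nvm" and unit 0 are assumptions here; use
 * whatever name the registration elsewhere in this file picks.
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <stdio.h>
 *
 *      int
 *      main(void)
 *      {
 *              int one = 1;
 *
 *              // the handler only acts on the value 1
 *              if (sysctlbyname("dev.igb.0.nvm",
 *                  NULL, NULL, &one, sizeof(one)) == -1) {
 *                      perror("sysctlbyname");
 *                      return (1);
 *              }
 *              return (0);
 *      }
 */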

static void
igb_set_sysctl_value(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
            SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
            OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}
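
/*
 * Example (illustrative only): a typical attach-time use of the
 * helper above, publishing a tunable limit under the device's
 * sysctl tree.  The field name and default shown are assumptions
 * for illustration, not necessarily what this driver passes.
 *
 *      igb_set_sysctl_value(adapter, "rx_processing_limit",
 *          "max number of rx packets to process per interrupt",
 *          &adapter->rx_process_limit, 100);
 */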

/*
** Set flow control using sysctl:
** Flow control values:
**      0 - off
**      1 - rx pause
**      2 - tx pause
**      3 - full
*/
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
        int error;
        struct adapter *adapter;

        error = sysctl_handle_int(oidp, &igb_fc_setting, 0, req);

        /* A plain read must not force flow control */
        if (error || !req->newptr)
                return (error);

        adapter = (struct adapter *) arg1;
        switch (igb_fc_setting) {
        case e1000_fc_rx_pause:
        case e1000_fc_tx_pause:
        case e1000_fc_full:
                adapter->hw.fc.requested_mode = igb_fc_setting;
                break;
        case e1000_fc_none:
        default:
                adapter->hw.fc.requested_mode = e1000_fc_none;
        }

        adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
        e1000_force_mac_fc(&adapter->hw);
        return (error);
}
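
/*
 * Example (illustrative only): requesting full flow control from
 * userland through the handler above.  The OID name
 * "flow_control" and unit 0 are assumptions; use the name this
 * handler is actually registered under elsewhere in the file.
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <stdio.h>
 *
 *      int
 *      main(void)
 *      {
 *              int fc = 3;     // 3 == e1000_fc_full (rx and tx pause)
 *
 *              if (sysctlbyname("dev.igb.0.flow_control",
 *                  NULL, NULL, &fc, sizeof(fc)) == -1) {
 *                      perror("sysctlbyname");
 *                      return (1);
 *              }
 *              return (0);
 *      }
 */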