net/cxgbe: add Compressed Local IP region
[dpdk.git] / drivers/net/cxgbe/cxgbe_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2014-2018 Chelsio Communications.
3  * All rights reserved.
4  */
5
6 #include <sys/queue.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <stdarg.h>
13 #include <inttypes.h>
14 #include <netinet/in.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_interrupts.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_pci.h>
23 #include <rte_atomic.h>
24 #include <rte_branch_prediction.h>
25 #include <rte_memory.h>
26 #include <rte_tailq.h>
27 #include <rte_eal.h>
28 #include <rte_alarm.h>
29 #include <rte_ether.h>
30 #include <rte_ethdev_driver.h>
31 #include <rte_ethdev_pci.h>
32 #include <rte_random.h>
33 #include <rte_dev.h>
34 #include <rte_kvargs.h>
35
36 #include "common.h"
37 #include "t4_regs.h"
38 #include "t4_msg.h"
39 #include "cxgbe.h"
40 #include "clip_tbl.h"
41
42 /**
43  * Allocate a chunk of memory. The allocated memory is cleared.
44  */
45 void *t4_alloc_mem(size_t size)
46 {
47         return rte_zmalloc(NULL, size, 0);
48 }
49
50 /**
51  * Free memory allocated through t4_alloc_mem().
52  */
53 void t4_free_mem(void *addr)
54 {
55         rte_free(addr);
56 }
57
58 /*
59  * Response queue handler for the FW event queue.
60  */
61 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
62                           __rte_unused const struct pkt_gl *gl)
63 {
64         u8 opcode = ((const struct rss_header *)rsp)->opcode;
65
66         rsp++;                                          /* skip RSS header */
67
68         /*
69          * FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
70          */
71         if (unlikely(opcode == CPL_FW4_MSG &&
72                      ((const struct cpl_fw4_msg *)rsp)->type ==
73                       FW_TYPE_RSSCPL)) {
74                 rsp++;
75                 opcode = ((const struct rss_header *)rsp)->opcode;
76                 rsp++;
77                 if (opcode != CPL_SGE_EGR_UPDATE) {
78                         dev_err(q->adapter, "unexpected FW4/CPL %#x on FW event queue\n",
79                                 opcode);
80                         goto out;
81                 }
82         }
83
84         if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
85                 /* do nothing */
86         } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
87                 const struct cpl_fw6_msg *msg = (const void *)rsp;
88
89                 t4_handle_fw_rpl(q->adapter, msg->data);
90         } else if (opcode == CPL_SET_TCB_RPL) {
91                 const struct cpl_set_tcb_rpl *p = (const void *)rsp;
92
93                 filter_rpl(q->adapter, p);
94         } else {
95                 dev_err(q->adapter, "unexpected CPL %#x on FW event queue\n",
96                         opcode);
97         }
98 out:
99         return 0;
100 }
101
102 /**
103  * Set up SGE control queues to pass control information.
104  */
105 int setup_sge_ctrl_txq(struct adapter *adapter)
106 {
107         struct sge *s = &adapter->sge;
108         int err = 0, i = 0;
109
110         for_each_port(adapter, i) {
111                 char name[RTE_ETH_NAME_MAX_LEN];
112                 struct sge_ctrl_txq *q = &s->ctrlq[i];
113
114                 q->q.size = 1024;
115                 err = t4_sge_alloc_ctrl_txq(adapter, q,
116                                             adapter->eth_dev,  i,
117                                             s->fw_evtq.cntxt_id,
118                                             rte_socket_id());
119                 if (err) {
120                         dev_err(adapter, "Failed to alloc ctrl txq. Err: %d",
121                                 err);
122                         goto out;
123                 }
124                 snprintf(name, sizeof(name), "cxgbe_ctrl_pool_%d", i);
125                 q->mb_pool = rte_pktmbuf_pool_create(name, s->ctrlq[i].q.size,
126                                                      RTE_CACHE_LINE_SIZE,
127                                                      RTE_MBUF_PRIV_ALIGN,
128                                                      RTE_MBUF_DEFAULT_BUF_SIZE,
129                                                      SOCKET_ID_ANY);
130                 if (!q->mb_pool) {
131                         dev_err(adapter, "Can't create ctrl pool for port: %d",
132                                 i);
133                         err = -ENOMEM;
134                         goto out;
135                 }
136         }
137         return 0;
138 out:
139         t4_free_sge_resources(adapter);
140         return err;
141 }
142
143 /**
144  * cxgbe_poll_for_completion: Poll rxq for completion
145  * @q: rxq to poll
146  * @us: microseconds to delay
147  * @cnt: number of times to poll
148  * @c: completion to check for 'done' status
149  *
150  * Polls the rxq for replies until the completion is marked done or the
151  * poll count expires.
152  */
153 int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us,
154                               unsigned int cnt, struct t4_completion *c)
155 {
156         unsigned int i;
157         unsigned int work_done, budget = 4;
158
159         if (!c)
160                 return -EINVAL;
161
162         for (i = 0; i < cnt; i++) {
163                 cxgbe_poll(q, NULL, budget, &work_done);
164                 t4_os_lock(&c->lock);
165                 if (c->done) {
166                         t4_os_unlock(&c->lock);
167                         return 0;
168                 }
169                 t4_os_unlock(&c->lock);
170                 udelay(us);
171         }
172         return -ETIMEDOUT;
173 }
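/*
 * Illustrative usage sketch (not part of the driver): a caller submits a
 * request and then polls the FW event queue until the reply handler marks
 * the completion done.  The send_request() helper and the polling
 * parameters are hypothetical examples.
 */
#if 0
	struct t4_completion c;

	memset(&c, 0, sizeof(c));
	t4_os_lock_init(&c.lock);

	send_request(adapter);		/* hypothetical request submission */

	/* Poll every 100us, up to 1000 times (a 100ms budget overall). */
	if (cxgbe_poll_for_completion(&adapter->sge.fw_evtq, 100, 1000, &c))
		dev_err(adapter, "request timed out\n");
#endif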
174
175 int setup_sge_fwevtq(struct adapter *adapter)
176 {
177         struct sge *s = &adapter->sge;
178         int err = 0;
179         int msi_idx = 0;
180
181         err = t4_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->eth_dev,
182                                msi_idx, NULL, fwevtq_handler, -1, NULL, 0,
183                                rte_socket_id());
184         return err;
185 }
186
187 static int closest_timer(const struct sge *s, int time)
188 {
189         unsigned int i, match = 0;
190         int delta, min_delta = INT_MAX;
191
192         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
193                 delta = time - s->timer_val[i];
194                 if (delta < 0)
195                         delta = -delta;
196                 if (delta < min_delta) {
197                         min_delta = delta;
198                         match = i;
199                 }
200         }
201         return match;
202 }
203
204 static int closest_thres(const struct sge *s, int thres)
205 {
206         unsigned int i, match = 0;
207         int delta, min_delta = INT_MAX;
208
209         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
210                 delta = thres - s->counter_val[i];
211                 if (delta < 0)
212                         delta = -delta;
213                 if (delta < min_delta) {
214                         min_delta = delta;
215                         match = i;
216                 }
217         }
218         return match;
219 }
220
221 /**
222  * cxgb4_set_rspq_intr_params - set a queue's interrupt holdoff parameters
223  * @q: the Rx queue
224  * @us: the hold-off time in us, or 0 to disable timer
225  * @cnt: the hold-off packet count, or 0 to disable counter
226  *
227  * Sets an Rx queue's interrupt hold-off time and packet count.  At least
228  * one of the two needs to be enabled for the queue to generate interrupts.
229  */
230 int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
231                                unsigned int cnt)
232 {
233         struct adapter *adap = q->adapter;
234         unsigned int timer_val;
235
236         if (cnt) {
237                 int err;
238                 u32 v, new_idx;
239
240                 new_idx = closest_thres(&adap->sge, cnt);
241                 if (q->desc && q->pktcnt_idx != new_idx) {
242                         /* the queue has already been created, update it */
243                         v = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
244                             V_FW_PARAMS_PARAM_X(
245                             FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
246                             V_FW_PARAMS_PARAM_YZ(q->cntxt_id);
247                         err = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
248                                             &v, &new_idx);
249                         if (err)
250                                 return err;
251                 }
252                 q->pktcnt_idx = new_idx;
253         }
254
255         timer_val = (us == 0) ? X_TIMERREG_RESTART_COUNTER :
256                                 closest_timer(&adap->sge, us);
257
258         if ((us | cnt) == 0)
259                 q->intr_params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
260         else
261                 q->intr_params = V_QINTR_TIMER_IDX(timer_val) |
262                                  V_QINTR_CNT_EN(cnt > 0);
263         return 0;
264 }
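/*
 * Illustrative sketch of the hold-off settings above (values are
 * examples; 5us/32 packets matches the defaults cfg_queues() applies to
 * the ethernet rxqs later in this file):
 */
#if 0
	/* Disable both timer and counter: the queue raises no interrupts. */
	cxgb4_set_rspq_intr_params(q, 0, 0);

	/* Hold off interrupts for up to 5us or 32 received packets. */
	cxgb4_set_rspq_intr_params(q, 5, 32);
#endif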
265
266 /**
267  * Free TID tables.
268  */
269 static void tid_free(struct tid_info *t)
270 {
271         if (t->tid_tab) {
272                 if (t->ftid_bmap)
273                         rte_bitmap_free(t->ftid_bmap);
274
275                 if (t->ftid_bmap_array)
276                         t4_os_free(t->ftid_bmap_array);
277
278                 t4_os_free(t->tid_tab);
279         }
280
281         memset(t, 0, sizeof(struct tid_info));
282 }
283
284 /**
285  * Allocate and initialize the TID tables.  Returns 0 on success.
286  */
287 static int tid_init(struct tid_info *t)
288 {
289         size_t size;
290         unsigned int ftid_bmap_size;
291         unsigned int natids = t->natids;
292         unsigned int max_ftids = t->nftids;
293
294         ftid_bmap_size = rte_bitmap_get_memory_footprint(t->nftids);
295         size = t->ntids * sizeof(*t->tid_tab) +
296                 max_ftids * sizeof(*t->ftid_tab) +
297                 natids * sizeof(*t->atid_tab);
298
299         t->tid_tab = t4_os_alloc(size);
300         if (!t->tid_tab)
301                 return -ENOMEM;
302
303         t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
304         t->ftid_tab = (struct filter_entry *)&t->atid_tab[t->natids];
305         t->ftid_bmap_array = t4_os_alloc(ftid_bmap_size);
306         if (!t->ftid_bmap_array) {
307                 tid_free(t);
308                 return -ENOMEM;
309         }
310
311         t4_os_lock_init(&t->atid_lock);
312         t4_os_lock_init(&t->ftid_lock);
313
314         t->afree = NULL;
315         t->atids_in_use = 0;
316         rte_atomic32_init(&t->tids_in_use);
317         rte_atomic32_set(&t->tids_in_use, 0);
318         rte_atomic32_init(&t->conns_in_use);
319         rte_atomic32_set(&t->conns_in_use, 0);
320
321         /* Set up the free list for atid_tab. */
322         if (natids) {
323                 while (--natids)
324                         t->atid_tab[natids - 1].next = &t->atid_tab[natids];
325                 t->afree = t->atid_tab;
326         }
327
328         t->ftid_bmap = rte_bitmap_init(t->nftids, t->ftid_bmap_array,
329                                        ftid_bmap_size);
330         if (!t->ftid_bmap) {
331                 tid_free(t);
332                 return -ENOMEM;
333         }
334
335         return 0;
336 }
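/*
 * For reference, the single allocation made by tid_init() above is carved
 * into three consecutive tables:
 *
 *   t->tid_tab  = base                      (ntids entries)
 *   t->atid_tab = &t->tid_tab[t->ntids]     (natids entries)
 *   t->ftid_tab = &t->atid_tab[t->natids]   (nftids entries)
 *
 * The ftid bitmap tracking filter TID usage lives in a separate
 * allocation (ftid_bmap_array).
 */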
337
338 static inline bool is_x_1g_port(const struct link_config *lc)
339 {
340         return (lc->pcaps & FW_PORT_CAP32_SPEED_1G) != 0;
341 }
342
343 static inline bool is_x_10g_port(const struct link_config *lc)
344 {
345         unsigned int speeds, high_speeds;
346
347         speeds = V_FW_PORT_CAP32_SPEED(G_FW_PORT_CAP32_SPEED(lc->pcaps));
348         high_speeds = speeds &
349                       ~(FW_PORT_CAP32_SPEED_100M | FW_PORT_CAP32_SPEED_1G);
350
351         return high_speeds != 0;
352 }
353
354 inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
355                       unsigned int us, unsigned int cnt,
356                       unsigned int size, unsigned int iqe_size)
357 {
358         q->adapter = adap;
359         cxgb4_set_rspq_intr_params(q, us, cnt);
360         q->iqe_len = iqe_size;
361         q->size = size;
362 }
363
364 int cfg_queue_count(struct rte_eth_dev *eth_dev)
365 {
366         struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
367         struct adapter *adap = pi->adapter;
368         struct sge *s = &adap->sge;
369         unsigned int max_queues = s->max_ethqsets / adap->params.nports;
370
371         if ((eth_dev->data->nb_rx_queues < 1) ||
372             (eth_dev->data->nb_tx_queues < 1))
373                 return -EINVAL;
374
375         if ((eth_dev->data->nb_rx_queues > max_queues) ||
376             (eth_dev->data->nb_tx_queues > max_queues))
377                 return -EINVAL;
378
379         if (eth_dev->data->nb_rx_queues > pi->rss_size)
380                 return -EINVAL;
381
382         /* We must configure RSS, since the config has changed */
383         pi->flags &= ~PORT_RSS_DONE;
384
385         pi->n_rx_qsets = eth_dev->data->nb_rx_queues;
386         pi->n_tx_qsets = eth_dev->data->nb_tx_queues;
387
388         return 0;
389 }
390
391 void cfg_queues(struct rte_eth_dev *eth_dev)
392 {
393         struct rte_config *config = rte_eal_get_configuration();
394         struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
395         struct adapter *adap = pi->adapter;
396         struct sge *s = &adap->sge;
397         unsigned int i, nb_ports = 0, qidx = 0;
398         unsigned int q_per_port = 0;
399
400         if (!(adap->flags & CFG_QUEUES)) {
401                 for_each_port(adap, i) {
402                         struct port_info *tpi = adap2pinfo(adap, i);
403
404                         nb_ports += (is_x_10g_port(&tpi->link_cfg)) ||
405                                      is_x_1g_port(&tpi->link_cfg) ? 1 : 0;
406                 }
407
408                 /*
409                  * Default to up to one queue set per lcore per 1G/10G port.
410                  */
411                 if (nb_ports)
412                         q_per_port = (MAX_ETH_QSETS -
413                                      (adap->params.nports - nb_ports)) /
414                                      nb_ports;
415
416                 if (q_per_port > config->lcore_count)
417                         q_per_port = config->lcore_count;
418
419                 for_each_port(adap, i) {
420                         struct port_info *pi = adap2pinfo(adap, i);
421
422                         pi->first_qset = qidx;
423
424                         /* Initially n_rx_qsets == n_tx_qsets */
425                         pi->n_rx_qsets = (is_x_10g_port(&pi->link_cfg) ||
426                                           is_x_1g_port(&pi->link_cfg)) ?
427                                           q_per_port : 1;
428                         pi->n_tx_qsets = pi->n_rx_qsets;
429
430                         if (pi->n_rx_qsets > pi->rss_size)
431                                 pi->n_rx_qsets = pi->rss_size;
432
433                         qidx += pi->n_rx_qsets;
434                 }
435
436                 s->max_ethqsets = qidx;
437
438                 for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
439                         struct sge_eth_rxq *r = &s->ethrxq[i];
440
441                         init_rspq(adap, &r->rspq, 5, 32, 1024, 64);
442                         r->usembufs = 1;
443                         r->fl.size = (r->usembufs ? 1024 : 72);
444                 }
445
446                 for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
447                         s->ethtxq[i].q.size = 1024;
448
449                 init_rspq(adap, &adap->sge.fw_evtq, 0, 0, 1024, 64);
450                 adap->flags |= CFG_QUEUES;
451         }
452 }
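/*
 * Worked example for the queue distribution above, under the assumption
 * that MAX_ETH_QSETS is 64: a 2-port adapter with both ports at 10G and
 * 8 lcores gives q_per_port = (64 - (2 - 2)) / 2 = 32, which is then
 * capped to 8 by the lcore count and finally bounded by each port's
 * rss_size.
 */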
453
454 void cxgbe_stats_get(struct port_info *pi, struct port_stats *stats)
455 {
456         t4_get_port_stats_offset(pi->adapter, pi->tx_chan, stats,
457                                  &pi->stats_base);
458 }
459
460 void cxgbe_stats_reset(struct port_info *pi)
461 {
462         t4_clr_port_stats(pi->adapter, pi->tx_chan);
463 }
464
465 static void setup_memwin(struct adapter *adap)
466 {
467         u32 mem_win0_base;
468
469         /* For T5, only relative offset inside the PCIe BAR is passed */
470         mem_win0_base = MEMWIN0_BASE;
471
472         /*
473          * Set up memory window for accessing adapter memory ranges.  (Read
474          * back MA register to ensure that changes propagate before we attempt
475          * to use the new values.)
476          */
477         t4_write_reg(adap,
478                      PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN,
479                                          MEMWIN_NIC),
480                      mem_win0_base | V_BIR(0) |
481                      V_WINDOW(ilog2(MEMWIN0_APERTURE) - X_WINDOW_SHIFT));
482         t4_read_reg(adap,
483                     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN,
484                                         MEMWIN_NIC));
485 }
486
487 int init_rss(struct adapter *adap)
488 {
489         unsigned int i;
490
491         if (is_pf4(adap)) {
492                 int err;
493
494                 err = t4_init_rss_mode(adap, adap->mbox);
495                 if (err)
496                         return err;
497         }
498
499         for_each_port(adap, i) {
500                 struct port_info *pi = adap2pinfo(adap, i);
501
502                 pi->rss = rte_zmalloc(NULL, pi->rss_size * sizeof(u16), 0);
503                 if (!pi->rss)
504                         return -ENOMEM;
505
506                 pi->rss_hf = CXGBE_RSS_HF_ALL;
507         }
508         return 0;
509 }
510
511 /**
512  * Dump basic information about the adapter.
513  */
514 void print_adapter_info(struct adapter *adap)
515 {
516         /**
517          * Hardware/Firmware/etc. Version/Revision IDs.
518          */
519         t4_dump_version_info(adap);
520 }
521
522 void print_port_info(struct adapter *adap)
523 {
524         int i;
525         char buf[80];
526         struct rte_pci_addr *loc = &adap->pdev->addr;
527
528         for_each_port(adap, i) {
529                 const struct port_info *pi = adap2pinfo(adap, i);
530                 char *bufp = buf;
531
532                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_100M)
533                         bufp += sprintf(bufp, "100M/");
534                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_1G)
535                         bufp += sprintf(bufp, "1G/");
536                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_10G)
537                         bufp += sprintf(bufp, "10G/");
538                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_25G)
539                         bufp += sprintf(bufp, "25G/");
540                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_40G)
541                         bufp += sprintf(bufp, "40G/");
542                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_50G)
543                         bufp += sprintf(bufp, "50G/");
544                 if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_100G)
545                         bufp += sprintf(bufp, "100G/");
546                 if (bufp != buf)
547                         --bufp;
548                 sprintf(bufp, "BASE-%s",
549                         t4_get_port_type_description(
550                                         (enum fw_port_type)pi->port_type));
551
552                 dev_info(adap,
553                          " " PCI_PRI_FMT " Chelsio rev %d %s %s\n",
554                          loc->domain, loc->bus, loc->devid, loc->function,
555                          CHELSIO_CHIP_RELEASE(adap->params.chip), buf,
556                          (adap->flags & USING_MSIX) ? " MSI-X" :
557                          (adap->flags & USING_MSI) ? " MSI" : "");
558         }
559 }
560
561 static int
562 check_devargs_handler(__rte_unused const char *key, const char *value,
563                       __rte_unused void *opaque)
564 {
565         if (strcmp(value, "1"))
566                 return -1;
567
568         return 0;
569 }
570
571 int cxgbe_get_devargs(struct rte_devargs *devargs, const char *key)
572 {
573         struct rte_kvargs *kvlist;
574
575         if (!devargs)
576                 return 0;
577
578         kvlist = rte_kvargs_parse(devargs->args, NULL);
579         if (!kvlist)
580                 return 0;
581
582         if (!rte_kvargs_count(kvlist, key)) {
583                 rte_kvargs_free(kvlist);
584                 return 0;
585         }
586
587         if (rte_kvargs_process(kvlist, key,
588                                check_devargs_handler, NULL) < 0) {
589                 rte_kvargs_free(kvlist);
590                 return 0;
591         }
592         rte_kvargs_free(kvlist);
593
594         return 1;
595 }
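/*
 * Illustrative sketch: the devargs checked above are passed as key=1 pairs
 * on the device's EAL whitelist entry, e.g. (the PCI address is an example
 * and CXGBE_DEVARG_KEEP_OVLAN is assumed to expand to "keep_ovlan"):
 *
 *   -w 02:00.4,keep_ovlan=1,force_link_up=1
 *
 * A caller then queries a key like this:
 */
#if 0
	if (cxgbe_get_devargs(pdev->device.devargs, CXGBE_DEVARG_KEEP_OVLAN))
		dev_info(adapter, "keep_ovlan=1 requested via devargs\n");
#endif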
596
597 static void configure_vlan_types(struct adapter *adapter)
598 {
599         struct rte_pci_device *pdev = adapter->pdev;
600         int i;
601
602         for_each_port(adapter, i) {
603                 /* OVLAN Type 0x88a8 */
604                 t4_set_reg_field(adapter, MPS_PORT_RX_OVLAN_REG(i, A_RX_OVLAN0),
605                                  V_OVLAN_MASK(M_OVLAN_MASK) |
606                                  V_OVLAN_ETYPE(M_OVLAN_ETYPE),
607                                  V_OVLAN_MASK(M_OVLAN_MASK) |
608                                  V_OVLAN_ETYPE(0x88a8));
609                 /* OVLAN Type 0x9100 */
610                 t4_set_reg_field(adapter, MPS_PORT_RX_OVLAN_REG(i, A_RX_OVLAN1),
611                                  V_OVLAN_MASK(M_OVLAN_MASK) |
612                                  V_OVLAN_ETYPE(M_OVLAN_ETYPE),
613                                  V_OVLAN_MASK(M_OVLAN_MASK) |
614                                  V_OVLAN_ETYPE(0x9100));
615                 /* OVLAN Type 0x8100 */
616                 t4_set_reg_field(adapter, MPS_PORT_RX_OVLAN_REG(i, A_RX_OVLAN2),
617                                  V_OVLAN_MASK(M_OVLAN_MASK) |
618                                  V_OVLAN_ETYPE(M_OVLAN_ETYPE),
619                                  V_OVLAN_MASK(M_OVLAN_MASK) |
620                                  V_OVLAN_ETYPE(0x8100));
621
622                 /* IVLAN 0X8100 */
623                 t4_set_reg_field(adapter, MPS_PORT_RX_IVLAN(i),
624                                  V_IVLAN_ETYPE(M_IVLAN_ETYPE),
625                                  V_IVLAN_ETYPE(0x8100));
626
627                 t4_set_reg_field(adapter, MPS_PORT_RX_CTL(i),
628                                  F_OVLAN_EN0 | F_OVLAN_EN1 |
629                                  F_OVLAN_EN2 | F_IVLAN_EN,
630                                  F_OVLAN_EN0 | F_OVLAN_EN1 |
631                                  F_OVLAN_EN2 | F_IVLAN_EN);
632         }
633
634         if (cxgbe_get_devargs(pdev->device.devargs, CXGBE_DEVARG_KEEP_OVLAN))
635                 t4_tp_wr_bits_indirect(adapter, A_TP_INGRESS_CONFIG,
636                                        V_RM_OVLAN(1), V_RM_OVLAN(0));
637 }
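/*
 * Note on the ethertypes programmed above: 0x88a8 is the IEEE 802.1ad
 * (QinQ) S-TAG, 0x9100 is a pre-standard QinQ ethertype still found in
 * deployed networks, and 0x8100 is the standard 802.1Q C-TAG.
 */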
638
639 static void configure_pcie_ext_tag(struct adapter *adapter)
640 {
641         u16 v;
642         int pos = t4_os_find_pci_capability(adapter, PCI_CAP_ID_EXP);
643
644         if (!pos)
645                 return;
646
647         if (pos > 0) {
648                 t4_os_pci_read_cfg2(adapter, pos + PCI_EXP_DEVCTL, &v);
649                 v |= PCI_EXP_DEVCTL_EXT_TAG;
650                 t4_os_pci_write_cfg2(adapter, pos + PCI_EXP_DEVCTL, v);
651                 if (is_t6(adapter->params.chip)) {
652                         t4_set_reg_field(adapter, A_PCIE_CFG2,
653                                          V_T6_TOTMAXTAG(M_T6_TOTMAXTAG),
654                                          V_T6_TOTMAXTAG(7));
655                         t4_set_reg_field(adapter, A_PCIE_CMD_CFG,
656                                          V_T6_MINTAG(M_T6_MINTAG),
657                                          V_T6_MINTAG(8));
658                 } else {
659                         t4_set_reg_field(adapter, A_PCIE_CFG2,
660                                          V_TOTMAXTAG(M_TOTMAXTAG),
661                                          V_TOTMAXTAG(3));
662                         t4_set_reg_field(adapter, A_PCIE_CMD_CFG,
663                                          V_MINTAG(M_MINTAG),
664                                          V_MINTAG(8));
665                 }
666         }
667 }
668
669 /*
670  * Tweak configuration based on system architecture, etc.  Most of these have
671  * defaults assigned to them by Firmware Configuration Files (if we're using
672  * them) but need to be explicitly set if we're using hard-coded
673  * initialization. So these are essentially common tweaks/settings for
674  * Configuration Files and hard-coded initialization ...
675  */
676 static int adap_init0_tweaks(struct adapter *adapter)
677 {
678         u8 rx_dma_offset;
679
680         /*
681          * Fix up various Host-Dependent Parameters like Page Size, Cache
682          * Line Size, etc.  The firmware default is for a 4KB Page Size and
683          * 64B Cache Line Size ...
684          */
685         t4_fixup_host_params_compat(adapter, CXGBE_PAGE_SIZE, L1_CACHE_BYTES,
686                                     T5_LAST_REV);
687
688         /*
689          * Keep the chip default offset to deliver Ingress packets into our
690          * DMA buffers to zero
691          */
692         rx_dma_offset = 0;
693         t4_set_reg_field(adapter, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT),
694                          V_PKTSHIFT(rx_dma_offset));
695
696         t4_set_reg_field(adapter, A_SGE_FLM_CFG,
697                          V_CREDITCNT(M_CREDITCNT) | M_CREDITCNTPACKING,
698                          V_CREDITCNT(3) | V_CREDITCNTPACKING(1));
699
700         t4_set_reg_field(adapter, A_SGE_INGRESS_RX_THRESHOLD,
701                          V_THRESHOLD_3(M_THRESHOLD_3), V_THRESHOLD_3(32U));
702
703         t4_set_reg_field(adapter, A_SGE_CONTROL2, V_IDMAARBROUNDROBIN(1U),
704                          V_IDMAARBROUNDROBIN(1U));
705
706         /*
707          * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: the
708          * host stack adds the pseudo header itself.
709          */
710         t4_tp_wr_bits_indirect(adapter, A_TP_INGRESS_CONFIG,
711                                F_CSUM_HAS_PSEUDO_HDR, 0);
712
713         return 0;
714 }
715
716 /*
717  * Attempt to initialize the adapter via a Firmware Configuration File.
718  */
719 static int adap_init0_config(struct adapter *adapter, int reset)
720 {
721         struct fw_caps_config_cmd caps_cmd;
722         unsigned long mtype = 0, maddr = 0;
723         u32 finiver, finicsum, cfcsum;
724         int ret;
725         int config_issued = 0;
726         int cfg_addr;
727         char config_name[20];
728
729         /*
730          * Reset device if necessary.
731          */
732         if (reset) {
733                 ret = t4_fw_reset(adapter, adapter->mbox,
734                                   F_PIORSTMODE | F_PIORST);
735                 if (ret < 0) {
736                         dev_warn(adapter, "Firmware reset failed, error %d\n",
737                                  -ret);
738                         goto bye;
739                 }
740         }
741
742         cfg_addr = t4_flash_cfg_addr(adapter);
743         if (cfg_addr < 0) {
744                 ret = cfg_addr;
745                 dev_warn(adapter, "Finding address for firmware config file in flash failed, error %d\n",
746                          -ret);
747                 goto bye;
748         }
749
750         strcpy(config_name, "On Flash");
751         mtype = FW_MEMTYPE_CF_FLASH;
752         maddr = cfg_addr;
753
754         /*
755          * Issue a Capability Configuration command to the firmware to get it
756          * to parse the Configuration File.  We don't use t4_fw_config_file()
757          * because we want the ability to modify various features after we've
758          * processed the configuration file ...
759          */
760         memset(&caps_cmd, 0, sizeof(caps_cmd));
761         caps_cmd.op_to_write = cpu_to_be32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
762                                            F_FW_CMD_REQUEST | F_FW_CMD_READ);
763         caps_cmd.cfvalid_to_len16 =
764                 cpu_to_be32(F_FW_CAPS_CONFIG_CMD_CFVALID |
765                             V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
766                             V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) |
767                             FW_LEN16(caps_cmd));
768         ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
769                          &caps_cmd);
770         /*
771          * If the CAPS_CONFIG failed with an ENOENT (for a Firmware
772          * Configuration File in FLASH), our last gasp effort is to use the
773          * Firmware Configuration File which is embedded in the firmware.  A
774          * very few early versions of the firmware didn't have one embedded
775          * but we can ignore those.
776          */
777         if (ret == -ENOENT) {
778                 dev_info(adapter, "%s: Going for embedded config in firmware..\n",
779                          __func__);
780
781                 memset(&caps_cmd, 0, sizeof(caps_cmd));
782                 caps_cmd.op_to_write =
783                         cpu_to_be32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
784                                     F_FW_CMD_REQUEST | F_FW_CMD_READ);
785                 caps_cmd.cfvalid_to_len16 = cpu_to_be32(FW_LEN16(caps_cmd));
786                 ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd,
787                                  sizeof(caps_cmd), &caps_cmd);
788                 strcpy(config_name, "Firmware Default");
789         }
790
791         config_issued = 1;
792         if (ret < 0)
793                 goto bye;
794
795         finiver = be32_to_cpu(caps_cmd.finiver);
796         finicsum = be32_to_cpu(caps_cmd.finicsum);
797         cfcsum = be32_to_cpu(caps_cmd.cfcsum);
798         if (finicsum != cfcsum)
799                 dev_warn(adapter, "Configuration File checksum mismatch: [fini] csum=%#x, computed csum=%#x\n",
800                          finicsum, cfcsum);
801
802         /*
803          * If we're a pure NIC driver then disable all offloading facilities.
804          * This will allow the firmware to optimize aspects of the hardware
805          * configuration which will result in improved performance.
806          */
807         caps_cmd.niccaps &= cpu_to_be16(~FW_CAPS_CONFIG_NIC_ETHOFLD);
808         caps_cmd.toecaps = 0;
809         caps_cmd.iscsicaps = 0;
810         caps_cmd.rdmacaps = 0;
811         caps_cmd.fcoecaps = 0;
812
813         /*
814          * And now tell the firmware to use the configuration we just loaded.
815          */
816         caps_cmd.op_to_write = cpu_to_be32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
817                                            F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
818         caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
819         ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd),
820                          NULL);
821         if (ret < 0) {
822                 dev_warn(adapter, "Unable to finalize Firmware Capabilities %d\n",
823                          -ret);
824                 goto bye;
825         }
826
827         /*
828          * Tweak configuration based on system architecture, etc.
829          */
830         ret = adap_init0_tweaks(adapter);
831         if (ret < 0) {
832                 dev_warn(adapter, "Unable to do init0-tweaks %d\n", -ret);
833                 goto bye;
834         }
835
836         /*
837          * And finally tell the firmware to initialize itself using the
838          * parameters from the Configuration File.
839          */
840         ret = t4_fw_initialize(adapter, adapter->mbox);
841         if (ret < 0) {
842                 dev_warn(adapter, "Initializing Firmware failed, error %d\n",
843                          -ret);
844                 goto bye;
845         }
846
847         /*
848          * Return successfully and note that we're operating with parameters
849          * not supplied by the driver, rather than from hard-wired
850          * initialization constants buried in the driver.
851          */
852         dev_info(adapter,
853                  "Successfully configured using Firmware Configuration File \"%s\", version %#x, computed checksum %#x\n",
854                  config_name, finiver, cfcsum);
855
856         return 0;
857
858         /*
859          * Something bad happened.  Return the error ...  (If the "error"
860          * is that there's no Configuration File on the adapter we don't
861          * want to issue a warning since this is fairly common.)
862          */
863 bye:
864         if (config_issued && ret != -ENOENT)
865                 dev_warn(adapter, "\"%s\" configuration file error %d\n",
866                          config_name, -ret);
867
868         dev_debug(adapter, "%s: returning ret = %d ..\n", __func__, ret);
869         return ret;
870 }
871
872 static int adap_init0(struct adapter *adap)
873 {
874         struct fw_caps_config_cmd caps_cmd;
875         int ret = 0;
876         u32 v, port_vec;
877         enum dev_state state;
878         u32 params[7], val[7];
879         int reset = 1;
880         int mbox = adap->mbox;
881
882         /*
883          * Contact FW, advertising Master capability.
884          */
885         ret = t4_fw_hello(adap, adap->mbox, adap->mbox, MASTER_MAY, &state);
886         if (ret < 0) {
887                 dev_err(adap, "%s: could not connect to FW, error %d\n",
888                         __func__, -ret);
889                 goto bye;
890         }
891
892         CXGBE_DEBUG_MBOX(adap, "%s: adap->mbox = %d; ret = %d\n", __func__,
893                          adap->mbox, ret);
894
895         if (ret == mbox)
896                 adap->flags |= MASTER_PF;
897
898         if (state == DEV_STATE_INIT) {
899                 /*
900                  * Force halt and reset FW because a previous instance may have
901                  * exited abnormally without properly shutting down
902                  */
903                 ret = t4_fw_halt(adap, adap->mbox, reset);
904                 if (ret < 0) {
905                         dev_err(adap, "Failed to halt. Exit.\n");
906                         goto bye;
907                 }
908
909                 ret = t4_fw_restart(adap, adap->mbox, reset);
910                 if (ret < 0) {
911                         dev_err(adap, "Failed to restart. Exit.\n");
912                         goto bye;
913                 }
914                 state = (enum dev_state)((unsigned)state & ~DEV_STATE_INIT);
915         }
916
917         t4_get_version_info(adap);
918
919         ret = t4_get_core_clock(adap, &adap->params.vpd);
920         if (ret < 0) {
921                 dev_err(adap, "%s: could not get core clock, error %d\n",
922                         __func__, -ret);
923                 goto bye;
924         }
925
926         /*
927          * If the firmware is initialized already (and we're not forcing a
928          * master initialization), note that we're living with existing
929          * adapter parameters.  Otherwise, it's time to try initializing the
930          * adapter ...
931          */
932         if (state == DEV_STATE_INIT) {
933                 dev_info(adap, "Coming up as %s: Adapter already initialized\n",
934                          adap->flags & MASTER_PF ? "MASTER" : "SLAVE");
935         } else {
936                 dev_info(adap, "Coming up as MASTER: Initializing adapter\n");
937
938                 ret = adap_init0_config(adap, reset);
939                 if (ret == -ENOENT) {
940                         dev_err(adap,
941                                 "No Configuration File present on adapter. Using hard-wired configuration parameters.\n");
942                         goto bye;
943                 }
944         }
945         if (ret < 0) {
946                 dev_err(adap, "could not initialize adapter, error %d\n", -ret);
947                 goto bye;
948         }
949
950         /* Find out what ports are available to us. */
951         v = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
952             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_PORTVEC);
953         ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &v, &port_vec);
954         if (ret < 0) {
955                 dev_err(adap, "%s: failure in t4_query_params; error = %d\n",
956                         __func__, ret);
957                 goto bye;
958         }
959
960         adap->params.nports = hweight32(port_vec);
961         adap->params.portvec = port_vec;
962
963         dev_debug(adap, "%s: adap->params.nports = %u\n", __func__,
964                   adap->params.nports);
965
966         /*
967          * Give the SGE code a chance to pull in anything that it needs ...
968          * Note that this must be called after we retrieve our VPD parameters
969          * in order to know how to convert core ticks to seconds, etc.
970          */
971         ret = t4_sge_init(adap);
972         if (ret < 0) {
973                 dev_err(adap, "t4_sge_init failed with error %d\n",
974                         -ret);
975                 goto bye;
976         }
977
978         /*
979          * Grab some of our basic fundamental operating parameters.
980          */
981 #define FW_PARAM_DEV(param) \
982         (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
983          V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
984
985 #define FW_PARAM_PFVF(param) \
986         (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
987          V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) |  \
988          V_FW_PARAMS_PARAM_Y(0) | \
989          V_FW_PARAMS_PARAM_Z(0))
990
991         params[0] = FW_PARAM_PFVF(FILTER_START);
992         params[1] = FW_PARAM_PFVF(FILTER_END);
993         ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
994         if (ret < 0)
995                 goto bye;
996         adap->tids.ftid_base = val[0];
997         adap->tids.nftids = val[1] - val[0] + 1;
998
999         params[0] = FW_PARAM_PFVF(CLIP_START);
1000         params[1] = FW_PARAM_PFVF(CLIP_END);
1001         ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
1002         if (ret < 0)
1003                 goto bye;
1004         adap->clipt_start = val[0];
1005         adap->clipt_end = val[1];
1006
1007         /*
1008          * Get device capabilities so we can determine what resources we need
1009          * to manage.
1010          */
1011         memset(&caps_cmd, 0, sizeof(caps_cmd));
1012         caps_cmd.op_to_write = htonl(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
1013                                      F_FW_CMD_REQUEST | F_FW_CMD_READ);
1014         caps_cmd.cfvalid_to_len16 = htonl(FW_LEN16(caps_cmd));
1015         ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd),
1016                          &caps_cmd);
1017         if (ret < 0)
1018                 goto bye;
1019
1020         if ((caps_cmd.niccaps & cpu_to_be16(FW_CAPS_CONFIG_NIC_HASHFILTER)) &&
1021             is_t6(adap->params.chip)) {
1022                 if (init_hash_filter(adap) < 0)
1023                         goto bye;
1024         }
1025
1026         /* query tid-related parameters */
1027         params[0] = FW_PARAM_DEV(NTID);
1028         ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
1029                               params, val);
1030         if (ret < 0)
1031                 goto bye;
1032         adap->tids.ntids = val[0];
1033         adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
1034
1035         /* If we're running on newer firmware, let it know that we're
1036          * prepared to deal with encapsulated CPL messages.  Older
1037          * firmware won't understand this and we'll just get
1038          * unencapsulated messages ...
1039          */
1040         params[0] = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
1041         val[0] = 1;
1042         (void)t4_set_params(adap, adap->mbox, adap->pf, 0, 1, params, val);
1043
1044         /*
1045          * Find out whether we're allowed to use the T5+ ULPTX MEMWRITE DSGL
1046          * capability.  Earlier versions of the firmware didn't have the
1047          * ULPTX_MEMWRITE_DSGL so we'll interpret a query failure as no
1048          * permission to use ULPTX MEMWRITE DSGL.
1049          */
1050         if (is_t4(adap->params.chip)) {
1051                 adap->params.ulptx_memwrite_dsgl = false;
1052         } else {
1053                 params[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
1054                 ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
1055                                       1, params, val);
1056                 adap->params.ulptx_memwrite_dsgl = (ret == 0 && val[0] != 0);
1057         }
1058
1059         /*
1060          * The MTU/MSS Table is initialized by now, so load their values.  If
1061          * we're initializing the adapter, then we'll make any modifications
1062          * we want to the MTU/MSS Table and also initialize the congestion
1063          * parameters.
1064          */
1065         t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
1066         if (state != DEV_STATE_INIT) {
1067                 int i;
1068
1069                 /*
1070                  * The default MTU Table contains values 1492 and 1500.
1071                  * However, for TCP, it's better to have two values which are
1072                  * a multiple of 8 +/- 4 bytes apart near this popular MTU.
1073                  * This allows us to have a TCP Data Payload which is a
1074                  * multiple of 8 regardless of what combination of TCP Options
1075                  * are in use (always a multiple of 4 bytes) which is
1076                  * important for performance reasons.  For instance, if no
1077                  * options are in use, then we have a 20-byte IP header and a
1078                  * 20-byte TCP header.  In this case, a 1500-byte MSS would
1079                  * result in a TCP Data Payload of 1500 - 40 == 1460 bytes
1080                  * which is not a multiple of 8.  So using an MSS of 1488 in
1081                  * this case results in a TCP Data Payload of 1448 bytes which
1082                  * is a multiple of 8.  On the other hand, if 12-byte TCP Time
1083                  * Stamps have been negotiated, then an MTU of 1500 bytes
1084                  * results in a TCP Data Payload of 1448 bytes which, as
1085                  * above, is a multiple of 8 bytes ...
1086                  */
1087                 for (i = 0; i < NMTUS; i++)
1088                         if (adap->params.mtus[i] == 1492) {
1089                                 adap->params.mtus[i] = 1488;
1090                                 break;
1091                         }
1092
1093                 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
1094                              adap->params.b_wnd);
1095         }
1096         t4_init_sge_params(adap);
1097         t4_init_tp_params(adap);
1098         configure_pcie_ext_tag(adap);
1099         configure_vlan_types(adap);
1100
1101         adap->params.drv_memwin = MEMWIN_NIC;
1102         adap->flags |= FW_OK;
1103         dev_debug(adap, "%s: returning zero..\n", __func__);
1104         return 0;
1105
1106         /*
1107          * Something bad happened.  If a command timed out or failed with EIO,
1108          * the FW is not operating within its spec or something catastrophic
1109          * happened to the HW/FW, so stop issuing commands.
1110          */
1111 bye:
1112         if (ret != -ETIMEDOUT && ret != -EIO)
1113                 t4_fw_bye(adap, adap->mbox);
1114         return ret;
1115 }
1116
1117 /**
1118  * t4_os_portmod_changed - handle port module changes
1119  * @adap: the adapter associated with the module change
1120  * @port_id: the port index whose module status has changed
1121  *
1122  * This is the OS-dependent handler for port module changes.  It is
1123  * invoked when a port module is removed or inserted for any OS-specific
1124  * processing.
1125  */
1126 void t4_os_portmod_changed(const struct adapter *adap, int port_id)
1127 {
1128         static const char * const mod_str[] = {
1129                 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
1130         };
1131
1132         const struct port_info *pi = adap2pinfo(adap, port_id);
1133
1134         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
1135                 dev_info(adap, "Port%d: port module unplugged\n", pi->port_id);
1136         else if (pi->mod_type < ARRAY_SIZE(mod_str))
1137                 dev_info(adap, "Port%d: %s port module inserted\n", pi->port_id,
1138                          mod_str[pi->mod_type]);
1139         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
1140                 dev_info(adap, "Port%d: unsupported port module inserted\n",
1141                          pi->port_id);
1142         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
1143                 dev_info(adap, "Port%d: unknown port module inserted\n",
1144                          pi->port_id);
1145         else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
1146                 dev_info(adap, "Port%d: transceiver module error\n",
1147                          pi->port_id);
1148         else
1149                 dev_info(adap, "Port%d: unknown module type %d inserted\n",
1150                          pi->port_id, pi->mod_type);
1151 }
1152
1153 inline bool force_linkup(struct adapter *adap)
1154 {
1155         struct rte_pci_device *pdev = adap->pdev;
1156
1157         if (is_pf4(adap))
1158                 return false;   /* force_linkup not required for pf driver */
1159         if (!cxgbe_get_devargs(pdev->device.devargs,
1160                                CXGBE_DEVARG_FORCE_LINK_UP))
1161                 return false;
1162         return true;
1163 }
1164
1165 /**
1166  * link_start - enable a port
1167  * @dev: the port to enable
1168  *
1169  * Performs the MAC and PHY actions needed to enable a port.
1170  */
1171 int link_start(struct port_info *pi)
1172 {
1173         struct adapter *adapter = pi->adapter;
1174         int ret;
1175         unsigned int mtu;
1176
1177         mtu = pi->eth_dev->data->dev_conf.rxmode.max_rx_pkt_len -
1178               (ETHER_HDR_LEN + ETHER_CRC_LEN);
1179
1180         /*
1181          * We do not set address filters and promiscuity here, the stack does
1182          * that step explicitly.
1183          */
1184         ret = t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, -1, -1,
1185                             -1, 1, true);
1186         if (ret == 0) {
1187                 ret = t4_change_mac(adapter, adapter->mbox, pi->viid,
1188                                     pi->xact_addr_filt,
1189                                     (u8 *)&pi->eth_dev->data->mac_addrs[0],
1190                                     true, true);
1191                 if (ret >= 0) {
1192                         pi->xact_addr_filt = ret;
1193                         ret = 0;
1194                 }
1195         }
1196         if (ret == 0 && is_pf4(adapter))
1197                 ret = t4_link_l1cfg(adapter, adapter->mbox, pi->tx_chan,
1198                                     &pi->link_cfg);
1199         if (ret == 0) {
1200                 /*
1201                  * Enabling a Virtual Interface can result in an interrupt
1202                  * during the processing of the VI Enable command and, in some
1203                  * paths, result in an attempt to issue another command in the
1204                  * interrupt context.  Thus, we disable interrupts during the
1205                  * course of the VI Enable command ...
1206                  */
1207                 ret = t4_enable_vi_params(adapter, adapter->mbox, pi->viid,
1208                                           true, true, false);
1209         }
1210
1211         if (ret == 0 && force_linkup(adapter))
1212                 pi->eth_dev->data->dev_link.link_status = ETH_LINK_UP;
1213         return ret;
1214 }
1215
1216 /**
1217  * cxgbe_write_rss_conf - write the RSS hash configuration for a given port
1218  * @pi: the port
1219  * @rss_hf: Hash configuration to apply
1220  */
1221 int cxgbe_write_rss_conf(const struct port_info *pi, uint64_t rss_hf)
1222 {
1223         struct adapter *adapter = pi->adapter;
1224         const struct sge_eth_rxq *rxq;
1225         u64 flags = 0;
1226         u16 rss;
1227         int err;
1228
1229         /*  Should never be called before setting up sge eth rx queues */
1230         if (!(adapter->flags & FULL_INIT_DONE)) {
1231                 dev_err(adapter, "%s: No RXQs available on port %d\n",
1232                         __func__, pi->port_id);
1233                 return -EINVAL;
1234         }
1235
1236         /* Don't allow unsupported hash functions */
1237         if (rss_hf & ~CXGBE_RSS_HF_ALL)
1238                 return -EINVAL;
1239
1240         if (rss_hf & ETH_RSS_IPV4)
1241                 flags |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
1242
1243         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1244                 flags |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
1245
1246         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1247                 flags |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
1248                          F_FW_RSS_VI_CONFIG_CMD_UDPEN;
1249
1250         if (rss_hf & ETH_RSS_IPV6)
1251                 flags |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
1252
1253         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1254                 flags |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
1255
1256         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1257                 flags |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
1258                          F_FW_RSS_VI_CONFIG_CMD_UDPEN;
1259
1260         rxq = &adapter->sge.ethrxq[pi->first_qset];
1261         rss = rxq[0].rspq.abs_id;
1262
1263         /* If Tunnel All Lookup isn't specified in the global RSS
1264          * Configuration, then we need to specify a default Ingress
1265          * Queue for any ingress packets which aren't hashed.  We'll
1266          * use our first ingress queue ...
1267          */
1268         err = t4_config_vi_rss(adapter, adapter->mbox, pi->viid,
1269                                flags, rss);
1270         return err;
1271 }
1272
1273 /**
1274  * cxgbe_write_rss - write the RSS table for a given port
1275  * @pi: the port
1276  * @queues: array of queue indices for RSS
1277  *
1278  * Sets up the portion of the HW RSS table for the port's VI to distribute
1279  * packets to the Rx queues in @queues.
1280  */
1281 int cxgbe_write_rss(const struct port_info *pi, const u16 *queues)
1282 {
1283         u16 *rss;
1284         int i, err;
1285         struct adapter *adapter = pi->adapter;
1286         const struct sge_eth_rxq *rxq;
1287
1288         /*  Should never be called before setting up sge eth rx queues */
1289         BUG_ON(!(adapter->flags & FULL_INIT_DONE));
1290
1291         rxq = &adapter->sge.ethrxq[pi->first_qset];
1292         rss = rte_zmalloc(NULL, pi->rss_size * sizeof(u16), 0);
1293         if (!rss)
1294                 return -ENOMEM;
1295
1296         /* map the queue indices to queue ids */
1297         for (i = 0; i < pi->rss_size; i++, queues++)
1298                 rss[i] = rxq[*queues].rspq.abs_id;
1299
1300         err = t4_config_rss_range(adapter, adapter->pf, pi->viid, 0,
1301                                   pi->rss_size, rss, pi->rss_size);
1302         rte_free(rss);
1303         return err;
1304 }
1305
1306 /**
1307  * setup_rss - configure RSS
1308  * @adapter: the adapter
1309  *
1310  * Sets up RSS to distribute packets to multiple receive queues.  We
1311  * configure the RSS CPU lookup table to distribute to the number of HW
1312  * receive queues, and the response queue lookup table to narrow that
1313  * down to the response queues actually configured for each port.
1314  * We always configure the RSS mapping for all ports since the mapping
1315  * table has plenty of entries.
1316  */
1317 int setup_rss(struct port_info *pi)
1318 {
1319         int j, err;
1320         struct adapter *adapter = pi->adapter;
1321
1322         dev_debug(adapter, "%s:  pi->rss_size = %u; pi->n_rx_qsets = %u\n",
1323                   __func__, pi->rss_size, pi->n_rx_qsets);
1324
1325         if (!(pi->flags & PORT_RSS_DONE)) {
1326                 if (adapter->flags & FULL_INIT_DONE) {
1327                         /* Fill default values with equal distribution */
1328                         for (j = 0; j < pi->rss_size; j++)
1329                                 pi->rss[j] = j % pi->n_rx_qsets;
1330
1331                         err = cxgbe_write_rss(pi, pi->rss);
1332                         if (err)
1333                                 return err;
1334
1335                         err = cxgbe_write_rss_conf(pi, pi->rss_hf);
1336                         if (err)
1337                                 return err;
1338                         pi->flags |= PORT_RSS_DONE;
1339                 }
1340         }
1341         return 0;
1342 }
1343
1344 /*
1345  * Arm the hold-off timer and enable interrupt generation for an Rx queue.
1346  */
1347 static void enable_rx(struct adapter *adap, struct sge_rspq *q)
1348 {
1349         /* 0-increment GTS to start the timer and enable interrupts */
1350         t4_write_reg(adap, is_pf4(adap) ? MYPF_REG(A_SGE_PF_GTS) :
1351                                           T4VF_SGE_BASE_ADDR + A_SGE_VF_GTS,
1352                      V_SEINTARM(q->intr_params) |
1353                      V_INGRESSQID(q->cntxt_id));
1354 }
1355
1356 void cxgbe_enable_rx_queues(struct port_info *pi)
1357 {
1358         struct adapter *adap = pi->adapter;
1359         struct sge *s = &adap->sge;
1360         unsigned int i;
1361
1362         for (i = 0; i < pi->n_rx_qsets; i++)
1363                 enable_rx(adap, &s->ethrxq[pi->first_qset + i].rspq);
1364 }
1365
1366 /**
1367  * fw_caps_to_speed_caps - translate Firmware Port Caps to Speed Caps.
1368  * @port_type: Firmware Port Type
1369  * @fw_caps: Firmware Port Capabilities
1370  * @speed_caps: Device Info Speed Capabilities
1371  *
1372  * Translate a Firmware Port Capabilities specification to Device Info
1373  * Speed Capabilities.
1374  */
1375 static void fw_caps_to_speed_caps(enum fw_port_type port_type,
1376                                   unsigned int fw_caps,
1377                                   u32 *speed_caps)
1378 {
1379 #define SET_SPEED(__speed_name) \
1380         do { \
1381                 *speed_caps |= ETH_LINK_ ## __speed_name; \
1382         } while (0)
1383
1384 #define FW_CAPS_TO_SPEED(__fw_name) \
1385         do { \
1386                 if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1387                         SET_SPEED(__fw_name); \
1388         } while (0)
1389
1390         switch (port_type) {
1391         case FW_PORT_TYPE_BT_SGMII:
1392         case FW_PORT_TYPE_BT_XFI:
1393         case FW_PORT_TYPE_BT_XAUI:
1394                 FW_CAPS_TO_SPEED(SPEED_100M);
1395                 FW_CAPS_TO_SPEED(SPEED_1G);
1396                 FW_CAPS_TO_SPEED(SPEED_10G);
1397                 break;
1398
1399         case FW_PORT_TYPE_KX4:
1400         case FW_PORT_TYPE_KX:
1401         case FW_PORT_TYPE_FIBER_XFI:
1402         case FW_PORT_TYPE_FIBER_XAUI:
1403         case FW_PORT_TYPE_SFP:
1404         case FW_PORT_TYPE_QSFP_10G:
1405         case FW_PORT_TYPE_QSA:
1406                 FW_CAPS_TO_SPEED(SPEED_1G);
1407                 FW_CAPS_TO_SPEED(SPEED_10G);
1408                 break;
1409
1410         case FW_PORT_TYPE_KR:
1411                 SET_SPEED(SPEED_10G);
1412                 break;
1413
1414         case FW_PORT_TYPE_BP_AP:
1415         case FW_PORT_TYPE_BP4_AP:
1416                 SET_SPEED(SPEED_1G);
1417                 SET_SPEED(SPEED_10G);
1418                 break;
1419
1420         case FW_PORT_TYPE_BP40_BA:
1421         case FW_PORT_TYPE_QSFP:
1422                 SET_SPEED(SPEED_40G);
1423                 break;
1424
1425         case FW_PORT_TYPE_CR_QSFP:
1426         case FW_PORT_TYPE_SFP28:
1427         case FW_PORT_TYPE_KR_SFP28:
1428                 FW_CAPS_TO_SPEED(SPEED_1G);
1429                 FW_CAPS_TO_SPEED(SPEED_10G);
1430                 FW_CAPS_TO_SPEED(SPEED_25G);
1431                 break;
1432
1433         case FW_PORT_TYPE_CR2_QSFP:
1434                 SET_SPEED(SPEED_50G);
1435                 break;
1436
1437         case FW_PORT_TYPE_KR4_100G:
1438         case FW_PORT_TYPE_CR4_QSFP:
1439                 FW_CAPS_TO_SPEED(SPEED_25G);
1440                 FW_CAPS_TO_SPEED(SPEED_40G);
1441                 FW_CAPS_TO_SPEED(SPEED_50G);
1442                 FW_CAPS_TO_SPEED(SPEED_100G);
1443                 break;
1444
1445         default:
1446                 break;
1447         }
1448
1449 #undef FW_CAPS_TO_SPEED
1450 #undef SET_SPEED
1451 }
1452
1453 /**
1454  * cxgbe_get_speed_caps - Fetch supported speed capabilities
1455  * @pi: Underlying port's info
1456  * @speed_caps: Device Info speed capabilities
1457  *
1458  * Fetch supported speed capabilities of the underlying port.
1459  */
1460 void cxgbe_get_speed_caps(struct port_info *pi, u32 *speed_caps)
1461 {
1462         *speed_caps = 0;
1463
1464         fw_caps_to_speed_caps(pi->port_type, pi->link_cfg.pcaps,
1465                               speed_caps);
1466
1467         if (!(pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG))
1468                 *speed_caps |= ETH_LINK_SPEED_FIXED;
1469 }
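
/*
 * Usage sketch (illustrative only; the function and variable names below
 * are hypothetical, and the actual ethdev wiring lives elsewhere in the
 * driver):
 *
 *      static void my_dev_info_get(struct rte_eth_dev *dev,
 *                                  struct rte_eth_dev_info *info)
 *      {
 *              struct port_info *pi = dev->data->dev_private;
 *
 *              cxgbe_get_speed_caps(pi, &info->speed_capa);
 *      }
 *
 * A port without FW_PORT_CAP32_ANEG reports ETH_LINK_SPEED_FIXED, so
 * applications must request an explicit link speed for it.
 */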
1470
1471 /**
1472  * cxgbe_up - enable the adapter
1473  * @adap: adapter being enabled
1474  *
1475  * Called when the first port is enabled, this function performs the
1476  * actions necessary to make an adapter operational, such as completing
1477  * the initialization of HW modules and enabling interrupts.
1478  */
1479 int cxgbe_up(struct adapter *adap)
1480 {
1481         enable_rx(adap, &adap->sge.fw_evtq);
1482         t4_sge_tx_monitor_start(adap);
1483         if (is_pf4(adap))
1484                 t4_intr_enable(adap);
1485         adap->flags |= FULL_INIT_DONE;
1486
1487         /* TODO: deadman watchdog ?? */
1488         return 0;
1489 }
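
/*
 * Expected call flow from the ethdev start path (a sketch; the
 * FULL_INIT_DONE guard is assumed to live in the caller, since cxgbe_up()
 * itself does not check the flag before re-running initialization):
 *
 *      if (!(adapter->flags & FULL_INIT_DONE)) {
 *              err = cxgbe_up(adapter);
 *              if (err < 0)
 *                      return err;
 *      }
 *      cxgbe_enable_rx_queues(pi);
 */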
1490
1491 /*
1492  * Close the port: disable its VI and reset the link configuration.
1493  */
1494 int cxgbe_down(struct port_info *pi)
1495 {
1496         struct adapter *adapter = pi->adapter;
1497         int err = 0;
1498
1499         err = t4_enable_vi(adapter, adapter->mbox, pi->viid, false, false);
1500         if (err) {
1501                 dev_err(adapter, "%s: disable_vi failed: %d\n", __func__, err);
1502                 return err;
1503         }
1504
1505         t4_reset_link_config(adapter, pi->pidx);
1506         return 0;
1507 }
1508
1509 /*
1510  * Release resources when all the ports have been stopped.
1511  */
1512 void cxgbe_close(struct adapter *adapter)
1513 {
1514         struct port_info *pi;
1515         int i;
1516
1517         if (adapter->flags & FULL_INIT_DONE) {
1518                 if (is_pf4(adapter))
1519                         t4_intr_disable(adapter);
1520                 tid_free(&adapter->tids);
1521                 t4_cleanup_clip_tbl(adapter);
1522                 t4_sge_tx_monitor_stop(adapter);
1523                 t4_free_sge_resources(adapter);
1524                 for_each_port(adapter, i) {
1525                         pi = adap2pinfo(adapter, i);
1526                         if (pi->viid != 0)
1527                                 t4_free_vi(adapter, adapter->mbox,
1528                                            adapter->pf, 0, pi->viid);
1529                         rte_free(pi->eth_dev->data->mac_addrs);
1530                         /* Skip first port since it'll be freed by DPDK stack */
1531                         if (i) {
1532                                 rte_free(pi->eth_dev->data->dev_private);
1533                                 rte_eth_dev_release_port(pi->eth_dev);
1534                         }
1535                 }
1536                 adapter->flags &= ~FULL_INIT_DONE;
1537         }
1538
1539         if (is_pf4(adapter) && (adapter->flags & FW_OK))
1540                 t4_fw_bye(adapter, adapter->mbox);
1541 }
1542
1543 int cxgbe_probe(struct adapter *adapter)
1544 {
1545         struct port_info *pi;
1546         int chip;
1547         int func, i;
1548         int err = 0;
1549         u32 whoami;
1550
1551         whoami = t4_read_reg(adapter, A_PL_WHOAMI);
1552         chip = t4_get_chip_type(adapter,
1553                         CHELSIO_PCI_ID_VER(adapter->pdev->id.device_id));
1554         if (chip < 0)
1555                 return chip;
1556
1557         func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
1558                G_SOURCEPF(whoami) : G_T6_SOURCEPF(whoami);
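        /*
         * A_PL_WHOAMI reports which PCI physical function is driving the
         * adapter; the SOURCEPF field layout changed on T6, hence the
         * chip-version check above.  The PF number doubles as the mailbox
         * to use for firmware communication.
         */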
1559
1560         adapter->mbox = func;
1561         adapter->pf = func;
1562
1563         t4_os_lock_init(&adapter->mbox_lock);
1564         TAILQ_INIT(&adapter->mbox_list);
1565         t4_os_lock_init(&adapter->win0_lock);
1566
1567         err = t4_prep_adapter(adapter);
1568         if (err)
1569                 return err;
1570
1571         setup_memwin(adapter);
1572         err = adap_init0(adapter);
1573         if (err) {
1574                 dev_err(adapter, "%s: Adapter initialization failed, error %d\n",
1575                         __func__, err);
1576                 goto out_free;
1577         }
1578
1579         if (!is_t4(adapter->params.chip)) {
1580                 /*
1581                  * The userspace doorbell BAR is split evenly into doorbell
1582                  * regions, each associated with an egress queue.  If this
1583                  * per-queue region is large enough (at least UDBS_SEG_SIZE)
1584                  * then it can be used to submit a tx work request with an
1585                  * implied doorbell.  Enable write combining on the BAR if
1586                  * there is room for such work requests.
1587                  */
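                /*
                 * Worked example with illustrative values (assuming a 4 KB
                 * page and a 128-byte UDBS_SEG_SIZE): num_seg = 4096 / 128
                 * = 32.  If QUEUESPERPAGEPF yields qpp = 32, each queue's
                 * doorbell region is 4096 / 32 = 128 bytes and write
                 * combining fits; with qpp = 64 each region is only 64
                 * bytes and the warning below fires instead.
                 */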
1588                 int s_qpp, qpp, num_seg;
1589
1590                 s_qpp = (S_QUEUESPERPAGEPF0 +
1591                         (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) *
1592                         adapter->pf);
1593                 qpp = 1 << ((t4_read_reg(adapter,
1594                                 A_SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp)
1595                                 & M_QUEUESPERPAGEPF0);
1596                 num_seg = CXGBE_PAGE_SIZE / UDBS_SEG_SIZE;
1597                 if (qpp > num_seg)
1598                         dev_warn(adapter, "Incorrect SGE EGRESS QUEUES_PER_PAGE configuration, continuing in debug mode\n");
1599
1600                 adapter->bar2 = (void *)adapter->pdev->mem_resource[2].addr;
1601                 if (!adapter->bar2) {
1602                         dev_err(adapter, "cannot map device bar2 region\n");
1603                         err = -ENOMEM;
1604                         goto out_free;
1605                 }
1606                 t4_write_reg(adapter, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) |
1607                              V_STATMODE(0));
1608         }
1609
1610         for_each_port(adapter, i) {
1611                 const unsigned int numa_node = rte_socket_id();
1612                 char name[RTE_ETH_NAME_MAX_LEN];
1613                 struct rte_eth_dev *eth_dev;
1614
1615                 snprintf(name, sizeof(name), "%s_%d",
1616                          adapter->pdev->device.name, i);
1617
1618                 if (i == 0) {
1619                         /* First port is already allocated by DPDK */
1620                         eth_dev = adapter->eth_dev;
1621                         goto allocate_mac;
1622                 }
1623
1624                 /*
1625                  * Now do all data allocation for the remaining ports:
1626                  * the eth_dev structure and its internal (private) data.
1627                  */
1628
1629                 /* reserve an ethdev entry */
1630                 eth_dev = rte_eth_dev_allocate(name);
1631                 if (!eth_dev) {
1632                         err = -ENOMEM;
1633                         goto out_free;
1634                 }
1633
1634                 eth_dev->data->dev_private =
1635                         rte_zmalloc_socket(name, sizeof(struct port_info),
1636                                            RTE_CACHE_LINE_SIZE, numa_node);
1637                 if (!eth_dev->data->dev_private) {
1638                         err = -ENOMEM;
1639                         goto out_free;
1640                 }
1639
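                /*
                 * All ports converge here: port 0 reuses the ethdev and
                 * private data allocated by the DPDK PCI probe, while the
                 * remaining ports use the freshly allocated ones from this
                 * loop.
                 */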
1640 allocate_mac:
1641                 pi = (struct port_info *)eth_dev->data->dev_private;
1642                 adapter->port[i] = pi;
1643                 pi->eth_dev = eth_dev;
1644                 pi->adapter = adapter;
1645                 pi->xact_addr_filt = -1;
1646                 pi->port_id = i;
1647                 pi->pidx = i;
1648
1649                 pi->eth_dev->device = &adapter->pdev->device;
1650                 pi->eth_dev->dev_ops = adapter->eth_dev->dev_ops;
1651                 pi->eth_dev->tx_pkt_burst = adapter->eth_dev->tx_pkt_burst;
1652                 pi->eth_dev->rx_pkt_burst = adapter->eth_dev->rx_pkt_burst;
1653
1654                 rte_eth_copy_pci_info(pi->eth_dev, adapter->pdev);
1655
1656                 pi->eth_dev->data->mac_addrs = rte_zmalloc(name,
1657                                                            ETHER_ADDR_LEN, 0);
1658                 if (!pi->eth_dev->data->mac_addrs) {
1659                         dev_err(adapter, "%s: Mem allocation failed for storing mac addr, aborting\n",
1660                                 __func__);
1661                         err = -ENOMEM;
1662                         goto out_free;
1663                 }
1664
1665                 /* The first port is finished later by the upper layer */
1666                 if (i > 0)
1667                         rte_eth_dev_probing_finish(eth_dev);
1669         }
1670
1671         if (adapter->flags & FW_OK) {
1672                 err = t4_port_init(adapter, adapter->mbox, adapter->pf, 0);
1673                 if (err) {
1674                         dev_err(adapter, "%s: t4_port_init failed with err %d\n",
1675                                 __func__, err);
1676                         goto out_free;
1677                 }
1678         }
1679
1680         cfg_queues(adapter->eth_dev);
1681
1682         print_adapter_info(adapter);
1683         print_port_info(adapter);
1684
1685         adapter->clipt = t4_init_clip_tbl(adapter->clipt_start,
1686                                           adapter->clipt_end);
1687         if (!adapter->clipt) {
1688                 /* We tolerate the lack of a CLIP table, giving up
1689                  * some functionality
1690                  */
1691                 dev_warn(adapter, "could not allocate CLIP. Continuing\n");
1692         }
1693
1694         if (tid_init(&adapter->tids) < 0) {
1695                 /* Disable filtering support */
1696                 dev_warn(adapter, "could not allocate TID table, "
1697                          "filter support disabled. Continuing\n");
1698         }
1699
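        /*
         * Hash (maskless) filters are exact-match entries kept in the LE
         * hash region; they are only usable when the firmware has enabled
         * the region (F_HASHEN) and a hash base address can be read back.
         */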
1700         if (is_hashfilter(adapter)) {
1701                 if (t4_read_reg(adapter, A_LE_DB_CONFIG) & F_HASHEN) {
1702                         u32 hash_base, hash_reg;
1703
1704                         hash_reg = A_LE_DB_TID_HASHBASE;
1705                         hash_base = t4_read_reg(adapter, hash_reg);
1706                         adapter->tids.hash_base = hash_base / 4;
1707                 }
1708         } else {
1709                 /* Disable hash filtering support */
1710                 dev_warn(adapter,
1711                          "Maskless filter support disabled. Continuing\n");
1712         }
1713
1714         err = init_rss(adapter);
1715         if (err)
1716                 goto out_free;
1717
1718         return 0;
1719
1720 out_free:
1721         for_each_port(adapter, i) {
1722                 pi = adap2pinfo(adapter, i);
1723                 if (pi->viid != 0)
1724                         t4_free_vi(adapter, adapter->mbox, adapter->pf,
1725                                    0, pi->viid);
1726                 /* Skip first port since it'll be de-allocated by DPDK */
1727                 if (i == 0)
1728                         continue;
1729                 if (pi->eth_dev) {
1730                         if (pi->eth_dev->data->dev_private)
1731                                 rte_free(pi->eth_dev->data->dev_private);
1732                         rte_eth_dev_release_port(pi->eth_dev);
1733                 }
1734         }
1735
1736         if (adapter->flags & FW_OK)
1737                 t4_fw_bye(adapter, adapter->mbox);
1738         return -err;
1739 }