net/enic: add minimal VF representor
[dpdk.git] drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
19
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37
38 static int is_zero_addr(uint8_t *addr)
39 {
40         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42
43 static int is_mcast_addr(uint8_t *addr)
44 {
45         return addr[0] & 1;
46 }
47
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50         return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
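
/*
 * Worked example of the checks above, using hypothetical addresses:
 * 00:25:b5:00:00:0a is accepted because it is neither all zeros nor
 * multicast, while 01:00:5e:00:00:01 is rejected since its first octet
 * has the multicast bit (addr[0] & 1) set.
 */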
52
53 static void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56         uint16_t i;
57
58         if (!rq || !rq->mbuf_ring) {
59                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60                 return;
61         }
62
63         for (i = 0; i < rq->ring.desc_count; i++) {
64                 if (rq->mbuf_ring[i]) {
65                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66                         rq->mbuf_ring[i] = NULL;
67                 }
68         }
69 }
70
71 static void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73         struct rte_mbuf *mbuf = *buf;
74
75         rte_pktmbuf_free_seg(mbuf);
76         *buf = NULL;
77 }
78
79 static void enic_log_q_error(struct enic *enic)
80 {
81         unsigned int i;
82         uint32_t error_status;
83
84         for (i = 0; i < enic->wq_count; i++) {
85                 error_status = vnic_wq_error_status(&enic->wq[i]);
86                 if (error_status)
87                         dev_err(enic, "WQ[%d] error_status %d\n", i,
88                                 error_status);
89         }
90
91         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92                 if (!enic->rq[i].in_use)
93                         continue;
94                 error_status = vnic_rq_error_status(&enic->rq[i]);
95                 if (error_status)
96                         dev_err(enic, "RQ[%d] error_status %d\n", i,
97                                 error_status);
98         }
99 }
100
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103         struct enic_soft_stats *soft_stats = &enic->soft_stats;
104         rte_atomic64_clear(&soft_stats->rx_nombuf);
105         rte_atomic64_clear(&soft_stats->rx_packet_errors);
106         rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111         struct enic_soft_stats *soft_stats = &enic->soft_stats;
112         rte_atomic64_init(&soft_stats->rx_nombuf);
113         rte_atomic64_init(&soft_stats->rx_packet_errors);
114         rte_atomic64_init(&soft_stats->tx_oversized);
115         enic_clear_soft_stats(enic);
116 }
117
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120         int ret;
121
122         ret = vnic_dev_stats_clear(enic->vdev);
123         if (ret != 0) {
124                 dev_err(enic, "Error in clearing stats\n");
125                 return ret;
126         }
127         enic_clear_soft_stats(enic);
128
129         return 0;
130 }
131
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134         struct vnic_stats *stats;
135         struct enic_soft_stats *soft_stats = &enic->soft_stats;
136         int64_t rx_truncated;
137         uint64_t rx_packet_errors;
138         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139
140         if (ret) {
141                 dev_err(enic, "Error in getting stats\n");
142                 return ret;
143         }
144
145         /* The number of truncated packets can only be calculated by
146          * subtracting a hardware counter from the error packets seen by
147          * the driver. Note: this causes transient inaccuracies in the
148          * ipackets count. Also, the lengths of truncated packets are
149          * counted in ibytes even though truncated packets are dropped,
150          * which can make ibytes slightly higher than it should be.
151          */
152         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154
155         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156         r_stats->opackets = stats->tx.tx_frames_ok;
157
158         r_stats->ibytes = stats->rx.rx_bytes_ok;
159         r_stats->obytes = stats->tx.tx_bytes_ok;
160
161         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162         r_stats->oerrors = stats->tx.tx_errors
163                            + rte_atomic64_read(&soft_stats->tx_oversized);
164
165         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166
167         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168         return 0;
169 }
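
/*
 * Worked example of the truncation accounting above, with hypothetical
 * counter values: if the driver has counted rx_packet_errors = 110 while
 * the hardware reports rx.rx_errors = 100, then rx_truncated = 10. With
 * rx.rx_frames_ok = 5000 and rx.rx_no_bufs = 3, the reported stats become
 * ipackets = 5000 - 10 = 4990 and imissed = 3 + 10 = 13, i.e. truncated
 * frames are moved from the received count into the missed count.
 */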
170
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173         struct rte_eth_dev *eth_dev = enic->rte_dev;
174         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175
176         return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181         int err;
182
183         if (!is_eth_addr_valid(mac_addr)) {
184                 dev_err(enic, "invalid mac address\n");
185                 return -EINVAL;
186         }
187
188         err = vnic_dev_add_addr(enic->vdev, mac_addr);
189         if (err)
190                 dev_err(enic, "add mac addr failed\n");
191         return err;
192 }
193
194 static void
195 enic_free_rq_buf(struct rte_mbuf **mbuf)
196 {
197         if (*mbuf == NULL)
198                 return;
199
200         rte_pktmbuf_free(*mbuf);
201         *mbuf = NULL;
202 }
203
204 void enic_init_vnic_resources(struct enic *enic)
205 {
206         unsigned int error_interrupt_enable = 1;
207         unsigned int error_interrupt_offset = 0;
208         unsigned int rxq_interrupt_enable = 0;
209         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
210         unsigned int index = 0;
211         unsigned int cq_idx;
212         struct vnic_rq *data_rq;
213
214         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
215                 rxq_interrupt_enable = 1;
216
217         for (index = 0; index < enic->rq_count; index++) {
218                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
219
220                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
221                         cq_idx,
222                         error_interrupt_enable,
223                         error_interrupt_offset);
224
225                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
226                 if (data_rq->in_use)
227                         vnic_rq_init(data_rq,
228                                      cq_idx,
229                                      error_interrupt_enable,
230                                      error_interrupt_offset);
231                 vnic_cq_init(&enic->cq[cq_idx],
232                         0 /* flow_control_enable */,
233                         1 /* color_enable */,
234                         0 /* cq_head */,
235                         0 /* cq_tail */,
236                         1 /* cq_tail_color */,
237                         rxq_interrupt_enable,
238                         1 /* cq_entry_enable */,
239                         0 /* cq_message_enable */,
240                         rxq_interrupt_offset,
241                         0 /* cq_message_addr */);
242                 if (rxq_interrupt_enable)
243                         rxq_interrupt_offset++;
244         }
245
246         for (index = 0; index < enic->wq_count; index++) {
247                 vnic_wq_init(&enic->wq[index],
248                         enic_cq_wq(enic, index),
249                         error_interrupt_enable,
250                         error_interrupt_offset);
251                 /* Compute unsupported ol flags for enic_prep_pkts() */
252                 enic->wq[index].tx_offload_notsup_mask =
253                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
254
255                 cq_idx = enic_cq_wq(enic, index);
256                 vnic_cq_init(&enic->cq[cq_idx],
257                         0 /* flow_control_enable */,
258                         1 /* color_enable */,
259                         0 /* cq_head */,
260                         0 /* cq_tail */,
261                         1 /* cq_tail_color */,
262                         0 /* interrupt_enable */,
263                         0 /* cq_entry_enable */,
264                         1 /* cq_message_enable */,
265                         0 /* interrupt offset */,
266                         (uint64_t)enic->wq[index].cqmsg_rz->iova);
267         }
268
269         for (index = 0; index < enic->intr_count; index++) {
270                 vnic_intr_init(&enic->intr[index],
271                                enic->config.intr_timer_usec,
272                                enic->config.intr_timer_type,
273                                /*mask_on_assertion*/1);
274         }
275 }
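
/*
 * A sketch of the completion queue layout implied by the code above and
 * by enic_free_wq()/enic_alloc_rq() below: CQs 0..rq_count-1 serve the
 * Rx queues (one CQ shared by an Rx queue's SOP and data RQs, using CQ
 * entries), and CQs rq_count..rq_count+wq_count-1 serve the Tx queues
 * (using the 4-byte CQ message written to cqmsg_rz). For example, with
 * 2 Rx and 2 Tx queues, Tx queue 1 would use CQ index 2 + 1 = 3.
 */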
276
277
278 static int
279 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
280 {
281         struct rte_mbuf *mb;
282         struct rq_enet_desc *rqd = rq->ring.descs;
283         unsigned i;
284         dma_addr_t dma_addr;
285         uint32_t max_rx_pkt_len;
286         uint16_t rq_buf_len;
287
288         if (!rq->in_use)
289                 return 0;
290
291         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
292                   rq->ring.desc_count);
293
294         /*
295          * If *not* using scatter and the mbuf size is greater than the
296          * requested max packet size (max_rx_pkt_len), then reduce the
297          * posted buffer size to max_rx_pkt_len. HW still receives packets
298          * larger than max_rx_pkt_len, but they will be truncated, which we
299          * larger than max_rx_pkt_len, but they will be truncated, and we
300          * drop them in the rx handler. Not ideal, but better than returning
301          */
302         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
303         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
304         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
305                 rq_buf_len = max_rx_pkt_len;
306         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
307                 mb = rte_mbuf_raw_alloc(rq->mp);
308                 if (mb == NULL) {
309                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
310                         (unsigned)rq->index);
311                         return -ENOMEM;
312                 }
313
314                 mb->data_off = RTE_PKTMBUF_HEADROOM;
315                 dma_addr = (dma_addr_t)(mb->buf_iova
316                            + RTE_PKTMBUF_HEADROOM);
317                 rq_enet_desc_enc(rqd, dma_addr,
318                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
319                                 : RQ_ENET_TYPE_NOT_SOP),
320                                 rq_buf_len);
321                 rq->mbuf_ring[i] = mb;
322         }
323         /*
324          * Do not post the buffers to the NIC until we enable the RQ via
325          * enic_start_rq().
326          */
327         rq->need_initial_post = true;
328         /* Initialize fetch index while RQ is disabled */
329         iowrite32(0, &rq->ctrl->fetch_index);
330         return 0;
331 }
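
/*
 * Example of the buffer-size clamp above, with assumed sizes: a mempool
 * created with RTE_MBUF_DEFAULT_BUF_SIZE gives roughly 2048 usable bytes
 * once RTE_PKTMBUF_HEADROOM is subtracted. If max_rx_pkt_len is 1518 and
 * the data (scatter) queue is not enabled, rq_buf_len is reduced from
 * ~2048 to 1518, so longer frames arrive truncated and are dropped in
 * the Rx handler instead of being delivered oversized.
 */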
332
333 /*
334  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
335  * allocated the buffers and filled the RQ descriptor ring. Just need to push
336  * the post index to the NIC.
337  */
338 static void
339 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
340 {
341         if (!rq->in_use || !rq->need_initial_post)
342                 return;
343
344         /* make sure all prior writes are complete before doing the PIO write */
345         rte_rmb();
346
347         /* Post all but the last buffer to VIC. */
348         rq->posted_index = rq->ring.desc_count - 1;
349
350         rq->rx_nb_hold = 0;
351
352         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
353                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
354         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
355         rte_rmb();
356         rq->need_initial_post = false;
357 }
358
359 void *
360 enic_alloc_consistent(void *priv, size_t size,
361         dma_addr_t *dma_handle, uint8_t *name)
362 {
363         void *vaddr;
364         const struct rte_memzone *rz;
365         *dma_handle = 0;
366         struct enic *enic = (struct enic *)priv;
367         struct enic_memzone_entry *mze;
368
369         rz = rte_memzone_reserve_aligned((const char *)name, size,
370                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
371         if (!rz) {
372                 pr_err("%s : Failed to allocate memory requested for %s\n",
373                         __func__, name);
374                 return NULL;
375         }
376
377         vaddr = rz->addr;
378         *dma_handle = (dma_addr_t)rz->iova;
379
380         mze = rte_malloc("enic memzone entry",
381                          sizeof(struct enic_memzone_entry), 0);
382
383         if (!mze) {
384                 pr_err("%s : Failed to allocate memory for memzone list\n",
385                        __func__);
386                 rte_memzone_free(rz);
387                 return NULL;
388         }
389
390         mze->rz = rz;
391
392         rte_spinlock_lock(&enic->memzone_list_lock);
393         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
394         rte_spinlock_unlock(&enic->memzone_list_lock);
395
396         return vaddr;
397 }
398
399 void
400 enic_free_consistent(void *priv,
401                      __rte_unused size_t size,
402                      void *vaddr,
403                      dma_addr_t dma_handle)
404 {
405         struct enic_memzone_entry *mze;
406         struct enic *enic = (struct enic *)priv;
407
408         rte_spinlock_lock(&enic->memzone_list_lock);
409         LIST_FOREACH(mze, &enic->memzone_list, entries) {
410                 if (mze->rz->addr == vaddr &&
411                     mze->rz->iova == dma_handle)
412                         break;
413         }
414         if (mze == NULL) {
415                 rte_spinlock_unlock(&enic->memzone_list_lock);
416                 dev_warning(enic,
417                             "Tried to free memory, but couldn't find it in the memzone list\n");
418                 return;
419         }
420         LIST_REMOVE(mze, entries);
421         rte_spinlock_unlock(&enic->memzone_list_lock);
422         rte_memzone_free(mze->rz);
423         rte_free(mze);
424 }
425
426 int enic_link_update(struct rte_eth_dev *eth_dev)
427 {
428         struct enic *enic = pmd_priv(eth_dev);
429         struct rte_eth_link link;
430
431         memset(&link, 0, sizeof(link));
432         link.link_status = enic_get_link_status(enic);
433         link.link_duplex = ETH_LINK_FULL_DUPLEX;
434         link.link_speed = vnic_dev_port_speed(enic->vdev);
435
436         return rte_eth_linkstatus_set(eth_dev, &link);
437 }
438
439 static void
440 enic_intr_handler(void *arg)
441 {
442         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
443         struct enic *enic = pmd_priv(dev);
444
445         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
446
447         enic_link_update(dev);
448         rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
449         enic_log_q_error(enic);
450         /* Re-enable irq in case of INTx */
451         rte_intr_ack(&enic->pdev->intr_handle);
452 }
453
454 static int enic_rxq_intr_init(struct enic *enic)
455 {
456         struct rte_intr_handle *intr_handle;
457         uint32_t rxq_intr_count, i;
458         int err;
459
460         intr_handle = enic->rte_dev->intr_handle;
461         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
462                 return 0;
463         /*
464          * Rx queue interrupts only work when we have MSI-X interrupts,
465          * one per queue. Sharing one interrupt is technically
466          * possible with VIC, but it is not worth the complications it brings.
467          */
468         if (!rte_intr_cap_multiple(intr_handle)) {
469                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
470                         " (vfio-pci driver)\n");
471                 return -ENOTSUP;
472         }
473         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
474         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
475         if (err) {
476                 dev_err(enic, "Failed to enable event fds for Rx queue"
477                         " interrupts\n");
478                 return err;
479         }
480         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
481                                             rxq_intr_count * sizeof(int), 0);
482         if (intr_handle->intr_vec == NULL) {
483                 dev_err(enic, "Failed to allocate intr_vec\n");
484                 return -ENOMEM;
485         }
486         for (i = 0; i < rxq_intr_count; i++)
487                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
488         return 0;
489 }
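
/*
 * Illustrative application-side sketch, not part of the driver: with
 * intr_conf.rxq set at configure time, the per-queue vectors set up above
 * can be used to sleep until traffic arrives. Port/queue ids are
 * hypothetical and error handling is omitted.
 */
#if 0
static void example_wait_for_rx(uint16_t port_id, uint16_t queue_id)
{
        struct rte_epoll_event ev;

        /* Add the queue's event fd to this thread's epoll instance */
        rte_eth_dev_rx_intr_ctl_q(port_id, queue_id, RTE_EPOLL_PER_THREAD,
                                  RTE_INTR_EVENT_ADD, NULL);
        /* Unmask the interrupt, then block until the NIC signals it */
        rte_eth_dev_rx_intr_enable(port_id, queue_id);
        rte_epoll_wait(RTE_EPOLL_PER_THREAD, &ev, 1, -1 /* no timeout */);
        rte_eth_dev_rx_intr_disable(port_id, queue_id);
}
#endif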
490
491 static void enic_rxq_intr_deinit(struct enic *enic)
492 {
493         struct rte_intr_handle *intr_handle;
494
495         intr_handle = enic->rte_dev->intr_handle;
496         rte_intr_efd_disable(intr_handle);
497         if (intr_handle->intr_vec != NULL) {
498                 rte_free(intr_handle->intr_vec);
499                 intr_handle->intr_vec = NULL;
500         }
501 }
502
503 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
504 {
505         struct wq_enet_desc *desc;
506         struct vnic_wq *wq;
507         unsigned int i;
508
509         /*
510          * Fill WQ descriptor fields that never change. Every descriptor is
511          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
512          * descriptors (i.e. request one completion update every 32 packets).
513          */
514         wq = &enic->wq[queue_idx];
515         desc = (struct wq_enet_desc *)wq->ring.descs;
516         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
517                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
518                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
519                         desc->header_length_flags |=
520                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
521         }
522 }
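
/*
 * Example of the effect of the loop above, assuming ENIC_WQ_CQ_THRESH is
 * 32 as the comment states: CQ_ENTRY is set on descriptors 31, 63, 95,
 * ..., so a 512-entry ring requests at most 512 / 32 = 16 completion
 * updates per traversal instead of one per transmitted packet.
 */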
523
524 /*
525  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used
526  * when that file is not compiled.
527  */
528 __rte_weak bool
529 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
530 {
531         return false;
532 }
533
534 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
535 {
536         struct enic *enic = pmd_priv(eth_dev);
537
538         /*
539          * Preference order:
540          * 1. The vectorized handler if possible and requested.
541          * 2. The non-scatter, simplified handler if scatter Rx is not used.
542          * 3. The default handler as a fallback.
543          */
544         if (enic_use_vector_rx_handler(eth_dev))
545                 return;
546         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
547                 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
548                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
549         } else {
550                 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
551                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
552         }
553 }
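
/*
 * Example of the preference order above: if the vectorized handler is
 * unavailable (the weak stub returns false) and queue 0 was set up
 * without a data RQ, the non-scatter handler is chosen. Enabling
 * DEV_RX_OFFLOAD_SCATTER with a max_rx_pkt_len larger than the mbuf size
 * makes data_queue_enable non-zero, and the default enic_recv_pkts
 * handler is used instead.
 */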
554
555 /* Secondary process uses this to set the Tx handler */
556 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
557 {
558         struct enic *enic = pmd_priv(eth_dev);
559
560         if (enic->use_simple_tx_handler) {
561                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
562                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
563         } else {
564                 ENICPMD_LOG(DEBUG, " use the default tx handler");
565                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
566         }
567 }
568
569 int enic_enable(struct enic *enic)
570 {
571         unsigned int index;
572         int err;
573         struct rte_eth_dev *eth_dev = enic->rte_dev;
574         uint64_t simple_tx_offloads;
575         uintptr_t p;
576
577         if (enic->enable_avx2_rx) {
578                 struct rte_mbuf mb_def = { .buf_addr = 0 };
579
580                 /*
581                  * mbuf_initializer contains const-after-init fields of
582                  * receive mbufs (i.e. 64 bits of fields from rearm_data).
583                  * It is currently used by the vectorized handler.
584                  */
585                 mb_def.nb_segs = 1;
586                 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
587                 mb_def.port = enic->port_id;
588                 rte_mbuf_refcnt_set(&mb_def, 1);
589                 rte_compiler_barrier();
590                 p = (uintptr_t)&mb_def.rearm_data;
591                 enic->mbuf_initializer = *(uint64_t *)p;
592         }
593
594         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
595         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
596
597         /* vnic notification of link status has already been turned on in
598          * enic_dev_init(), which is called at probe time.  Here we are
599          * just turning on interrupt vector 0 if needed.
600          */
601         if (eth_dev->data->dev_conf.intr_conf.lsc)
602                 vnic_dev_notify_set(enic->vdev, 0);
603
604         err = enic_rxq_intr_init(enic);
605         if (err)
606                 return err;
607         if (enic_clsf_init(enic))
608                 dev_warning(enic, "Init of hash table for clsf failed. "\
609                         "Flow director feature will not work\n");
610
611         /* Initialize flowman if not already initialized during probe */
612         if (enic->fm == NULL && enic_fm_init(enic))
613                 dev_warning(enic, "Init of flowman failed.\n");
614
615         for (index = 0; index < enic->rq_count; index++) {
616                 err = enic_alloc_rx_queue_mbufs(enic,
617                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
618                 if (err) {
619                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
620                         return err;
621                 }
622                 err = enic_alloc_rx_queue_mbufs(enic,
623                         &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
624                 if (err) {
625                         /* release the allocated mbufs for the sop rq */
626                         enic_rxmbuf_queue_release(enic,
627                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
628
629                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
630                         return err;
631                 }
632         }
633
634         /*
635          * Use the simple TX handler if possible. Only checksum offloads
636          * and vlan insertion are supported.
637          */
638         simple_tx_offloads = enic->tx_offload_capa &
639                 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
640                  DEV_TX_OFFLOAD_VLAN_INSERT |
641                  DEV_TX_OFFLOAD_IPV4_CKSUM |
642                  DEV_TX_OFFLOAD_UDP_CKSUM |
643                  DEV_TX_OFFLOAD_TCP_CKSUM);
644         if ((eth_dev->data->dev_conf.txmode.offloads &
645              ~simple_tx_offloads) == 0) {
646                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
647                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
648                 for (index = 0; index < enic->wq_count; index++)
649                         enic_prep_wq_for_simple_tx(enic, index);
650                 enic->use_simple_tx_handler = 1;
651         } else {
652                 ENICPMD_LOG(DEBUG, " use the default tx handler");
653                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
654         }
655
656         enic_pick_rx_handler(eth_dev);
657
658         for (index = 0; index < enic->wq_count; index++)
659                 enic_start_wq(enic, index);
660         for (index = 0; index < enic->rq_count; index++)
661                 enic_start_rq(enic, index);
662
663         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
664
665         vnic_dev_enable_wait(enic->vdev);
666
667         /* Register and enable error interrupt */
668         rte_intr_callback_register(&(enic->pdev->intr_handle),
669                 enic_intr_handler, (void *)enic->rte_dev);
670
671         rte_intr_enable(&(enic->pdev->intr_handle));
672         /* Unmask LSC interrupt */
673         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
674
675         return 0;
676 }
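
/*
 * Example of the simple-Tx check above, with a hypothetical configuration:
 * requesting only DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM in
 * txmode.offloads leaves no bit outside simple_tx_offloads, so the simple
 * handler is selected; adding DEV_TX_OFFLOAD_TCP_TSO leaves a bit outside
 * the mask and the default handler is used instead.
 */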
677
678 int enic_alloc_intr_resources(struct enic *enic)
679 {
680         int err;
681         unsigned int i;
682
683         dev_info(enic, "vNIC resources used:  "\
684                 "wq %d rq %d cq %d intr %d\n",
685                 enic->wq_count, enic_vnic_rq_count(enic),
686                 enic->cq_count, enic->intr_count);
687
688         for (i = 0; i < enic->intr_count; i++) {
689                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
690                 if (err) {
691                         enic_free_vnic_resources(enic);
692                         return err;
693                 }
694         }
695         return 0;
696 }
697
698 void enic_free_rq(void *rxq)
699 {
700         struct vnic_rq *rq_sop, *rq_data;
701         struct enic *enic;
702
703         if (rxq == NULL)
704                 return;
705
706         rq_sop = (struct vnic_rq *)rxq;
707         enic = vnic_dev_priv(rq_sop->vdev);
708         rq_data = &enic->rq[rq_sop->data_queue_idx];
709
710         if (rq_sop->free_mbufs) {
711                 struct rte_mbuf **mb;
712                 int i;
713
714                 mb = rq_sop->free_mbufs;
715                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
716                      i < ENIC_RX_BURST_MAX; i++)
717                         rte_pktmbuf_free(mb[i]);
718                 rte_free(rq_sop->free_mbufs);
719                 rq_sop->free_mbufs = NULL;
720                 rq_sop->num_free_mbufs = 0;
721         }
722
723         enic_rxmbuf_queue_release(enic, rq_sop);
724         if (rq_data->in_use)
725                 enic_rxmbuf_queue_release(enic, rq_data);
726
727         rte_free(rq_sop->mbuf_ring);
728         if (rq_data->in_use)
729                 rte_free(rq_data->mbuf_ring);
730
731         rq_sop->mbuf_ring = NULL;
732         rq_data->mbuf_ring = NULL;
733
734         vnic_rq_free(rq_sop);
735         if (rq_data->in_use)
736                 vnic_rq_free(rq_data);
737
738         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
739
740         rq_sop->in_use = 0;
741         rq_data->in_use = 0;
742 }
743
744 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
745 {
746         struct rte_eth_dev_data *data = enic->dev_data;
747         vnic_wq_enable(&enic->wq[queue_idx]);
748         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
749 }
750
751 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
752 {
753         struct rte_eth_dev_data *data = enic->dev_data;
754         int ret;
755
756         ret = vnic_wq_disable(&enic->wq[queue_idx]);
757         if (ret)
758                 return ret;
759
760         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
761         return 0;
762 }
763
764 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
765 {
766         struct rte_eth_dev_data *data = enic->dev_data;
767         struct vnic_rq *rq_sop;
768         struct vnic_rq *rq_data;
769         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
770         rq_data = &enic->rq[rq_sop->data_queue_idx];
771
772         if (rq_data->in_use) {
773                 vnic_rq_enable(rq_data);
774                 enic_initial_post_rx(enic, rq_data);
775         }
776         rte_mb();
777         vnic_rq_enable(rq_sop);
778         enic_initial_post_rx(enic, rq_sop);
779         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
780 }
781
782 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
783 {
784         struct rte_eth_dev_data *data = enic->dev_data;
785         int ret1 = 0, ret2 = 0;
786         struct vnic_rq *rq_sop;
787         struct vnic_rq *rq_data;
788         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
789         rq_data = &enic->rq[rq_sop->data_queue_idx];
790
791         ret2 = vnic_rq_disable(rq_sop);
792         rte_mb();
793         if (rq_data->in_use)
794                 ret1 = vnic_rq_disable(rq_data);
795
796         if (ret2)
797                 return ret2;
798         else if (ret1)
799                 return ret1;
800
801         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
802         return 0;
803 }
804
805 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
806         unsigned int socket_id, struct rte_mempool *mp,
807         uint16_t nb_desc, uint16_t free_thresh)
808 {
809         int rc;
810         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
811         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
812         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
813         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
814         unsigned int mbuf_size, mbufs_per_pkt;
815         unsigned int nb_sop_desc, nb_data_desc;
816         uint16_t min_sop, max_sop, min_data, max_data;
817         uint32_t max_rx_pkt_len;
818
819         rq_sop->is_sop = 1;
820         rq_sop->data_queue_idx = data_queue_idx;
821         rq_data->is_sop = 0;
822         rq_data->data_queue_idx = 0;
823         rq_sop->socket_id = socket_id;
824         rq_sop->mp = mp;
825         rq_data->socket_id = socket_id;
826         rq_data->mp = mp;
827         rq_sop->in_use = 1;
828         rq_sop->rx_free_thresh = free_thresh;
829         rq_data->rx_free_thresh = free_thresh;
830         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
831                   free_thresh);
832
833         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
834                                RTE_PKTMBUF_HEADROOM);
835         /* max_rx_pkt_len includes the ethernet header and CRC. */
836         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
837
838         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
839             DEV_RX_OFFLOAD_SCATTER) {
840                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
841                 /* ceil((max pkt len)/mbuf_size) */
842                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
843         } else {
844                 dev_info(enic, "Scatter rx mode disabled\n");
845                 mbufs_per_pkt = 1;
846                 if (max_rx_pkt_len > mbuf_size) {
847                         dev_warning(enic, "The maximum Rx packet size (%u) is"
848                                     " larger than the mbuf size (%u), and"
849                                     " scatter is disabled. Larger packets will"
850                                     " be truncated.\n",
851                                     max_rx_pkt_len, mbuf_size);
852                 }
853         }
854
855         if (mbufs_per_pkt > 1) {
856                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
857                 rq_sop->data_queue_enable = 1;
858                 rq_data->in_use = 1;
859                 /*
860                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
861                  * receives packet sizes up to the "max" MTU.
862                  * If not using scatter, we can achieve the effect of dropping
863                  * larger packets by reducing the size of posted buffers.
864                  * See enic_alloc_rx_queue_mbufs().
865                  */
866                 if (max_rx_pkt_len <
867                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
868                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
869                                     " when scatter rx mode is in use.\n");
870                 }
871         } else {
872                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
873                          queue_idx);
874                 rq_sop->data_queue_enable = 0;
875                 rq_data->in_use = 0;
876         }
877
878         /* the number of descriptors has to be a multiple of 32 */
879         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
880         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
881
882         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
883         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
884
885         if (mbufs_per_pkt > 1) {
886                 min_sop = ENIC_RX_BURST_MAX;
887                 max_sop = ((enic->config.rq_desc_count /
888                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
889                 min_data = min_sop * (mbufs_per_pkt - 1);
890                 max_data = enic->config.rq_desc_count;
891         } else {
892                 min_sop = ENIC_RX_BURST_MAX;
893                 max_sop = enic->config.rq_desc_count;
894                 min_data = 0;
895                 max_data = 0;
896         }
897
898         if (nb_desc < (min_sop + min_data)) {
899                 dev_warning(enic,
900                             "Number of rx descs too low, adjusting to minimum\n");
901                 nb_sop_desc = min_sop;
902                 nb_data_desc = min_data;
903         } else if (nb_desc > (max_sop + max_data)) {
904                 dev_warning(enic,
905                             "Number of rx_descs too high, adjusting to maximum\n");
906                 nb_sop_desc = max_sop;
907                 nb_data_desc = max_data;
908         }
909         if (mbufs_per_pkt > 1) {
910                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
911                          " rx descriptor range is %u to %u\n",
912                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
913                          max_sop + max_data);
914         }
915         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
916                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
917
918         /* Allocate sop queue resources */
919         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
920                 nb_sop_desc, sizeof(struct rq_enet_desc));
921         if (rc) {
922                 dev_err(enic, "error in allocation of sop rq\n");
923                 goto err_exit;
924         }
925         nb_sop_desc = rq_sop->ring.desc_count;
926
927         if (rq_data->in_use) {
928                 /* Allocate data queue resources */
929                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
930                                    nb_data_desc,
931                                    sizeof(struct rq_enet_desc));
932                 if (rc) {
933                         dev_err(enic, "error in allocation of data rq\n");
934                         goto err_free_rq_sop;
935                 }
936                 nb_data_desc = rq_data->ring.desc_count;
937         }
938         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
939                            socket_id, nb_sop_desc + nb_data_desc,
940                            sizeof(struct cq_enet_rq_desc));
941         if (rc) {
942                 dev_err(enic, "error in allocation of cq for rq\n");
943                 goto err_free_rq_data;
944         }
945
946         /* Allocate the mbuf rings */
947         rq_sop->mbuf_ring = (struct rte_mbuf **)
948                 rte_zmalloc_socket("rq->mbuf_ring",
949                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
950                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
951         if (rq_sop->mbuf_ring == NULL)
952                 goto err_free_cq;
953
954         if (rq_data->in_use) {
955                 rq_data->mbuf_ring = (struct rte_mbuf **)
956                         rte_zmalloc_socket("rq->mbuf_ring",
957                                 sizeof(struct rte_mbuf *) * nb_data_desc,
958                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
959                 if (rq_data->mbuf_ring == NULL)
960                         goto err_free_sop_mbuf;
961         }
962
963         rq_sop->free_mbufs = (struct rte_mbuf **)
964                 rte_zmalloc_socket("rq->free_mbufs",
965                                    sizeof(struct rte_mbuf *) *
966                                    ENIC_RX_BURST_MAX,
967                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
968         if (rq_sop->free_mbufs == NULL)
969                 goto err_free_data_mbuf;
970         rq_sop->num_free_mbufs = 0;
971
972         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
973
974         return 0;
975
976 err_free_data_mbuf:
977         rte_free(rq_data->mbuf_ring);
978 err_free_sop_mbuf:
979         rte_free(rq_sop->mbuf_ring);
980 err_free_cq:
981         /* cleanup on error */
982         vnic_cq_free(&enic->cq[queue_idx]);
983 err_free_rq_data:
984         if (rq_data->in_use)
985                 vnic_rq_free(rq_data);
986 err_free_rq_sop:
987         vnic_rq_free(rq_sop);
988 err_exit:
989         return -ENOMEM;
990 }
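
/*
 * Worked example of the descriptor split above, with hypothetical sizes:
 * nb_desc = 512, max_rx_pkt_len = 9000 and mbuf_size = 2048 give
 * mbufs_per_pkt = ceil(9000 / 2048) = 5. Rounding down to multiples of
 * 32 then yields nb_sop_desc = 96 (from 512 / 5 = 102) and
 * nb_data_desc = 416 (from 512 - 96), before the min/max clamping and
 * the final adjustment to whatever vnic_rq_alloc() actually provides.
 */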
991
992 void enic_free_wq(void *txq)
993 {
994         struct vnic_wq *wq;
995         struct enic *enic;
996
997         if (txq == NULL)
998                 return;
999
1000         wq = (struct vnic_wq *)txq;
1001         enic = vnic_dev_priv(wq->vdev);
1002         rte_memzone_free(wq->cqmsg_rz);
1003         vnic_wq_free(wq);
1004         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1005 }
1006
1007 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1008         unsigned int socket_id, uint16_t nb_desc)
1009 {
1010         int err;
1011         struct vnic_wq *wq = &enic->wq[queue_idx];
1012         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
1013         char name[RTE_MEMZONE_NAMESIZE];
1014         static int instance;
1015
1016         wq->socket_id = socket_id;
1017         /*
1018          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1019          * print an info message for diagnostics.
1020          */
1021         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1022
1023         /* Allocate queue resources */
1024         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1025                 nb_desc,
1026                 sizeof(struct wq_enet_desc));
1027         if (err) {
1028                 dev_err(enic, "error in allocation of wq\n");
1029                 return err;
1030         }
1031
1032         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1033                 socket_id, nb_desc,
1034                 sizeof(struct cq_enet_wq_desc));
1035         if (err) {
1036                 vnic_wq_free(wq);
1037                 dev_err(enic, "error in allocation of cq for wq\n");
1038         }
1039
1040         /* set up CQ message */
1041         snprintf((char *)name, sizeof(name),
1042                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1043                 instance++);
1044
1045         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1046                         sizeof(uint32_t), SOCKET_ID_ANY,
1047                         RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1048         if (!wq->cqmsg_rz)
1049                 return -ENOMEM;
1050
1051         return err;
1052 }
1053
1054 int enic_disable(struct enic *enic)
1055 {
1056         unsigned int i;
1057         int err;
1058
1059         for (i = 0; i < enic->intr_count; i++) {
1060                 vnic_intr_mask(&enic->intr[i]);
1061                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1062         }
1063         enic_rxq_intr_deinit(enic);
1064         rte_intr_disable(&enic->pdev->intr_handle);
1065         rte_intr_callback_unregister(&enic->pdev->intr_handle,
1066                                      enic_intr_handler,
1067                                      (void *)enic->rte_dev);
1068
1069         vnic_dev_disable(enic->vdev);
1070
1071         enic_clsf_destroy(enic);
1072         enic_fm_destroy(enic);
1073
1074         if (!enic_is_sriov_vf(enic))
1075                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1076
1077         for (i = 0; i < enic->wq_count; i++) {
1078                 err = vnic_wq_disable(&enic->wq[i]);
1079                 if (err)
1080                         return err;
1081         }
1082         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1083                 if (enic->rq[i].in_use) {
1084                         err = vnic_rq_disable(&enic->rq[i]);
1085                         if (err)
1086                                 return err;
1087                 }
1088         }
1089
1090         /* If we were using interrupts, set the interrupt vector to -1
1091          * to disable interrupts.  We are not disabling link notifications,
1092          * though, as we want the polling of link status to continue working.
1093          */
1094         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1095                 vnic_dev_notify_set(enic->vdev, -1);
1096
1097         vnic_dev_set_reset_flag(enic->vdev, 1);
1098
1099         for (i = 0; i < enic->wq_count; i++)
1100                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1101
1102         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1103                 if (enic->rq[i].in_use)
1104                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1105         for (i = 0; i < enic->cq_count; i++)
1106                 vnic_cq_clean(&enic->cq[i]);
1107         for (i = 0; i < enic->intr_count; i++)
1108                 vnic_intr_clean(&enic->intr[i]);
1109
1110         return 0;
1111 }
1112
1113 static int enic_dev_wait(struct vnic_dev *vdev,
1114         int (*start)(struct vnic_dev *, int),
1115         int (*finished)(struct vnic_dev *, int *),
1116         int arg)
1117 {
1118         int done;
1119         int err;
1120         int i;
1121
1122         err = start(vdev, arg);
1123         if (err)
1124                 return err;
1125
1126         /* Wait for func to complete...2 seconds max */
1127         for (i = 0; i < 2000; i++) {
1128                 err = finished(vdev, &done);
1129                 if (err)
1130                         return err;
1131                 if (done)
1132                         return 0;
1133                 usleep(1000);
1134         }
1135         return -ETIMEDOUT;
1136 }
1137
1138 static int enic_dev_open(struct enic *enic)
1139 {
1140         int err;
1141         int flags = CMD_OPENF_IG_DESCCACHE;
1142
1143         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1144                 vnic_dev_open_done, flags);
1145         if (err)
1146                 dev_err(enic_get_dev(enic),
1147                         "vNIC device open failed, err %d\n", err);
1148
1149         return err;
1150 }
1151
1152 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1153 {
1154         dma_addr_t rss_key_buf_pa;
1155         union vnic_rss_key *rss_key_buf_va = NULL;
1156         int err, i;
1157         uint8_t name[RTE_MEMZONE_NAMESIZE];
1158
1159         RTE_ASSERT(user_key != NULL);
1160         snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1161         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1162                 &rss_key_buf_pa, name);
1163         if (!rss_key_buf_va)
1164                 return -ENOMEM;
1165
1166         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1167                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1168
1169         err = enic_set_rss_key(enic,
1170                 rss_key_buf_pa,
1171                 sizeof(union vnic_rss_key));
1172
1173         /* Save for later queries */
1174         if (!err) {
1175                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1176                            sizeof(union vnic_rss_key));
1177         }
1178         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1179                 rss_key_buf_va, rss_key_buf_pa);
1180
1181         return err;
1182 }
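
/*
 * Note on the copy above: the 40-byte RSS key is stored as four 10-byte
 * sub-keys, which is why byte i of user_key lands in key[i / 10].b[i % 10];
 * for example, user_key[23] is written to key[2].b[3].
 */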
1183
1184 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1185 {
1186         dma_addr_t rss_cpu_buf_pa;
1187         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1188         int err;
1189         uint8_t name[RTE_MEMZONE_NAMESIZE];
1190
1191         snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1192         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1193                 &rss_cpu_buf_pa, name);
1194         if (!rss_cpu_buf_va)
1195                 return -ENOMEM;
1196
1197         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1198
1199         err = enic_set_rss_cpu(enic,
1200                 rss_cpu_buf_pa,
1201                 sizeof(union vnic_rss_cpu));
1202
1203         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1204                 rss_cpu_buf_va, rss_cpu_buf_pa);
1205
1206         /* Save for later queries */
1207         if (!err)
1208                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1209         return err;
1210 }
1211
1212 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1213         uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1214         uint8_t rss_enable)
1215 {
1216         const uint8_t tso_ipid_split_en = 0;
1217         int err;
1218
1219         err = enic_set_nic_cfg(enic,
1220                 rss_default_cpu, rss_hash_type,
1221                 rss_hash_bits, rss_base_cpu,
1222                 rss_enable, tso_ipid_split_en,
1223                 enic->ig_vlan_strip_en);
1224
1225         return err;
1226 }
1227
1228 /* Initialize RSS with defaults, called from dev_configure */
1229 int enic_init_rss_nic_cfg(struct enic *enic)
1230 {
1231         static uint8_t default_rss_key[] = {
1232                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1233                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1234                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1235                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1236         };
1237         struct rte_eth_rss_conf rss_conf;
1238         union vnic_rss_cpu rss_cpu;
1239         int ret, i;
1240
1241         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1242         /*
1243          * If setting key for the first time, and the user gives us none, then
1244          * push the default key to NIC.
1245          */
1246         if (rss_conf.rss_key == NULL) {
1247                 rss_conf.rss_key = default_rss_key;
1248                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1249         }
1250         ret = enic_set_rss_conf(enic, &rss_conf);
1251         if (ret) {
1252                 dev_err(enic, "Failed to configure RSS\n");
1253                 return ret;
1254         }
1255         if (enic->rss_enable) {
1256                 /* If enabling RSS, use the default reta */
1257                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1258                         rss_cpu.cpu[i / 4].b[i % 4] =
1259                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1260                 }
1261                 ret = enic_set_rss_reta(enic, &rss_cpu);
1262                 if (ret)
1263                         dev_err(enic, "Failed to set RSS indirection table\n");
1264         }
1265         return ret;
1266 }
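
/*
 * Example of the default RETA fill above, assuming a 3-queue setup:
 * with rq_count = 3, the table entries cycle through the SOP indexes of
 * RQs 0, 1, 2, 0, 1, ... so flows are spread round-robin over all
 * configured Rx queues; entry i is packed into cpu[i / 4].b[i % 4]
 * because the table stores four one-byte entries per element.
 */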
1267
1268 int enic_setup_finish(struct enic *enic)
1269 {
1270         enic_init_soft_stats(enic);
1271
1272         /* switchdev: enable promisc mode on PF */
1273         if (enic->switchdev_mode) {
1274                 vnic_dev_packet_filter(enic->vdev,
1275                                        0 /* directed  */,
1276                                        0 /* multicast */,
1277                                        0 /* broadcast */,
1278                                        1 /* promisc   */,
1279                                        0 /* allmulti  */);
1280                 enic->promisc = 1;
1281                 enic->allmulti = 0;
1282                 return 0;
1283         }
1284         /* Default conf */
1285         vnic_dev_packet_filter(enic->vdev,
1286                 1 /* directed  */,
1287                 1 /* multicast */,
1288                 1 /* broadcast */,
1289                 0 /* promisc   */,
1290                 1 /* allmulti  */);
1291
1292         enic->promisc = 0;
1293         enic->allmulti = 1;
1294
1295         return 0;
1296 }
1297
1298 static int enic_rss_conf_valid(struct enic *enic,
1299                                struct rte_eth_rss_conf *rss_conf)
1300 {
1301         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1302         if (enic->flow_type_rss_offloads == 0)
1303                 return 0;
1304         if (rss_conf->rss_key != NULL &&
1305             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1306                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1307                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1308                 return -EINVAL;
1309         }
1310         if (rss_conf->rss_hf != 0 &&
1311             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1312                 dev_err(enic, "Given rss_hf contains none of the supported"
1313                         " types\n");
1314                 return -EINVAL;
1315         }
1316         return 0;
1317 }
1318
1319 /* Set hash type and key according to rss_conf */
1320 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1321 {
1322         struct rte_eth_dev *eth_dev;
1323         uint64_t rss_hf;
1324         uint8_t rss_hash_type;
1325         uint8_t rss_enable;
1326         int ret;
1327
1328         RTE_ASSERT(rss_conf != NULL);
1329         ret = enic_rss_conf_valid(enic, rss_conf);
1330         if (ret) {
1331                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1332                 return ret;
1333         }
1334
1335         eth_dev = enic->rte_dev;
1336         rss_hash_type = 0;
1337         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1338         if (enic->rq_count > 1 &&
1339             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1340             rss_hf != 0) {
1341                 rss_enable = 1;
1342                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1343                               ETH_RSS_NONFRAG_IPV4_OTHER))
1344                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1345                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1346                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1347                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1348                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1349                         if (enic->udp_rss_weak) {
1350                                 /*
1351                                  * 'TCP' is not a typo. The "weak" version of
1352                                  * UDP RSS requires both the TCP and UDP bits
1353                                  * be set. It does enable TCP RSS as well.
1354                                  */
1355                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1356                         }
1357                 }
1358                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1359                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1360                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1361                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1362                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1363                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1364                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1365                         if (enic->udp_rss_weak)
1366                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1367                 }
1368         } else {
1369                 rss_enable = 0;
1370                 rss_hf = 0;
1371         }
1372
1373         /* Set the hash key if provided */
1374         if (rss_enable && rss_conf->rss_key) {
1375                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1376                 if (ret) {
1377                         dev_err(enic, "Failed to set RSS key\n");
1378                         return ret;
1379                 }
1380         }
1381
1382         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1383                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1384                               rss_enable);
1385         if (!ret) {
1386                 enic->rss_hf = rss_hf;
1387                 enic->rss_hash_type = rss_hash_type;
1388                 enic->rss_enable = rss_enable;
1389         } else {
1390                 dev_err(enic, "Failed to update RSS configurations."
1391                         " hash=0x%x\n", rss_hash_type);
1392         }
1393         return ret;
1394 }
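
/*
 * Illustrative application-side sketch, not part of the driver: the
 * branch above is driven by what the application passes at configure
 * time. With this hypothetical setup the NIC ends up with the IPv4
 * TCP/UDP hash types enabled; the 40-byte key, port id and queue counts
 * are placeholders and error handling is omitted.
 */
#if 0
static int example_configure_rss(uint16_t port_id, uint8_t *key_40_bytes)
{
        struct rte_eth_conf conf;

        memset(&conf, 0, sizeof(conf));
        conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
        conf.rx_adv_conf.rss_conf.rss_key = key_40_bytes;
        conf.rx_adv_conf.rss_conf.rss_key_len = 40; /* ENIC_RSS_HASH_KEY_SIZE */
        conf.rx_adv_conf.rss_conf.rss_hf =
                ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP;
        /* 4 Rx / 4 Tx queues; enic_set_rss_conf() then enables RSS */
        return rte_eth_dev_configure(port_id, 4, 4, &conf);
}
#endif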
1395
1396 int enic_set_vlan_strip(struct enic *enic)
1397 {
1398         /*
1399          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1400          * together. So, re-do niccfg, preserving the current RSS settings.
1401          */
1402         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1403                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1404                                enic->rss_enable);
1405 }
1406
1407 int enic_add_packet_filter(struct enic *enic)
1408 {
1409         /* switchdev ignores packet filters */
1410         if (enic->switchdev_mode) {
1411                 ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1412                 return 0;
1413         }
1414         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1415         return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1416                 enic->promisc, enic->allmulti);
1417 }
1418
1419 int enic_get_link_status(struct enic *enic)
1420 {
1421         return vnic_dev_link_status(enic->vdev);
1422 }
1423
1424 static void enic_dev_deinit(struct enic *enic)
1425 {
1426         /* stop link status checking */
1427         vnic_dev_notify_unset(enic->vdev);
1428
1429         /* mac_addrs is freed by rte_eth_dev_release_port() */
1430         rte_free(enic->cq);
1431         rte_free(enic->intr);
1432         rte_free(enic->rq);
1433         rte_free(enic->wq);
1434 }
1435
1436
1437 int enic_set_vnic_res(struct enic *enic)
1438 {
1439         struct rte_eth_dev *eth_dev = enic->rte_dev;
1440         int rc = 0;
1441         unsigned int required_rq, required_wq, required_cq, required_intr;
1442
1443         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1444         required_rq = eth_dev->data->nb_rx_queues * 2;
1445         required_wq = eth_dev->data->nb_tx_queues;
1446         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1447         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1448         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1449                 required_intr += eth_dev->data->nb_rx_queues;
1450         }
1451
1452         if (enic->conf_rq_count < required_rq) {
1453                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %u RQs on VIC, Configured:%u\n",
1454                         eth_dev->data->nb_rx_queues,
1455                         required_rq, enic->conf_rq_count);
1456                 rc = -EINVAL;
1457         }
1458         if (enic->conf_wq_count < required_wq) {
1459                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1460                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1461                 rc = -EINVAL;
1462         }
1463
1464         if (enic->conf_cq_count < required_cq) {
1465                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1466                         required_cq, enic->conf_cq_count);
1467                 rc = -EINVAL;
1468         }
1469         if (enic->conf_intr_count < required_intr) {
1470                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1471                         " interrupts. Required:%u, Configured:%u\n",
1472                         required_intr, enic->conf_intr_count);
1473                 rc = -EINVAL;
1474         }
1475
1476         if (rc == 0) {
1477                 enic->rq_count = eth_dev->data->nb_rx_queues;
1478                 enic->wq_count = eth_dev->data->nb_tx_queues;
1479                 enic->cq_count = enic->rq_count + enic->wq_count;
1480                 enic->intr_count = required_intr;
1481         }
1482
1483         return rc;
1484 }
1485
1486 /* Reinitialize the CQ and RQs for an Rx queue and refill the RQs */
1487 static int
1488 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1489 {
1490         struct vnic_rq *sop_rq, *data_rq;
1491         unsigned int cq_idx;
1492         int rc = 0;
1493
1494         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1495         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1496         cq_idx = rq_idx;
1497
1498         vnic_cq_clean(&enic->cq[cq_idx]);
1499         vnic_cq_init(&enic->cq[cq_idx],
1500                      0 /* flow_control_enable */,
1501                      1 /* color_enable */,
1502                      0 /* cq_head */,
1503                      0 /* cq_tail */,
1504                      1 /* cq_tail_color */,
1505                      0 /* interrupt_enable */,
1506                      1 /* cq_entry_enable */,
1507                      0 /* cq_message_enable */,
1508                      0 /* interrupt offset */,
1509                      0 /* cq_message_addr */);
1510
1511
1512         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1513                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1514                            sop_rq->ring.desc_count - 1, 1, 0);
1515         if (data_rq->in_use) {
1516                 vnic_rq_init_start(data_rq,
1517                                    enic_cq_rq(enic,
1518                                    enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1519                                    0, data_rq->ring.desc_count - 1, 1, 0);
1520         }
1521
1522         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1523         if (rc)
1524                 return rc;
1525
1526         if (data_rq->in_use) {
1527                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1528                 if (rc) {
1529                         enic_rxmbuf_queue_release(enic, sop_rq);
1530                         return rc;
1531                 }
1532         }
1533
1534         return 0;
1535 }
1536
1537 /* The Cisco NIC can send and receive packets up to a max packet size
1538  * determined by the NIC type and firmware. There is also an MTU
1539  * configured into the NIC via the CIMC/UCSM management interface
1540  * which can be overridden by this function (up to the max packet size).
1541  * Depending on the network setup, doing so may cause packet drops
1542  * and unexpected behavior.
1543  */
1544 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1545 {
1546         unsigned int rq_idx;
1547         struct vnic_rq *rq;
1548         int rc = 0;
1549         uint16_t old_mtu;       /* previous setting */
1550         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1551         struct rte_eth_dev *eth_dev = enic->rte_dev;
1552
1553         old_mtu = eth_dev->data->mtu;
1554         config_mtu = enic->config.mtu;
1555
1556         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1557                 return -E_RTE_SECONDARY;
1558
1559         if (new_mtu > enic->max_mtu) {
1560                 dev_err(enic,
1561                         "MTU not updated: requested (%u) greater than max (%u)\n",
1562                         new_mtu, enic->max_mtu);
1563                 return -EINVAL;
1564         }
1565         if (new_mtu < ENIC_MIN_MTU) {
1566                 dev_info(enic,
1567                         "MTU not updated: requested (%u) less than min (%u)\n",
1568                         new_mtu, ENIC_MIN_MTU);
1569                 return -EINVAL;
1570         }
1571         if (new_mtu > config_mtu)
1572                 dev_warning(enic,
1573                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1574                         new_mtu, config_mtu);
1575
1576         /* Update the MTU and maximum packet length */
1577         eth_dev->data->mtu = new_mtu;
1578         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1579                 enic_mtu_to_max_rx_pktlen(new_mtu);
1580
1581         /*
1582          * If the device has not started (enic_enable), there is nothing to do.
1583          * Later, enic_enable() will set up RQs reflecting the new maximum
1584          * packet length.
1585          */
1586         if (!eth_dev->data->dev_started)
1587                 goto set_mtu_done;
1588
1589         /*
1590          * The device has started, re-do RQs on the fly. In the process, we
1591          * pick up the new maximum packet length.
1592          *
1593          * Some applications rely on the ability to change MTU without stopping
1594          * the device. So keep this behavior for now.
1595          */
1596         rte_spinlock_lock(&enic->mtu_lock);
1597
1598         /* Stop traffic on all RQs */
1599         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1600                 rq = &enic->rq[rq_idx];
1601                 if (rq->is_sop && rq->in_use) {
1602                         rc = enic_stop_rq(enic,
1603                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1604                         if (rc) {
1605                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1606                                 goto set_mtu_done;
1607                         }
1608                 }
1609         }
1610
1611         /* replace Rx function with a no-op to avoid getting stale pkts */
1612         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1613         rte_mb();
1614
1615         /* Allow time for threads to exit the real Rx function. */
1616         usleep(100000);
1617
1618         /* now it is safe to reconfigure the RQs */
1619
1620
1621         /* free and reallocate RQs with the new MTU */
1622         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1623                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1624                 if (!rq->in_use)
1625                         continue;
1626
1627                 enic_free_rq(rq);
1628                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1629                                    rq->tot_nb_desc, rq->rx_free_thresh);
1630                 if (rc) {
1631                         dev_err(enic,
1632                                 "Fatal MTU alloc error - no traffic will pass\n");
1633                         goto set_mtu_done;
1634                 }
1635
1636                 rc = enic_reinit_rq(enic, rq_idx);
1637                 if (rc) {
1638                         dev_err(enic,
1639                                 "Fatal MTU RQ reinit - no traffic will pass\n");
1640                         goto set_mtu_done;
1641                 }
1642         }
1643
1644         /* put back the real receive function */
1645         rte_mb();
1646         enic_pick_rx_handler(eth_dev);
1647         rte_mb();
1648
1649         /* restart Rx traffic */
1650         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1651                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1652                 if (rq->is_sop && rq->in_use)
1653                         enic_start_rq(enic, rq_idx);
1654         }
1655
1656 set_mtu_done:
1657         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1658         rte_spinlock_unlock(&enic->mtu_lock);
1659         return rc;
1660 }
1661
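/*
 * Per-device initialization: read the vNIC configuration, allocate the CQ,
 * interrupt, RQ, and WQ bookkeeping arrays, set up the MAC address and
 * link-status notification, and configure overlay (VXLAN/Geneve) offloads.
 */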
1662 static int enic_dev_init(struct enic *enic)
1663 {
1664         int err;
1665         struct rte_eth_dev *eth_dev = enic->rte_dev;
1666
1667         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1668
1669         /* Get the vNIC configuration */
1671         err = enic_get_vnic_config(enic);
1672         if (err) {
1673                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1674                 return err;
1675         }
1676
1677         /* Get available resource counts */
1678         enic_get_res_counts(enic);
1679         if (enic->conf_rq_count == 1) {
1680                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1681                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1682                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1683                 return -EINVAL;
1684         }
1685         /* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1686         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1687                                enic->conf_cq_count, 8);
1688         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1689                                  enic->conf_intr_count, 8);
1690         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1691                                enic->conf_rq_count, 8);
1692         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1693                                enic->conf_wq_count, 8);
1694         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1695                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1696                 return -1;
1697         }
1698         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1699                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1700                 return -1;
1701         }
1702         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1703                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1704                 return -1;
1705         }
1706         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1707                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1708                 return -1;
1709         }
1710
1711         /* Get the supported filters */
1712         enic_fdir_info(enic);
1713
1714         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1715                                         sizeof(struct rte_ether_addr) *
1716                                         ENIC_UNICAST_PERFECT_FILTERS, 0);
1717         if (!eth_dev->data->mac_addrs) {
1718                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1719                 return -1;
1720         }
1721         rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1722                         eth_dev->data->mac_addrs);
1723
1724         vnic_dev_set_reset_flag(enic->vdev, 0);
1725
1726         LIST_INIT(&enic->flows);
1727
1728         /* set up link status checking */
1729         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1730
1731         /*
1732          * When Geneve with options offload is available, always disable it
1733          * first as it can interfere with user flow rules.
1734          */
1735         if (enic->geneve_opt_avail) {
1736                 /*
1737                  * Disabling fails if the feature is provisioned but
1738                  * not enabled. So ignore result and do not log error.
1739                  */
1740                 vnic_dev_overlay_offload_ctrl(enic->vdev,
1741                         OVERLAY_FEATURE_GENEVE,
1742                         OVERLAY_OFFLOAD_DISABLE);
1743         }
1744         enic->overlay_offload = false;
1745         if (enic->disable_overlay && enic->vxlan) {
1746                 /*
1747                  * Explicitly disable overlay offload as the setting is
1748                  * sticky, and resetting the vNIC does not disable it.
1749                  */
1750                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1751                                                   OVERLAY_FEATURE_VXLAN,
1752                                                   OVERLAY_OFFLOAD_DISABLE)) {
1753                         dev_err(enic, "failed to disable overlay offload\n");
1754                 } else {
1755                         dev_info(enic, "Overlay offload is disabled\n");
1756                 }
1757         }
1758         if (!enic->disable_overlay && enic->vxlan &&
1759             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1760             vnic_dev_overlay_offload_ctrl(enic->vdev,
1761                                           OVERLAY_FEATURE_VXLAN,
1762                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1763                 enic->tx_offload_capa |=
1764                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1765                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1766                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1767                 enic->tx_offload_mask |=
1768                         PKT_TX_OUTER_IPV6 |
1769                         PKT_TX_OUTER_IPV4 |
1770                         PKT_TX_OUTER_IP_CKSUM |
1771                         PKT_TX_TUNNEL_MASK;
1772                 enic->overlay_offload = true;
1773                 dev_info(enic, "Overlay offload is enabled\n");
1774         }
1775         /* Geneve with options offload requires overlay offload */
1776         if (enic->overlay_offload && enic->geneve_opt_avail &&
1777             enic->geneve_opt_request) {
1778                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1779                                 OVERLAY_FEATURE_GENEVE,
1780                                 OVERLAY_OFFLOAD_ENABLE)) {
1781                         dev_err(enic, "failed to enable Geneve with options\n");
1782                 } else {
1783                         enic->geneve_opt_enabled = 1;
1784                         dev_info(enic, "Geneve with options is enabled\n");
1785                 }
1786         }
1787         /*
1788          * Reset the vxlan port if HW vxlan parsing is available. It
1789          * is always enabled regardless of overlay offload
1790          * enable/disable.
1791          */
1792         if (enic->vxlan) {
1793                 enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1794                 /*
1795                  * Reset the vxlan port to the default, as the NIC firmware
1796                  * does not reset it automatically and keeps the old setting.
1797                  */
1798                 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1799                                                  OVERLAY_CFG_VXLAN_PORT_UPDATE,
1800                                                  RTE_VXLAN_DEFAULT_PORT)) {
1801                         dev_err(enic, "failed to update vxlan port\n");
1802                         return -EINVAL;
1803                 }
1804         }
1805
1806         if (enic_fm_init(enic))
1807                 dev_warning(enic, "Init of flowman failed.\n");
1808         return 0;
1809
1810 }
1811
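/*
 * devcmd lock callbacks. They are registered in switchdev mode (see
 * enic_probe) so that devcmds issued through the PF and its VF representors
 * are serialized.
 */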
1812 static void lock_devcmd(void *priv)
1813 {
1814         struct enic *enic = priv;
1815
1816         rte_spinlock_lock(&enic->devcmd_lock);
1817 }
1818
1819 static void unlock_devcmd(void *priv)
1820 {
1821         struct enic *enic = priv;
1822
1823         rte_spinlock_unlock(&enic->devcmd_lock);
1824 }
1825
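/*
 * One-time probe: record BAR0, register the vNIC with the devcmd layer,
 * open and initialize the device, and set the ingress VLAN rewrite mode.
 * Hardware setup is skipped in secondary processes.
 */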
1826 int enic_probe(struct enic *enic)
1827 {
1828         struct rte_pci_device *pdev = enic->pdev;
1829         int err = -1;
1830
1831         dev_debug(enic, "Initializing ENIC PMD\n");
1832
1833         /* If this is a secondary process, the hardware is already initialized */
1834         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1835                 return 0;
1836
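        /* BAR0 holds the vNIC resources; the PCI driver has already mapped it */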
1837         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1838         enic->bar0.len = pdev->mem_resource[0].len;
1839
1840         /* Register vNIC device */
1841         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1842         if (!enic->vdev) {
1843                 dev_err(enic, "vNIC registration failed, aborting\n");
1844                 goto err_out;
1845         }
1846
1847         LIST_INIT(&enic->memzone_list);
1848         rte_spinlock_init(&enic->memzone_list_lock);
1849
1850         vnic_register_cbacks(enic->vdev,
1851                 enic_alloc_consistent,
1852                 enic_free_consistent);
1853
1854         /*
1855          * Allocate the consistent memory for stats upfront so both primary and
1856          * secondary processes can dump stats.
1857          */
1858         err = vnic_dev_alloc_stats_mem(enic->vdev);
1859         if (err) {
1860                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1861                 goto err_out_unregister;
1862         }
1863         /* Issue device open to get device in known state */
1864         err = enic_dev_open(enic);
1865         if (err) {
1866                 dev_err(enic, "vNIC dev open failed, aborting\n");
1867                 goto err_out_unregister;
1868         }
1869
1870         /* Set ingress vlan rewrite mode before vnic initialization */
1871         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1872                   enic->ig_vlan_rewrite_mode);
1873         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1874                 enic->ig_vlan_rewrite_mode);
1875         if (err) {
1876                 dev_err(enic,
1877                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1878                 goto err_out_dev_close;
1879         }
1880
1881         /* Issue device init to initialize the vnic-to-switch link.
1882          * We'll start with carrier off and wait for link UP
1883          * notification later to turn on carrier.  We don't need
1884          * to wait here for the vnic-to-switch link initialization
1885          * to complete; link UP notification is the indication that
1886          * the process is complete.
1887          */
1888
1889         err = vnic_dev_init(enic->vdev, 0);
1890         if (err) {
1891                 dev_err(enic, "vNIC dev init failed, aborting\n");
1892                 goto err_out_dev_close;
1893         }
1894
1895         err = enic_dev_init(enic);
1896         if (err) {
1897                 dev_err(enic, "Device initialization failed, aborting\n");
1898                 goto err_out_dev_close;
1899         }
1900
1901         /* Use a PF spinlock to serialize devcmd from PF and VF representors */
1902         if (enic->switchdev_mode) {
1903                 rte_spinlock_init(&enic->devcmd_lock);
1904                 vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1905         }
1906         return 0;
1907
1908 err_out_dev_close:
1909         vnic_dev_close(enic->vdev);
1910 err_out_unregister:
1911         vnic_dev_unregister(enic->vdev);
1912 err_out:
1913         return err;
1914 }
1915
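/* Tear down everything set up by enic_probe() and enic_dev_init() */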
1916 void enic_remove(struct enic *enic)
1917 {
1918         enic_dev_deinit(enic);
1919         vnic_dev_close(enic->vdev);
1920         vnic_dev_unregister(enic->vdev);
1921 }