net/enic: add single queue Tx and Rx to VF representor
[dpdk.git] drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
19
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37
38 static int is_zero_addr(uint8_t *addr)
39 {
40         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42
43 static int is_mcast_addr(uint8_t *addr)
44 {
45         return addr[0] & 1;
46 }
47
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50         return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52
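/*
 * Free any mbufs still held in the RQ's software ring, e.g. when a queue is
 * released or the port is stopped. Each freed entry is set to NULL.
 */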
53 void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56         uint16_t i;
57
58         if (!rq || !rq->mbuf_ring) {
59                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60                 return;
61         }
62
63         for (i = 0; i < rq->ring.desc_count; i++) {
64                 if (rq->mbuf_ring[i]) {
65                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66                         rq->mbuf_ring[i] = NULL;
67                 }
68         }
69 }
70
71 void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73         struct rte_mbuf *mbuf = *buf;
74
75         rte_pktmbuf_free_seg(mbuf);
76         *buf = NULL;
77 }
78
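/*
 * Log any non-zero WQ/RQ hardware error status. Called from the error and
 * link-status interrupt handler.
 */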
79 static void enic_log_q_error(struct enic *enic)
80 {
81         unsigned int i;
82         uint32_t error_status;
83
84         for (i = 0; i < enic->wq_count; i++) {
85                 error_status = vnic_wq_error_status(&enic->wq[i]);
86                 if (error_status)
87                         dev_err(enic, "WQ[%d] error_status %d\n", i,
88                                 error_status);
89         }
90
91         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92                 if (!enic->rq[i].in_use)
93                         continue;
94                 error_status = vnic_rq_error_status(&enic->rq[i]);
95                 if (error_status)
96                         dev_err(enic, "RQ[%d] error_status %d\n", i,
97                                 error_status);
98         }
99 }
100
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103         struct enic_soft_stats *soft_stats = &enic->soft_stats;
104         rte_atomic64_clear(&soft_stats->rx_nombuf);
105         rte_atomic64_clear(&soft_stats->rx_packet_errors);
106         rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111         struct enic_soft_stats *soft_stats = &enic->soft_stats;
112         rte_atomic64_init(&soft_stats->rx_nombuf);
113         rte_atomic64_init(&soft_stats->rx_packet_errors);
114         rte_atomic64_init(&soft_stats->tx_oversized);
115         enic_clear_soft_stats(enic);
116 }
117
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120         int ret;
121
122         ret = vnic_dev_stats_clear(enic->vdev);
123         if (ret != 0) {
124                 dev_err(enic, "Error in clearing stats\n");
125                 return ret;
126         }
127         enic_clear_soft_stats(enic);
128
129         return 0;
130 }
131
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134         struct vnic_stats *stats;
135         struct enic_soft_stats *soft_stats = &enic->soft_stats;
136         int64_t rx_truncated;
137         uint64_t rx_packet_errors;
138         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139
140         if (ret) {
141                 dev_err(enic, "Error in getting stats\n");
142                 return ret;
143         }
144
145         /* The number of truncated packets can only be calculated by
146          * subtracting a hardware counter from error packets received by
147          * the driver. Note: this causes transient inaccuracies in the
148          * ipackets count. Also, the lengths of truncated packets are
149          * counted in ibytes even though truncated packets are dropped,
150          * which can make ibytes slightly higher than it should be.
151          */
152         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154
155         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156         r_stats->opackets = stats->tx.tx_frames_ok;
157
158         r_stats->ibytes = stats->rx.rx_bytes_ok;
159         r_stats->obytes = stats->tx.tx_bytes_ok;
160
161         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162         r_stats->oerrors = stats->tx.tx_errors
163                            + rte_atomic64_read(&soft_stats->tx_oversized);
164
165         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166
167         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168         return 0;
169 }
170
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173         struct rte_eth_dev *eth_dev = enic->rte_dev;
174         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175
176         return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181         int err;
182
183         if (!is_eth_addr_valid(mac_addr)) {
184                 dev_err(enic, "invalid mac address\n");
185                 return -EINVAL;
186         }
187
188         err = vnic_dev_add_addr(enic->vdev, mac_addr);
189         if (err)
190                 dev_err(enic, "add mac addr failed\n");
191         return err;
192 }
193
194 void enic_free_rq_buf(struct rte_mbuf **mbuf)
195 {
196         if (*mbuf == NULL)
197                 return;
198
199         rte_pktmbuf_free(*mbuf);
200         *mbuf = NULL;
201 }
202
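/*
 * Program the vNIC resources: for each Rx queue, initialize the sop RQ (and
 * the data RQ when scatter is in use) and its CQ, optionally enabling a
 * per-queue interrupt; for each Tx queue, initialize the WQ and a CQ whose
 * completion messages are written to the per-WQ cqmsg memzone; finally set
 * up the interrupt resources.
 */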
203 void enic_init_vnic_resources(struct enic *enic)
204 {
205         unsigned int error_interrupt_enable = 1;
206         unsigned int error_interrupt_offset = 0;
207         unsigned int rxq_interrupt_enable = 0;
208         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
209         unsigned int index = 0;
210         unsigned int cq_idx;
211         struct vnic_rq *data_rq;
212
213         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
214                 rxq_interrupt_enable = 1;
215
216         for (index = 0; index < enic->rq_count; index++) {
217                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
218
219                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
220                         cq_idx,
221                         error_interrupt_enable,
222                         error_interrupt_offset);
223
224                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
225                 if (data_rq->in_use)
226                         vnic_rq_init(data_rq,
227                                      cq_idx,
228                                      error_interrupt_enable,
229                                      error_interrupt_offset);
230                 vnic_cq_init(&enic->cq[cq_idx],
231                         0 /* flow_control_enable */,
232                         1 /* color_enable */,
233                         0 /* cq_head */,
234                         0 /* cq_tail */,
235                         1 /* cq_tail_color */,
236                         rxq_interrupt_enable,
237                         1 /* cq_entry_enable */,
238                         0 /* cq_message_enable */,
239                         rxq_interrupt_offset,
240                         0 /* cq_message_addr */);
241                 if (rxq_interrupt_enable)
242                         rxq_interrupt_offset++;
243         }
244
245         for (index = 0; index < enic->wq_count; index++) {
246                 vnic_wq_init(&enic->wq[index],
247                         enic_cq_wq(enic, index),
248                         error_interrupt_enable,
249                         error_interrupt_offset);
250                 /* Compute unsupported ol flags for enic_prep_pkts() */
251                 enic->wq[index].tx_offload_notsup_mask =
252                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
253
254                 cq_idx = enic_cq_wq(enic, index);
255                 vnic_cq_init(&enic->cq[cq_idx],
256                         0 /* flow_control_enable */,
257                         1 /* color_enable */,
258                         0 /* cq_head */,
259                         0 /* cq_tail */,
260                         1 /* cq_tail_color */,
261                         0 /* interrupt_enable */,
262                         0 /* cq_entry_enable */,
263                         1 /* cq_message_enable */,
264                         0 /* interrupt offset */,
265                         (uint64_t)enic->wq[index].cqmsg_rz->iova);
266         }
267
268         for (index = 0; index < enic->intr_count; index++) {
269                 vnic_intr_init(&enic->intr[index],
270                                enic->config.intr_timer_usec,
271                                enic->config.intr_timer_type,
272                                /*mask_on_assertion*/1);
273         }
274 }
275
276
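/*
 * Allocate one mbuf per descriptor and fill the RQ descriptor ring. The
 * buffers are not posted to the NIC here; enic_start_rq() does that via
 * enic_initial_post_rx().
 */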
277 int
278 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
279 {
280         struct rte_mbuf *mb;
281         struct rq_enet_desc *rqd = rq->ring.descs;
282         unsigned i;
283         dma_addr_t dma_addr;
284         uint32_t max_rx_pkt_len;
285         uint16_t rq_buf_len;
286
287         if (!rq->in_use)
288                 return 0;
289
290         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
291                   rq->ring.desc_count);
292
293         /*
294          * If *not* using scatter and the mbuf size is greater than the
295          * requested max packet size (max_rx_pkt_len), then reduce the
296          * posted buffer size to max_rx_pkt_len. HW still receives packets
297          * larger than max_rx_pkt_len, but they will be truncated, which we
298          * drop in the rx handler. Not ideal, but better than returning
299          * large packets when the user is not expecting them.
300          */
301         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
302         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
303         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
304                 rq_buf_len = max_rx_pkt_len;
305         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
306                 mb = rte_mbuf_raw_alloc(rq->mp);
307                 if (mb == NULL) {
308                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
309                         (unsigned)rq->index);
310                         return -ENOMEM;
311                 }
312
313                 mb->data_off = RTE_PKTMBUF_HEADROOM;
314                 dma_addr = (dma_addr_t)(mb->buf_iova
315                            + RTE_PKTMBUF_HEADROOM);
316                 rq_enet_desc_enc(rqd, dma_addr,
317                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
318                                 : RQ_ENET_TYPE_NOT_SOP),
319                                 rq_buf_len);
320                 rq->mbuf_ring[i] = mb;
321         }
322         /*
323          * Do not post the buffers to the NIC until we enable the RQ via
324          * enic_start_rq().
325          */
326         rq->need_initial_post = true;
327         /* Initialize fetch index while RQ is disabled */
328         iowrite32(0, &rq->ctrl->fetch_index);
329         return 0;
330 }
331
332 /*
333  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
334  * allocated the buffers and filled the RQ descriptor ring. Just need to push
335  * the post index to the NIC.
336  */
337 static void
338 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
339 {
340         if (!rq->in_use || !rq->need_initial_post)
341                 return;
342
343         /* make sure all prior writes are complete before doing the PIO write */
344         rte_rmb();
345
346         /* Post all but the last buffer to VIC. */
347         rq->posted_index = rq->ring.desc_count - 1;
348
349         rq->rx_nb_hold = 0;
350
351         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
352                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
353         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
354         rte_rmb();
355         rq->need_initial_post = false;
356 }
357
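/*
 * DMA-coherent allocation callback used by the vnic_dev layer. Reserves an
 * IOVA-contiguous memzone and records it on enic->memzone_list so that
 * enic_free_consistent() can later find and free it by vaddr/IOVA.
 */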
358 void *
359 enic_alloc_consistent(void *priv, size_t size,
360         dma_addr_t *dma_handle, uint8_t *name)
361 {
362         void *vaddr;
363         const struct rte_memzone *rz;
364         *dma_handle = 0;
365         struct enic *enic = (struct enic *)priv;
366         struct enic_memzone_entry *mze;
367
368         rz = rte_memzone_reserve_aligned((const char *)name, size,
369                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
370         if (!rz) {
371                 pr_err("%s : Failed to allocate memory requested for %s\n",
372                         __func__, name);
373                 return NULL;
374         }
375
376         vaddr = rz->addr;
377         *dma_handle = (dma_addr_t)rz->iova;
378
379         mze = rte_malloc("enic memzone entry",
380                          sizeof(struct enic_memzone_entry), 0);
381
382         if (!mze) {
383                 pr_err("%s : Failed to allocate memory for memzone list\n",
384                        __func__);
385                 rte_memzone_free(rz);
386                 return NULL;
387         }
388
389         mze->rz = rz;
390
391         rte_spinlock_lock(&enic->memzone_list_lock);
392         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
393         rte_spinlock_unlock(&enic->memzone_list_lock);
394
395         return vaddr;
396 }
397
398 void
399 enic_free_consistent(void *priv,
400                      __rte_unused size_t size,
401                      void *vaddr,
402                      dma_addr_t dma_handle)
403 {
404         struct enic_memzone_entry *mze;
405         struct enic *enic = (struct enic *)priv;
406
407         rte_spinlock_lock(&enic->memzone_list_lock);
408         LIST_FOREACH(mze, &enic->memzone_list, entries) {
409                 if (mze->rz->addr == vaddr &&
410                     mze->rz->iova == dma_handle)
411                         break;
412         }
413         if (mze == NULL) {
414                 rte_spinlock_unlock(&enic->memzone_list_lock);
415                 dev_warning(enic,
416                             "Tried to free memory, but couldn't find it in the memzone list\n");
417                 return;
418         }
419         LIST_REMOVE(mze, entries);
420         rte_spinlock_unlock(&enic->memzone_list_lock);
421         rte_memzone_free(mze->rz);
422         rte_free(mze);
423 }
424
425 int enic_link_update(struct rte_eth_dev *eth_dev)
426 {
427         struct enic *enic = pmd_priv(eth_dev);
428         struct rte_eth_link link;
429
430         memset(&link, 0, sizeof(link));
431         link.link_status = enic_get_link_status(enic);
432         link.link_duplex = ETH_LINK_FULL_DUPLEX;
433         link.link_speed = vnic_dev_port_speed(enic->vdev);
434
435         return rte_eth_linkstatus_set(eth_dev, &link);
436 }
437
438 static void
439 enic_intr_handler(void *arg)
440 {
441         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
442         struct enic *enic = pmd_priv(dev);
443
444         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
445
446         enic_link_update(dev);
447         rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
448         enic_log_q_error(enic);
449         /* Re-enable irq in case of INTx */
450         rte_intr_ack(&enic->pdev->intr_handle);
451 }
452
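/*
 * Set up per-Rx-queue interrupts: requires MSI-X (one vector per queue),
 * enables the event fds, and fills intr_handle->intr_vec so that Rx queue i
 * maps to interrupt vector i + ENICPMD_RXQ_INTR_OFFSET.
 */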
453 static int enic_rxq_intr_init(struct enic *enic)
454 {
455         struct rte_intr_handle *intr_handle;
456         uint32_t rxq_intr_count, i;
457         int err;
458
459         intr_handle = enic->rte_dev->intr_handle;
460         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
461                 return 0;
462         /*
463          * Rx queue interrupts only work when we have MSI-X interrupts,
464          * one per queue. Sharing one interrupt is technically
465          * possible with VIC, but it is not worth the complications it brings.
466          */
467         if (!rte_intr_cap_multiple(intr_handle)) {
468                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469                         " (vfio-pci driver)\n");
470                 return -ENOTSUP;
471         }
472         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
474         if (err) {
475                 dev_err(enic, "Failed to enable event fds for Rx queue"
476                         " interrupts\n");
477                 return err;
478         }
479         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480                                             rxq_intr_count * sizeof(int), 0);
481         if (intr_handle->intr_vec == NULL) {
482                 dev_err(enic, "Failed to allocate intr_vec\n");
483                 return -ENOMEM;
484         }
485         for (i = 0; i < rxq_intr_count; i++)
486                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
487         return 0;
488 }
489
490 static void enic_rxq_intr_deinit(struct enic *enic)
491 {
492         struct rte_intr_handle *intr_handle;
493
494         intr_handle = enic->rte_dev->intr_handle;
495         rte_intr_efd_disable(intr_handle);
496         if (intr_handle->intr_vec != NULL) {
497                 rte_free(intr_handle->intr_vec);
498                 intr_handle->intr_vec = NULL;
499         }
500 }
501
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
503 {
504         struct wq_enet_desc *desc;
505         struct vnic_wq *wq;
506         unsigned int i;
507
508         /*
509          * Fill WQ descriptor fields that never change. Every descriptor is
510          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511          * descriptors (i.e. request one completion update every 32 packets).
512          */
513         wq = &enic->wq[queue_idx];
514         desc = (struct wq_enet_desc *)wq->ring.descs;
515         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518                         desc->header_length_flags |=
519                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
520         }
521 }
522
523 /*
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
525  * used when that file is not compiled.
526  */
527 __rte_weak bool
528 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
529 {
530         return false;
531 }
532
533 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
534 {
535         struct enic *enic = pmd_priv(eth_dev);
536
537         /*
538          * Preference order:
539          * 1. The vectorized handler if possible and requested.
540          * 2. The non-scatter, simplified handler if scatter Rx is not used.
541          * 3. The default handler as a fallback.
542          */
543         if (enic_use_vector_rx_handler(eth_dev))
544                 return;
545         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
546                 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
547                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
548         } else {
549                 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
550                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
551         }
552 }
553
554 /* Secondary process uses this to set the Tx handler */
555 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
556 {
557         struct enic *enic = pmd_priv(eth_dev);
558
559         if (enic->use_simple_tx_handler) {
560                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
561                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
562         } else {
563                 ENICPMD_LOG(DEBUG, " use the default tx handler");
564                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
565         }
566 }
567
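/*
 * Start the port: enable per-Rx-queue interrupts if requested, initialize
 * the classifier and flowman as needed, allocate and fill the Rx buffers,
 * pick the Tx/Rx burst handlers, enable all WQs and RQs, program the MAC
 * address, enable the vNIC, and register/unmask the error and link-status
 * interrupt.
 */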
568 int enic_enable(struct enic *enic)
569 {
570         unsigned int index;
571         int err;
572         struct rte_eth_dev *eth_dev = enic->rte_dev;
573         uint64_t simple_tx_offloads;
574         uintptr_t p;
575
576         if (enic->enable_avx2_rx) {
577                 struct rte_mbuf mb_def = { .buf_addr = 0 };
578
579                 /*
580                  * mbuf_initializer contains const-after-init fields of
581                  * receive mbufs (i.e. 64 bits of fields from rearm_data).
582                  * It is currently used by the vectorized handler.
583                  */
584                 mb_def.nb_segs = 1;
585                 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
586                 mb_def.port = enic->port_id;
587                 rte_mbuf_refcnt_set(&mb_def, 1);
588                 rte_compiler_barrier();
589                 p = (uintptr_t)&mb_def.rearm_data;
590                 enic->mbuf_initializer = *(uint64_t *)p;
591         }
592
593         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
594         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
595
596         /* vnic notification of link status has already been turned on in
597          * enic_dev_init() which is called during probe time.  Here we are
598          * just turning on interrupt vector 0 if needed.
599          */
600         if (eth_dev->data->dev_conf.intr_conf.lsc)
601                 vnic_dev_notify_set(enic->vdev, 0);
602
603         err = enic_rxq_intr_init(enic);
604         if (err)
605                 return err;
606         if (enic_clsf_init(enic))
607                 dev_warning(enic, "Init of hash table for clsf failed. "\
608                         "Flow director feature will not work\n");
609
610         /* Initialize flowman if not already initialized during probe */
611         if (enic->fm == NULL && enic_fm_init(enic))
612                 dev_warning(enic, "Init of flowman failed.\n");
613
614         for (index = 0; index < enic->rq_count; index++) {
615                 err = enic_alloc_rx_queue_mbufs(enic,
616                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
617                 if (err) {
618                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
619                         return err;
620                 }
621                 err = enic_alloc_rx_queue_mbufs(enic,
622                         &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
623                 if (err) {
624                         /* release the allocated mbufs for the sop rq */
625                         enic_rxmbuf_queue_release(enic,
626                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
627
628                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
629                         return err;
630                 }
631         }
632
633         /*
634          * Use the simple TX handler if possible. Only checksum offloads
635          * and vlan insertion are supported.
636          */
637         simple_tx_offloads = enic->tx_offload_capa &
638                 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
639                  DEV_TX_OFFLOAD_VLAN_INSERT |
640                  DEV_TX_OFFLOAD_IPV4_CKSUM |
641                  DEV_TX_OFFLOAD_UDP_CKSUM |
642                  DEV_TX_OFFLOAD_TCP_CKSUM);
643         if ((eth_dev->data->dev_conf.txmode.offloads &
644              ~simple_tx_offloads) == 0) {
645                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
646                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
647                 for (index = 0; index < enic->wq_count; index++)
648                         enic_prep_wq_for_simple_tx(enic, index);
649                 enic->use_simple_tx_handler = 1;
650         } else {
651                 ENICPMD_LOG(DEBUG, " use the default tx handler");
652                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
653         }
654
655         enic_pick_rx_handler(eth_dev);
656
657         for (index = 0; index < enic->wq_count; index++)
658                 enic_start_wq(enic, index);
659         for (index = 0; index < enic->rq_count; index++)
660                 enic_start_rq(enic, index);
661
662         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
663
664         vnic_dev_enable_wait(enic->vdev);
665
666         /* Register and enable error interrupt */
667         rte_intr_callback_register(&(enic->pdev->intr_handle),
668                 enic_intr_handler, (void *)enic->rte_dev);
669
670         rte_intr_enable(&(enic->pdev->intr_handle));
671         /* Unmask LSC interrupt */
672         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
673
674         return 0;
675 }
676
677 int enic_alloc_intr_resources(struct enic *enic)
678 {
679         int err;
680         unsigned int i;
681
682         dev_info(enic, "vNIC resources used:  "\
683                 "wq %d rq %d cq %d intr %d\n",
684                 enic->wq_count, enic_vnic_rq_count(enic),
685                 enic->cq_count, enic->intr_count);
686
687         for (i = 0; i < enic->intr_count; i++) {
688                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
689                 if (err) {
690                         enic_free_vnic_resources(enic);
691                         return err;
692                 }
693         }
694         return 0;
695 }
696
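/*
 * Release an Rx queue: free any mbufs cached in the burst free list and the
 * ones still posted in the sop/data rings, then free the RQs, their mbuf
 * rings, and the associated CQ.
 */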
697 void enic_free_rq(void *rxq)
698 {
699         struct vnic_rq *rq_sop, *rq_data;
700         struct enic *enic;
701
702         if (rxq == NULL)
703                 return;
704
705         rq_sop = (struct vnic_rq *)rxq;
706         enic = vnic_dev_priv(rq_sop->vdev);
707         rq_data = &enic->rq[rq_sop->data_queue_idx];
708
709         if (rq_sop->free_mbufs) {
710                 struct rte_mbuf **mb;
711                 int i;
712
713                 mb = rq_sop->free_mbufs;
714                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
715                      i < ENIC_RX_BURST_MAX; i++)
716                         rte_pktmbuf_free(mb[i]);
717                 rte_free(rq_sop->free_mbufs);
718                 rq_sop->free_mbufs = NULL;
719                 rq_sop->num_free_mbufs = 0;
720         }
721
722         enic_rxmbuf_queue_release(enic, rq_sop);
723         if (rq_data->in_use)
724                 enic_rxmbuf_queue_release(enic, rq_data);
725
726         rte_free(rq_sop->mbuf_ring);
727         if (rq_data->in_use)
728                 rte_free(rq_data->mbuf_ring);
729
730         rq_sop->mbuf_ring = NULL;
731         rq_data->mbuf_ring = NULL;
732
733         vnic_rq_free(rq_sop);
734         if (rq_data->in_use)
735                 vnic_rq_free(rq_data);
736
737         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
738
739         rq_sop->in_use = 0;
740         rq_data->in_use = 0;
741 }
742
743 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
744 {
745         struct rte_eth_dev_data *data = enic->dev_data;
746         vnic_wq_enable(&enic->wq[queue_idx]);
747         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
748 }
749
750 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
751 {
752         struct rte_eth_dev_data *data = enic->dev_data;
753         int ret;
754
755         ret = vnic_wq_disable(&enic->wq[queue_idx]);
756         if (ret)
757                 return ret;
758
759         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
760         return 0;
761 }
762
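/*
 * Enable an Rx queue. The data RQ (when scatter is in use) is enabled and
 * its initial buffers posted before the sop RQ, with a memory barrier in
 * between.
 */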
763 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
764 {
765         struct rte_eth_dev_data *data = enic->dev_data;
766         struct vnic_rq *rq_sop;
767         struct vnic_rq *rq_data;
768         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
769         rq_data = &enic->rq[rq_sop->data_queue_idx];
770
771         if (rq_data->in_use) {
772                 vnic_rq_enable(rq_data);
773                 enic_initial_post_rx(enic, rq_data);
774         }
775         rte_mb();
776         vnic_rq_enable(rq_sop);
777         enic_initial_post_rx(enic, rq_sop);
778         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
779 }
780
781 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
782 {
783         struct rte_eth_dev_data *data = enic->dev_data;
784         int ret1 = 0, ret2 = 0;
785         struct vnic_rq *rq_sop;
786         struct vnic_rq *rq_data;
787         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
788         rq_data = &enic->rq[rq_sop->data_queue_idx];
789
790         ret2 = vnic_rq_disable(rq_sop);
791         rte_mb();
792         if (rq_data->in_use)
793                 ret1 = vnic_rq_disable(rq_data);
794
795         if (ret2)
796                 return ret2;
797         else if (ret1)
798                 return ret1;
799
800         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
801         return 0;
802 }
803
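/*
 * Allocate the sop/data RQ pair, the matching CQ, and the mbuf rings for one
 * eth_dev Rx queue. When Rx scatter is enabled, the requested descriptors
 * are split between the sop and data RQs; otherwise only the sop RQ is used.
 * The descriptor counts are clamped to the range supported by the vNIC.
 */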
804 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
805         unsigned int socket_id, struct rte_mempool *mp,
806         uint16_t nb_desc, uint16_t free_thresh)
807 {
808         struct enic_vf_representor *vf;
809         int rc;
810         uint16_t sop_queue_idx;
811         uint16_t data_queue_idx;
812         uint16_t cq_idx;
813         struct vnic_rq *rq_sop;
814         struct vnic_rq *rq_data;
815         unsigned int mbuf_size, mbufs_per_pkt;
816         unsigned int nb_sop_desc, nb_data_desc;
817         uint16_t min_sop, max_sop, min_data, max_data;
818         uint32_t max_rx_pkt_len;
819
820         /*
821          * Representor uses a reserved PF queue. Translate representor
822          * queue number to PF queue number.
823          */
824         if (enic_is_vf_rep(enic)) {
825                 RTE_ASSERT(queue_idx == 0);
826                 vf = VF_ENIC_TO_VF_REP(enic);
827                 sop_queue_idx = vf->pf_rq_sop_idx;
828                 data_queue_idx = vf->pf_rq_data_idx;
829                 enic = vf->pf;
830                 queue_idx = sop_queue_idx;
831         } else {
832                 sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
833                 data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
834         }
835         cq_idx = enic_cq_rq(enic, sop_queue_idx);
836         rq_sop = &enic->rq[sop_queue_idx];
837         rq_data = &enic->rq[data_queue_idx];
838         rq_sop->is_sop = 1;
839         rq_sop->data_queue_idx = data_queue_idx;
840         rq_data->is_sop = 0;
841         rq_data->data_queue_idx = 0;
842         rq_sop->socket_id = socket_id;
843         rq_sop->mp = mp;
844         rq_data->socket_id = socket_id;
845         rq_data->mp = mp;
846         rq_sop->in_use = 1;
847         rq_sop->rx_free_thresh = free_thresh;
848         rq_data->rx_free_thresh = free_thresh;
849         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
850                   free_thresh);
851
852         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
853                                RTE_PKTMBUF_HEADROOM);
854         /* max_rx_pkt_len includes the ethernet header and CRC. */
855         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
856
857         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
858             DEV_RX_OFFLOAD_SCATTER) {
859                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
860                 /* ceil((max pkt len)/mbuf_size) */
861                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
862         } else {
863                 dev_info(enic, "Scatter rx mode disabled\n");
864                 mbufs_per_pkt = 1;
865                 if (max_rx_pkt_len > mbuf_size) {
866                         dev_warning(enic, "The maximum Rx packet size (%u) is"
867                                     " larger than the mbuf size (%u), and"
868                                     " scatter is disabled. Larger packets will"
869                                     " be truncated.\n",
870                                     max_rx_pkt_len, mbuf_size);
871                 }
872         }
873
874         if (mbufs_per_pkt > 1) {
875                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
876                 rq_sop->data_queue_enable = 1;
877                 rq_data->in_use = 1;
878                 /*
879                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
880                  * receives packet sizes up to the "max" MTU.
881                  * If not using scatter, we can achieve the effect of dropping
882                  * larger packets by reducing the size of posted buffers.
883                  * See enic_alloc_rx_queue_mbufs().
884                  */
885                 if (max_rx_pkt_len <
886                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
887                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
888                                     " when scatter rx mode is in use.\n");
889                 }
890         } else {
891                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
892                          queue_idx);
893                 rq_sop->data_queue_enable = 0;
894                 rq_data->in_use = 0;
895         }
896
897         /* The number of descriptors has to be a multiple of 32 */
898         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
899         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
900
901         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
902         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
903
904         if (mbufs_per_pkt > 1) {
905                 min_sop = ENIC_RX_BURST_MAX;
906                 max_sop = ((enic->config.rq_desc_count /
907                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
908                 min_data = min_sop * (mbufs_per_pkt - 1);
909                 max_data = enic->config.rq_desc_count;
910         } else {
911                 min_sop = ENIC_RX_BURST_MAX;
912                 max_sop = enic->config.rq_desc_count;
913                 min_data = 0;
914                 max_data = 0;
915         }
916
917         if (nb_desc < (min_sop + min_data)) {
918                 dev_warning(enic,
919                             "Number of rx descs too low, adjusting to minimum\n");
920                 nb_sop_desc = min_sop;
921                 nb_data_desc = min_data;
922         } else if (nb_desc > (max_sop + max_data)) {
923                 dev_warning(enic,
924                             "Number of rx_descs too high, adjusting to maximum\n");
925                 nb_sop_desc = max_sop;
926                 nb_data_desc = max_data;
927         }
928         if (mbufs_per_pkt > 1) {
929                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
930                          " rx descriptor range is %u to %u\n",
931                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
932                          max_sop + max_data);
933         }
934         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
935                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
936
937         /* Allocate sop queue resources */
938         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
939                 nb_sop_desc, sizeof(struct rq_enet_desc));
940         if (rc) {
941                 dev_err(enic, "error in allocation of sop rq\n");
942                 goto err_exit;
943         }
944         nb_sop_desc = rq_sop->ring.desc_count;
945
946         if (rq_data->in_use) {
947                 /* Allocate data queue resources */
948                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
949                                    nb_data_desc,
950                                    sizeof(struct rq_enet_desc));
951                 if (rc) {
952                         dev_err(enic, "error in allocation of data rq\n");
953                         goto err_free_rq_sop;
954                 }
955                 nb_data_desc = rq_data->ring.desc_count;
956         }
957         rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
958                            socket_id, nb_sop_desc + nb_data_desc,
959                            sizeof(struct cq_enet_rq_desc));
960         if (rc) {
961                 dev_err(enic, "error in allocation of cq for rq\n");
962                 goto err_free_rq_data;
963         }
964
965         /* Allocate the mbuf rings */
966         rq_sop->mbuf_ring = (struct rte_mbuf **)
967                 rte_zmalloc_socket("rq->mbuf_ring",
968                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
969                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
970         if (rq_sop->mbuf_ring == NULL)
971                 goto err_free_cq;
972
973         if (rq_data->in_use) {
974                 rq_data->mbuf_ring = (struct rte_mbuf **)
975                         rte_zmalloc_socket("rq->mbuf_ring",
976                                 sizeof(struct rte_mbuf *) * nb_data_desc,
977                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
978                 if (rq_data->mbuf_ring == NULL)
979                         goto err_free_sop_mbuf;
980         }
981
982         rq_sop->free_mbufs = (struct rte_mbuf **)
983                 rte_zmalloc_socket("rq->free_mbufs",
984                                    sizeof(struct rte_mbuf *) *
985                                    ENIC_RX_BURST_MAX,
986                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
987         if (rq_sop->free_mbufs == NULL)
988                 goto err_free_data_mbuf;
989         rq_sop->num_free_mbufs = 0;
990
991         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for the MTU update function */
992
993         return 0;
994
995 err_free_data_mbuf:
996         rte_free(rq_data->mbuf_ring);
997 err_free_sop_mbuf:
998         rte_free(rq_sop->mbuf_ring);
999 err_free_cq:
1000         /* cleanup on error */
1001         vnic_cq_free(&enic->cq[cq_idx]);
1002 err_free_rq_data:
1003         if (rq_data->in_use)
1004                 vnic_rq_free(rq_data);
1005 err_free_rq_sop:
1006         vnic_rq_free(rq_sop);
1007 err_exit:
1008         return -ENOMEM;
1009 }
1010
1011 void enic_free_wq(void *txq)
1012 {
1013         struct vnic_wq *wq;
1014         struct enic *enic;
1015
1016         if (txq == NULL)
1017                 return;
1018
1019         wq = (struct vnic_wq *)txq;
1020         enic = vnic_dev_priv(wq->vdev);
1021         rte_memzone_free(wq->cqmsg_rz);
1022         vnic_wq_free(wq);
1023         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1024 }
1025
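/*
 * Allocate the WQ and its CQ for one Tx queue, plus the small "cqmsg"
 * memzone that the NIC writes completion indexes into (passed as
 * cq_message_addr in enic_init_vnic_resources()).
 */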
1026 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1027         unsigned int socket_id, uint16_t nb_desc)
1028 {
1029         struct enic_vf_representor *vf;
1030         int err;
1031         struct vnic_wq *wq;
1032         unsigned int cq_index;
1033         char name[RTE_MEMZONE_NAMESIZE];
1034         static int instance;
1035
1036         /*
1037          * Representor uses a reserved PF queue. Translate representor
1038          * queue number to PF queue number.
1039          */
1040         if (enic_is_vf_rep(enic)) {
1041                 RTE_ASSERT(queue_idx == 0);
1042                 vf = VF_ENIC_TO_VF_REP(enic);
1043                 queue_idx = vf->pf_wq_idx;
1044                 cq_index = vf->pf_wq_cq_idx;
1045                 enic = vf->pf;
1046         } else {
1047                 cq_index = enic_cq_wq(enic, queue_idx);
1048         }
1049         wq = &enic->wq[queue_idx];
1050         wq->socket_id = socket_id;
1051         /*
1052          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1053          * print an info message for diagnostics.
1054          */
1055         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1056
1057         /* Allocate queue resources */
1058         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1059                 nb_desc,
1060                 sizeof(struct wq_enet_desc));
1061         if (err) {
1062                 dev_err(enic, "error in allocation of wq\n");
1063                 return err;
1064         }
1065
1066         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1067                 socket_id, nb_desc,
1068                 sizeof(struct cq_enet_wq_desc));
1069         if (err) {
1070                 vnic_wq_free(wq);
1071                 dev_err(enic, "error in allocation of cq for wq\n");
1072         }
1073
1074         /* set up the CQ message */
1075         snprintf((char *)name, sizeof(name),
1076                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1077                 instance++);
1078
1079         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1080                         sizeof(uint32_t), SOCKET_ID_ANY,
1081                         RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1082         if (!wq->cqmsg_rz)
1083                 return -ENOMEM;
1084
1085         return err;
1086 }
1087
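/*
 * Stop the port: mask and flush interrupts, unregister the interrupt
 * handler, disable the vNIC, tear down the classifier and flowman state,
 * disable every WQ/RQ, and clean out the rings, CQs, and interrupt
 * resources.
 */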
1088 int enic_disable(struct enic *enic)
1089 {
1090         unsigned int i;
1091         int err;
1092
1093         for (i = 0; i < enic->intr_count; i++) {
1094                 vnic_intr_mask(&enic->intr[i]);
1095                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1096         }
1097         enic_rxq_intr_deinit(enic);
1098         rte_intr_disable(&enic->pdev->intr_handle);
1099         rte_intr_callback_unregister(&enic->pdev->intr_handle,
1100                                      enic_intr_handler,
1101                                      (void *)enic->rte_dev);
1102
1103         vnic_dev_disable(enic->vdev);
1104
1105         enic_clsf_destroy(enic);
1106         enic_fm_destroy(enic);
1107
1108         if (!enic_is_sriov_vf(enic))
1109                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1110
1111         for (i = 0; i < enic->wq_count; i++) {
1112                 err = vnic_wq_disable(&enic->wq[i]);
1113                 if (err)
1114                         return err;
1115         }
1116         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1117                 if (enic->rq[i].in_use) {
1118                         err = vnic_rq_disable(&enic->rq[i]);
1119                         if (err)
1120                                 return err;
1121                 }
1122         }
1123
1124         /* If we were using interrupts, set the interrupt vector to -1
1125          * to disable interrupts.  We are not disabling link notifications,
1126          * though, as we want the polling of link status to continue working.
1127          */
1128         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1129                 vnic_dev_notify_set(enic->vdev, -1);
1130
1131         vnic_dev_set_reset_flag(enic->vdev, 1);
1132
1133         for (i = 0; i < enic->wq_count; i++)
1134                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1135
1136         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1137                 if (enic->rq[i].in_use)
1138                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1139         for (i = 0; i < enic->cq_count; i++)
1140                 vnic_cq_clean(&enic->cq[i]);
1141         for (i = 0; i < enic->intr_count; i++)
1142                 vnic_intr_clean(&enic->intr[i]);
1143
1144         return 0;
1145 }
1146
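/*
 * Kick off a devcmd via 'start' and poll 'finished' until it completes,
 * giving up after roughly two seconds.
 */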
1147 static int enic_dev_wait(struct vnic_dev *vdev,
1148         int (*start)(struct vnic_dev *, int),
1149         int (*finished)(struct vnic_dev *, int *),
1150         int arg)
1151 {
1152         int done;
1153         int err;
1154         int i;
1155
1156         err = start(vdev, arg);
1157         if (err)
1158                 return err;
1159
1160         /* Wait for func to complete...2 seconds max */
1161         for (i = 0; i < 2000; i++) {
1162                 err = finished(vdev, &done);
1163                 if (err)
1164                         return err;
1165                 if (done)
1166                         return 0;
1167                 usleep(1000);
1168         }
1169         return -ETIMEDOUT;
1170 }
1171
1172 static int enic_dev_open(struct enic *enic)
1173 {
1174         int err;
1175         int flags = CMD_OPENF_IG_DESCCACHE;
1176
1177         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1178                 vnic_dev_open_done, flags);
1179         if (err)
1180                 dev_err(enic_get_dev(enic),
1181                         "vNIC device open failed, err %d\n", err);
1182
1183         return err;
1184 }
1185
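/*
 * Copy the 40-byte RSS key into a DMA buffer using the union vnic_rss_key
 * layout (10-byte sub-keys) and push it to the NIC. A copy is kept in
 * enic->rss_key for later queries.
 */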
1186 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1187 {
1188         dma_addr_t rss_key_buf_pa;
1189         union vnic_rss_key *rss_key_buf_va = NULL;
1190         int err, i;
1191         uint8_t name[RTE_MEMZONE_NAMESIZE];
1192
1193         RTE_ASSERT(user_key != NULL);
1194         snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1195         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1196                 &rss_key_buf_pa, name);
1197         if (!rss_key_buf_va)
1198                 return -ENOMEM;
1199
1200         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1201                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1202
1203         err = enic_set_rss_key(enic,
1204                 rss_key_buf_pa,
1205                 sizeof(union vnic_rss_key));
1206
1207         /* Save for later queries */
1208         if (!err) {
1209                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1210                            sizeof(union vnic_rss_key));
1211         }
1212         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1213                 rss_key_buf_va, rss_key_buf_pa);
1214
1215         return err;
1216 }
1217
1218 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1219 {
1220         dma_addr_t rss_cpu_buf_pa;
1221         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1222         int err;
1223         uint8_t name[RTE_MEMZONE_NAMESIZE];
1224
1225         snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1226         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1227                 &rss_cpu_buf_pa, name);
1228         if (!rss_cpu_buf_va)
1229                 return -ENOMEM;
1230
1231         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1232
1233         err = enic_set_rss_cpu(enic,
1234                 rss_cpu_buf_pa,
1235                 sizeof(union vnic_rss_cpu));
1236
1237         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1238                 rss_cpu_buf_va, rss_cpu_buf_pa);
1239
1240         /* Save for later queries */
1241         if (!err)
1242                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1243         return err;
1244 }
1245
1246 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1247         uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1248         uint8_t rss_enable)
1249 {
1250         const uint8_t tso_ipid_split_en = 0;
1251         int err;
1252
1253         err = enic_set_nic_cfg(enic,
1254                 rss_default_cpu, rss_hash_type,
1255                 rss_hash_bits, rss_base_cpu,
1256                 rss_enable, tso_ipid_split_en,
1257                 enic->ig_vlan_strip_en);
1258
1259         return err;
1260 }
1261
1262 /* Initialize RSS with defaults, called from dev_configure */
1263 int enic_init_rss_nic_cfg(struct enic *enic)
1264 {
1265         static uint8_t default_rss_key[] = {
1266                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1267                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1268                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1269                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1270         };
1271         struct rte_eth_rss_conf rss_conf;
1272         union vnic_rss_cpu rss_cpu;
1273         int ret, i;
1274
1275         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1276         /*
1277          * If setting key for the first time, and the user gives us none, then
1278          * push the default key to NIC.
1279          */
1280         if (rss_conf.rss_key == NULL) {
1281                 rss_conf.rss_key = default_rss_key;
1282                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1283         }
1284         ret = enic_set_rss_conf(enic, &rss_conf);
1285         if (ret) {
1286                 dev_err(enic, "Failed to configure RSS\n");
1287                 return ret;
1288         }
1289         if (enic->rss_enable) {
1290                 /* If enabling RSS, use the default reta */
1291                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1292                         rss_cpu.cpu[i / 4].b[i % 4] =
1293                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1294                 }
1295                 ret = enic_set_rss_reta(enic, &rss_cpu);
1296                 if (ret)
1297                         dev_err(enic, "Failed to set RSS indirection table\n");
1298         }
1299         return ret;
1300 }
1301
1302 int enic_setup_finish(struct enic *enic)
1303 {
1304         enic_init_soft_stats(enic);
1305
1306         /* switchdev: enable promisc mode on PF */
1307         if (enic->switchdev_mode) {
1308                 vnic_dev_packet_filter(enic->vdev,
1309                                        0 /* directed  */,
1310                                        0 /* multicast */,
1311                                        0 /* broadcast */,
1312                                        1 /* promisc   */,
1313                                        0 /* allmulti  */);
1314                 enic->promisc = 1;
1315                 enic->allmulti = 0;
1316                 return 0;
1317         }
1318         /* Default conf */
1319         vnic_dev_packet_filter(enic->vdev,
1320                 1 /* directed  */,
1321                 1 /* multicast */,
1322                 1 /* broadcast */,
1323                 0 /* promisc   */,
1324                 1 /* allmulti  */);
1325
1326         enic->promisc = 0;
1327         enic->allmulti = 1;
1328
1329         return 0;
1330 }
1331
1332 static int enic_rss_conf_valid(struct enic *enic,
1333                                struct rte_eth_rss_conf *rss_conf)
1334 {
1335         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1336         if (enic->flow_type_rss_offloads == 0)
1337                 return 0;
1338         if (rss_conf->rss_key != NULL &&
1339             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1340                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1341                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1342                 return -EINVAL;
1343         }
1344         if (rss_conf->rss_hf != 0 &&
1345             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1346                 dev_err(enic, "Given rss_hf contains none of the supported"
1347                         " types\n");
1348                 return -EINVAL;
1349         }
1350         return 0;
1351 }
1352
1353 /* Set hash type and key according to rss_conf */
1354 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1355 {
1356         struct rte_eth_dev *eth_dev;
1357         uint64_t rss_hf;
1358         uint8_t rss_hash_type;
1359         uint8_t rss_enable;
1360         int ret;
1361
1362         RTE_ASSERT(rss_conf != NULL);
1363         ret = enic_rss_conf_valid(enic, rss_conf);
1364         if (ret) {
1365                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1366                 return ret;
1367         }
1368
1369         eth_dev = enic->rte_dev;
1370         rss_hash_type = 0;
1371         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1372         if (enic->rq_count > 1 &&
1373             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1374             rss_hf != 0) {
1375                 rss_enable = 1;
1376                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1377                               ETH_RSS_NONFRAG_IPV4_OTHER))
1378                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1379                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1380                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1381                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1382                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1383                         if (enic->udp_rss_weak) {
1384                                 /*
1385                                  * 'TCP' is not a typo. The "weak" version of
1386                                  * UDP RSS requires both the TCP and UDP bits
1387                                  * be set. It does enable TCP RSS as well.
1388                                  */
1389                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1390                         }
1391                 }
1392                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1393                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1394                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1395                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1396                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1397                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1398                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1399                         if (enic->udp_rss_weak)
1400                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1401                 }
1402         } else {
1403                 rss_enable = 0;
1404                 rss_hf = 0;
1405         }
1406
1407         /* Set the hash key if provided */
1408         if (rss_enable && rss_conf->rss_key) {
1409                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1410                 if (ret) {
1411                         dev_err(enic, "Failed to set RSS key\n");
1412                         return ret;
1413                 }
1414         }
1415
1416         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1417                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1418                               rss_enable);
1419         if (!ret) {
1420                 enic->rss_hf = rss_hf;
1421                 enic->rss_hash_type = rss_hash_type;
1422                 enic->rss_enable = rss_enable;
1423         } else {
1424                 dev_err(enic, "Failed to update RSS configurations."
1425                         " hash=0x%x\n", rss_hash_type);
1426         }
1427         return ret;
1428 }
1429
1430 int enic_set_vlan_strip(struct enic *enic)
1431 {
1432         /*
1433          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1434          * together. So, re-do niccfg, preserving the current RSS settings.
1435          */
1436         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1437                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1438                                enic->rss_enable);
1439 }
1440
1441 int enic_add_packet_filter(struct enic *enic)
1442 {
1443         /* switchdev ignores packet filters */
1444         if (enic->switchdev_mode) {
1445                 ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1446                 return 0;
1447         }
1448         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1449         return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1450                 enic->promisc, enic->allmulti);
1451 }
1452
1453 int enic_get_link_status(struct enic *enic)
1454 {
1455         return vnic_dev_link_status(enic->vdev);
1456 }
1457
1458 static void enic_dev_deinit(struct enic *enic)
1459 {
1460         /* stop link status checking */
1461         vnic_dev_notify_unset(enic->vdev);
1462
1463         /* mac_addrs is freed by rte_eth_dev_release_port() */
1464         rte_free(enic->cq);
1465         rte_free(enic->intr);
1466         rte_free(enic->rq);
1467         rte_free(enic->wq);
1468 }
1469
1470
1471 int enic_set_vnic_res(struct enic *enic)
1472 {
1473         struct rte_eth_dev *eth_dev = enic->rte_dev;
1474         int rc = 0;
1475         unsigned int required_rq, required_wq, required_cq, required_intr;
1476
1477         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
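        /* One RQ is the start-of-packet (SOP) queue; the other is the data
         * queue used when Rx scatter is enabled.
         */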
1478         required_rq = eth_dev->data->nb_rx_queues * 2;
1479         required_wq = eth_dev->data->nb_tx_queues;
1480         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1481         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1482         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1483                 required_intr += eth_dev->data->nb_rx_queues;
1484         }
1485         ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1486                     required_rq, required_wq, required_cq);
1487         if (enic->vf_required_rq) {
1488                 /* Queues needed for VF representors */
1489                 required_rq += enic->vf_required_rq;
1490                 required_wq += enic->vf_required_wq;
1491                 required_cq += enic->vf_required_cq;
1492                 ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1493                             enic->vf_required_rq, enic->vf_required_wq,
1494                             enic->vf_required_cq);
1495         }
1496
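        /* Fail if the vNIC does not provision enough of each resource */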
1497         if (enic->conf_rq_count < required_rq) {
1498                 dev_err(enic, "Not enough Receive queues. Requested:%u which uses %u RQs on VIC, Configured:%u\n",
1499                         eth_dev->data->nb_rx_queues,
1500                         required_rq, enic->conf_rq_count);
1501                 rc = -EINVAL;
1502         }
1503         if (enic->conf_wq_count < required_wq) {
1504                 dev_err(enic, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1505                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1506                 rc = -EINVAL;
1507         }
1508
1509         if (enic->conf_cq_count < required_cq) {
1510                 dev_err(enic, "Not enough Completion queues. Required:%u, Configured:%u\n",
1511                         required_cq, enic->conf_cq_count);
1512                 rc = -EINVAL;
1513         }
1514         if (enic->conf_intr_count < required_intr) {
1515                 dev_err(enic, "Not enough Interrupts to support Rx queue"
1516                         " interrupts. Required:%u, Configured:%u\n",
1517                         required_intr, enic->conf_intr_count);
1518                 rc = -EINVAL;
1519         }
1520
1521         if (rc == 0) {
1522                 enic->rq_count = eth_dev->data->nb_rx_queues;
1523                 enic->wq_count = eth_dev->data->nb_tx_queues;
1524                 enic->cq_count = enic->rq_count + enic->wq_count;
1525                 enic->intr_count = required_intr;
1526         }
1527
1528         return rc;
1529 }
1530
1531 /* Reinitialize the CQ and the SOP/data RQs backing an Rx queue, then refill mbufs */
1532 static int
1533 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1534 {
1535         struct vnic_rq *sop_rq, *data_rq;
1536         unsigned int cq_idx;
1537         int rc = 0;
1538
1539         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1540         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1541         cq_idx = enic_cq_rq(enic, rq_idx);
1542
1543         vnic_cq_clean(&enic->cq[cq_idx]);
1544         vnic_cq_init(&enic->cq[cq_idx],
1545                      0 /* flow_control_enable */,
1546                      1 /* color_enable */,
1547                      0 /* cq_head */,
1548                      0 /* cq_tail */,
1549                      1 /* cq_tail_color */,
1550                      0 /* interrupt_enable */,
1551                      1 /* cq_entry_enable */,
1552                      0 /* cq_message_enable */,
1553                      0 /* interrupt offset */,
1554                      0 /* cq_message_addr */);
1555
1556
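        /* Restart the SOP RQ (and the data RQ, if in use) from descriptor index 0 */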
1557         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1558                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1559                            sop_rq->ring.desc_count - 1, 1, 0);
1560         if (data_rq->in_use) {
1561                 vnic_rq_init_start(data_rq,
1562                                    enic_cq_rq(enic,
1563                                    enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1564                                    0, data_rq->ring.desc_count - 1, 1, 0);
1565         }
1566
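        /* Refill both RQs with mbufs; release the SOP RQ mbufs if the data RQ refill fails */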
1567         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1568         if (rc)
1569                 return rc;
1570
1571         if (data_rq->in_use) {
1572                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1573                 if (rc) {
1574                         enic_rxmbuf_queue_release(enic, sop_rq);
1575                         return rc;
1576                 }
1577         }
1578
1579         return 0;
1580 }
1581
1582 /* The Cisco NIC can send and receive packets up to a max packet size
1583  * determined by the NIC type and firmware. There is also an MTU
1584  * configured into the NIC via the CIMC/UCSM management interface
1585  * which can be overridden by this function (up to the max packet size).
1586  * Depending on the network setup, doing so may cause packet drops
1587  * and unexpected behavior.
1588  */
1589 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1590 {
1591         unsigned int rq_idx;
1592         struct vnic_rq *rq;
1593         int rc = 0;
1594         uint16_t old_mtu;       /* previous setting */
1595         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1596         struct rte_eth_dev *eth_dev = enic->rte_dev;
1597
1598         old_mtu = eth_dev->data->mtu;
1599         config_mtu = enic->config.mtu;
1600
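        /* Only the primary process may change the MTU, as it reprograms the RQs below */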
1601         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1602                 return -E_RTE_SECONDARY;
1603
1604         if (new_mtu > enic->max_mtu) {
1605                 dev_err(enic,
1606                         "MTU not updated: requested (%u) greater than max (%u)\n",
1607                         new_mtu, enic->max_mtu);
1608                 return -EINVAL;
1609         }
1610         if (new_mtu < ENIC_MIN_MTU) {
1611                 dev_info(enic,
1612                         "MTU not updated: requested (%u) less than min (%u)\n",
1613                         new_mtu, ENIC_MIN_MTU);
1614                 return -EINVAL;
1615         }
1616         if (new_mtu > config_mtu)
1617                 dev_warning(enic,
1618                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1619                         new_mtu, config_mtu);
1620
1621         /* Update the MTU and maximum packet length */
1622         eth_dev->data->mtu = new_mtu;
1623         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1624                 enic_mtu_to_max_rx_pktlen(new_mtu);
1625
1626         /*
1627          * If the device has not started (enic_enable), nothing to do.
1628          * Later, enic_enable() will set up RQs reflecting the new maximum
1629          * packet length.
1630          */
1631         if (!eth_dev->data->dev_started)
1632                 goto set_mtu_done;
1633
1634         /*
1635          * The device has started, re-do RQs on the fly. In the process, we
1636          * pick up the new maximum packet length.
1637          *
1638          * Some applications rely on the ability to change MTU without stopping
1639          * the device. So keep this behavior for now.
1640          */
1641         rte_spinlock_lock(&enic->mtu_lock);
1642
1643         /* Stop traffic on all RQs */
1644         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1645                 rq = &enic->rq[rq_idx];
1646                 if (rq->is_sop && rq->in_use) {
1647                         rc = enic_stop_rq(enic,
1648                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1649                         if (rc) {
1650                                 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1651                                 goto set_mtu_done;
1652                         }
1653                 }
1654         }
1655
1656         /* replace Rx function with a no-op to avoid getting stale pkts */
1657         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1658         rte_mb();
1659
1660         /* Allow time for threads to exit the real Rx function. */
1661         usleep(100000);
1662
1663         /* now it is safe to reconfigure the RQs */
1664
1665
1666         /* free and reallocate RQs with the new MTU */
1667         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1668                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1669                 if (!rq->in_use)
1670                         continue;
1671
1672                 enic_free_rq(rq);
1673                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1674                                    rq->tot_nb_desc, rq->rx_free_thresh);
1675                 if (rc) {
1676                         dev_err(enic,
1677                                 "Fatal MTU alloc error - no traffic will pass\n");
1678                         goto set_mtu_done;
1679                 }
1680
1681                 rc = enic_reinit_rq(enic, rq_idx);
1682                 if (rc) {
1683                         dev_err(enic,
1684                                 "Fatal MTU RQ reinit - no traffic will pass\n");
1685                         goto set_mtu_done;
1686                 }
1687         }
1688
1689         /* put back the real receive function */
1690         rte_mb();
1691         enic_pick_rx_handler(eth_dev);
1692         rte_mb();
1693
1694         /* restart Rx traffic */
1695         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1696                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1697                 if (rq->is_sop && rq->in_use)
1698                         enic_start_rq(enic, rq_idx);
1699         }
1700
1701 set_mtu_done:
1702         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1703         rte_spinlock_unlock(&enic->mtu_lock);
1704         return rc;
1705 }
1706
1707 static int enic_dev_init(struct enic *enic)
1708 {
1709         int err;
1710         struct rte_eth_dev *eth_dev = enic->rte_dev;
1711
1712         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1713
1714         /* Get vNIC configuration */
1716         err = enic_get_vnic_config(enic);
1717         if (err) {
1718                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1719                 return err;
1720         }
1721
1722         /* Get available resource counts */
1723         enic_get_res_counts(enic);
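        /* Each eth_dev Rx queue consumes two vNIC RQs (SOP + data); see enic_set_vnic_res() */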
1724         if (enic->conf_rq_count == 1) {
1725                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1726                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1727                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1728                 return -EINVAL;
1729         }
1730         /* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1731         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1732                                enic->conf_cq_count, 8);
1733         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1734                                  enic->conf_intr_count, 8);
1735         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1736                                enic->conf_rq_count, 8);
1737         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1738                                enic->conf_wq_count, 8);
1739         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1740                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1741                 return -1;
1742         }
1743         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1744                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1745                 return -1;
1746         }
1747         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1748                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1749                 return -1;
1750         }
1751         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1752                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1753                 return -1;
1754         }
1755
1756         /* Get the supported filters */
1757         enic_fdir_info(enic);
1758
1759         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1760                                         sizeof(struct rte_ether_addr) *
1761                                         ENIC_UNICAST_PERFECT_FILTERS, 0);
1762         if (!eth_dev->data->mac_addrs) {
1763                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1764                 return -1;
1765         }
1766         rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1767                         eth_dev->data->mac_addrs);
1768
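        /* Mark the vNIC as out of reset */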
1769         vnic_dev_set_reset_flag(enic->vdev, 0);
1770
1771         LIST_INIT(&enic->flows);
1772
1773         /* set up link status checking */
1774         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1775
1776         /*
1777          * When Geneve with options offload is available, always disable it
1778          * first as it can interfere with user flow rules.
1779          */
1780         if (enic->geneve_opt_avail) {
1781                 /*
1782                  * Disabling fails if the feature is provisioned but
1783                  * not enabled. So ignore result and do not log error.
1784                  */
1785                 vnic_dev_overlay_offload_ctrl(enic->vdev,
1786                         OVERLAY_FEATURE_GENEVE,
1787                         OVERLAY_OFFLOAD_DISABLE);
1788         }
1789         enic->overlay_offload = false;
1790         if (enic->disable_overlay && enic->vxlan) {
1791                 /*
1792                  * Explicitly disable overlay offload as the setting is
1793                  * sticky, and resetting the vNIC does not disable it.
1794                  */
1795                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1796                                                   OVERLAY_FEATURE_VXLAN,
1797                                                   OVERLAY_OFFLOAD_DISABLE)) {
1798                         dev_err(enic, "failed to disable overlay offload\n");
1799                 } else {
1800                         dev_info(enic, "Overlay offload is disabled\n");
1801                 }
1802         }
1803         if (!enic->disable_overlay && enic->vxlan &&
1804             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1805             vnic_dev_overlay_offload_ctrl(enic->vdev,
1806                                           OVERLAY_FEATURE_VXLAN,
1807                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1808                 enic->tx_offload_capa |=
1809                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1810                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1811                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1812                 enic->tx_offload_mask |=
1813                         PKT_TX_OUTER_IPV6 |
1814                         PKT_TX_OUTER_IPV4 |
1815                         PKT_TX_OUTER_IP_CKSUM |
1816                         PKT_TX_TUNNEL_MASK;
1817                 enic->overlay_offload = true;
1818                 dev_info(enic, "Overlay offload is enabled\n");
1819         }
1820         /* Geneve with options offload requires overlay offload */
1821         if (enic->overlay_offload && enic->geneve_opt_avail &&
1822             enic->geneve_opt_request) {
1823                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1824                                 OVERLAY_FEATURE_GENEVE,
1825                                 OVERLAY_OFFLOAD_ENABLE)) {
1826                         dev_err(enic, "failed to enable geneve+option\n");
1827                 } else {
1828                         enic->geneve_opt_enabled = 1;
1829                         dev_info(enic, "Geneve with options is enabled\n");
1830                 }
1831         }
1832         /*
1833          * Reset the vxlan port if HW vxlan parsing is available. It
1834          * is always enabled regardless of overlay offload
1835          * enable/disable.
1836          */
1837         if (enic->vxlan) {
1838                 enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1839                 /*
1840                  * Reset the vxlan port to the default, as the NIC firmware
1841                  * does not reset it automatically and keeps the old setting.
1842                  */
1843                 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1844                                                  OVERLAY_CFG_VXLAN_PORT_UPDATE,
1845                                                  RTE_VXLAN_DEFAULT_PORT)) {
1846                         dev_err(enic, "failed to update vxlan port\n");
1847                         return -EINVAL;
1848                 }
1849         }
1850
1851         if (enic_fm_init(enic))
1852                 dev_warning(enic, "Init of flowman failed.\n");
1853         return 0;
1854
1855 }
1856
1857 static void lock_devcmd(void *priv)
1858 {
1859         struct enic *enic = priv;
1860
1861         rte_spinlock_lock(&enic->devcmd_lock);
1862 }
1863
1864 static void unlock_devcmd(void *priv)
1865 {
1866         struct enic *enic = priv;
1867
1868         rte_spinlock_unlock(&enic->devcmd_lock);
1869 }
1870
1871 int enic_probe(struct enic *enic)
1872 {
1873         struct rte_pci_device *pdev = enic->pdev;
1874         int err = -1;
1875
1876         dev_debug(enic, "Initializing ENIC PMD\n");
1877
1878         /* If this is a secondary process, the hardware is already initialized */
1879         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1880                 return 0;
1881
1882         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1883         enic->bar0.len = pdev->mem_resource[0].len;
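        /* BAR 0 exposes the vNIC resources that vnic_dev_register() maps below */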
1884
1885         /* Register vNIC device */
1886         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1887         if (!enic->vdev) {
1888                 dev_err(enic, "vNIC registration failed, aborting\n");
1889                 goto err_out;
1890         }
1891
1892         LIST_INIT(&enic->memzone_list);
1893         rte_spinlock_init(&enic->memzone_list_lock);
1894
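        /* Register the allocator callbacks the vNIC library uses for
         * descriptor rings and other DMA-able control structures.
         */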
1895         vnic_register_cbacks(enic->vdev,
1896                 enic_alloc_consistent,
1897                 enic_free_consistent);
1898
1899         /*
1900          * Allocate the consistent memory for stats upfront so both primary and
1901          * secondary processes can dump stats.
1902          */
1903         err = vnic_dev_alloc_stats_mem(enic->vdev);
1904         if (err) {
1905                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1906                 goto err_out_unregister;
1907         }
1908         /* Issue device open to get device in known state */
1909         err = enic_dev_open(enic);
1910         if (err) {
1911                 dev_err(enic, "vNIC dev open failed, aborting\n");
1912                 goto err_out_unregister;
1913         }
1914
1915         /* Set ingress vlan rewrite mode before vnic initialization */
1916         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1917                   enic->ig_vlan_rewrite_mode);
1918         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1919                 enic->ig_vlan_rewrite_mode);
1920         if (err) {
1921                 dev_err(enic,
1922                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1923                 goto err_out_dev_close;
1924         }
1925
1926         /* Issue device init to initialize the vnic-to-switch link.
1927          * We'll start with carrier off and wait for link UP
1928          * notification later to turn on carrier.  We don't need
1929          * to wait here for the vnic-to-switch link initialization
1930          * to complete; link UP notification is the indication that
1931          * the process is complete.
1932          */
1933
1934         err = vnic_dev_init(enic->vdev, 0);
1935         if (err) {
1936                 dev_err(enic, "vNIC dev init failed, aborting\n");
1937                 goto err_out_dev_close;
1938         }
1939
1940         err = enic_dev_init(enic);
1941         if (err) {
1942                 dev_err(enic, "Device initialization failed, aborting\n");
1943                 goto err_out_dev_close;
1944         }
1945
1946         /* Use a PF spinlock to serialize devcmd from PF and VF representors */
1947         if (enic->switchdev_mode) {
1948                 rte_spinlock_init(&enic->devcmd_lock);
1949                 vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1950         }
1951         return 0;
1952
1953 err_out_dev_close:
1954         vnic_dev_close(enic->vdev);
1955 err_out_unregister:
1956         vnic_dev_unregister(enic->vdev);
1957 err_out:
1958         return err;
1959 }
1960
1961 void enic_remove(struct enic *enic)
1962 {
1963         enic_dev_deinit(enic);
1964         vnic_dev_close(enic->vdev);
1965         vnic_dev_unregister(enic->vdev);
1966 }