[dpdk.git] / drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <rte_ethdev_driver.h>
19
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37
38 static int is_zero_addr(uint8_t *addr)
39 {
40         return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42
43 static int is_mcast_addr(uint8_t *addr)
44 {
45         return addr[0] & 1;
46 }
47
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50         return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52
53 void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56         uint16_t i;
57
58         if (!rq || !rq->mbuf_ring) {
59                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60                 return;
61         }
62
63         for (i = 0; i < rq->ring.desc_count; i++) {
64                 if (rq->mbuf_ring[i]) {
65                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66                         rq->mbuf_ring[i] = NULL;
67                 }
68         }
69 }
70
71 void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73         struct rte_mbuf *mbuf = *buf;
74
75         rte_pktmbuf_free_seg(mbuf);
76         *buf = NULL;
77 }
78
79 static void enic_log_q_error(struct enic *enic)
80 {
81         unsigned int i;
82         uint32_t error_status;
83
84         for (i = 0; i < enic->wq_count; i++) {
85                 error_status = vnic_wq_error_status(&enic->wq[i]);
86                 if (error_status)
87                         dev_err(enic, "WQ[%u] error_status %u\n", i,
88                                 error_status);
89         }
90
91         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92                 if (!enic->rq[i].in_use)
93                         continue;
94                 error_status = vnic_rq_error_status(&enic->rq[i]);
95                 if (error_status)
96                         dev_err(enic, "RQ[%u] error_status %u\n", i,
97                                 error_status);
98         }
99 }
100
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103         struct enic_soft_stats *soft_stats = &enic->soft_stats;
104         rte_atomic64_clear(&soft_stats->rx_nombuf);
105         rte_atomic64_clear(&soft_stats->rx_packet_errors);
106         rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111         struct enic_soft_stats *soft_stats = &enic->soft_stats;
112         rte_atomic64_init(&soft_stats->rx_nombuf);
113         rte_atomic64_init(&soft_stats->rx_packet_errors);
114         rte_atomic64_init(&soft_stats->tx_oversized);
115         enic_clear_soft_stats(enic);
116 }
117
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120         int ret;
121
122         ret = vnic_dev_stats_clear(enic->vdev);
123         if (ret != 0) {
124                 dev_err(enic, "Error in clearing stats\n");
125                 return ret;
126         }
127         enic_clear_soft_stats(enic);
128
129         return 0;
130 }
131
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134         struct vnic_stats *stats;
135         struct enic_soft_stats *soft_stats = &enic->soft_stats;
136         int64_t rx_truncated;
137         uint64_t rx_packet_errors;
138         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139
140         if (ret) {
141                 dev_err(enic, "Error in getting stats\n");
142                 return ret;
143         }
144
145         /* The number of truncated packets can only be calculated by
146          * subtracting a hardware counter from error packets received by
147          * the driver. Note: this causes transient inaccuracies in the
148          * ipackets count. Also, the lengths of truncated packets are
149          * counted in ibytes even though truncated packets are dropped,
150          * which can make ibytes slightly higher than it should be.
151          */
152         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
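        /*
         * Worked example of the arithmetic above (hypothetical numbers): if
         * the driver has counted rx_packet_errors = 110 while the HW reports
         * rx_errors = 100, then rx_truncated = 10. With rx_frames_ok = 1000
         * and rx_no_bufs = 5, the reported stats become
         * ipackets = 1000 - 10 = 990 and imissed = 5 + 10 = 15.
         */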
154
155         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156         r_stats->opackets = stats->tx.tx_frames_ok;
157
158         r_stats->ibytes = stats->rx.rx_bytes_ok;
159         r_stats->obytes = stats->tx.tx_bytes_ok;
160
161         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162         r_stats->oerrors = stats->tx.tx_errors
163                            + rte_atomic64_read(&soft_stats->tx_oversized);
164
165         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166
167         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168         return 0;
169 }
170
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173         struct rte_eth_dev *eth_dev = enic->rte_dev;
174         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175
176         return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181         int err;
182
183         if (!is_eth_addr_valid(mac_addr)) {
184                 dev_err(enic, "invalid mac address\n");
185                 return -EINVAL;
186         }
187
188         err = vnic_dev_add_addr(enic->vdev, mac_addr);
189         if (err)
190                 dev_err(enic, "add mac addr failed\n");
191         return err;
192 }
193
194 void enic_free_rq_buf(struct rte_mbuf **mbuf)
195 {
196         if (*mbuf == NULL)
197                 return;
198
199         rte_pktmbuf_free(*mbuf);
200         *mbuf = NULL;
201 }
202
203 void enic_init_vnic_resources(struct enic *enic)
204 {
205         unsigned int error_interrupt_enable = 1;
206         unsigned int error_interrupt_offset = 0;
207         unsigned int rxq_interrupt_enable = 0;
208         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
209         unsigned int index = 0;
210         unsigned int cq_idx;
211         struct vnic_rq *data_rq;
212
213         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
214                 rxq_interrupt_enable = 1;
215
216         for (index = 0; index < enic->rq_count; index++) {
217                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
218
219                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
220                         cq_idx,
221                         error_interrupt_enable,
222                         error_interrupt_offset);
223
224                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
225                 if (data_rq->in_use)
226                         vnic_rq_init(data_rq,
227                                      cq_idx,
228                                      error_interrupt_enable,
229                                      error_interrupt_offset);
230                 vnic_cq_init(&enic->cq[cq_idx],
231                         0 /* flow_control_enable */,
232                         1 /* color_enable */,
233                         0 /* cq_head */,
234                         0 /* cq_tail */,
235                         1 /* cq_tail_color */,
236                         rxq_interrupt_enable,
237                         1 /* cq_entry_enable */,
238                         0 /* cq_message_enable */,
239                         rxq_interrupt_offset,
240                         0 /* cq_message_addr */);
241                 if (rxq_interrupt_enable)
242                         rxq_interrupt_offset++;
243         }
244
245         for (index = 0; index < enic->wq_count; index++) {
246                 vnic_wq_init(&enic->wq[index],
247                         enic_cq_wq(enic, index),
248                         error_interrupt_enable,
249                         error_interrupt_offset);
250                 /* Compute unsupported ol flags for enic_prep_pkts() */
251                 enic->wq[index].tx_offload_notsup_mask =
252                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
253
254                 cq_idx = enic_cq_wq(enic, index);
255                 vnic_cq_init(&enic->cq[cq_idx],
256                         0 /* flow_control_enable */,
257                         1 /* color_enable */,
258                         0 /* cq_head */,
259                         0 /* cq_tail */,
260                         1 /* cq_tail_color */,
261                         0 /* interrupt_enable */,
262                         0 /* cq_entry_enable */,
263                         1 /* cq_message_enable */,
264                         0 /* interrupt offset */,
265                         (uint64_t)enic->wq[index].cqmsg_rz->iova);
266         }
267
268         for (index = 0; index < enic->intr_count; index++) {
269                 vnic_intr_init(&enic->intr[index],
270                                enic->config.intr_timer_usec,
271                                enic->config.intr_timer_type,
272                                /*mask_on_assertion*/1);
273         }
274 }
275
276
277 int
278 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
279 {
280         struct rte_mbuf *mb;
281         struct rq_enet_desc *rqd = rq->ring.descs;
282         unsigned i;
283         dma_addr_t dma_addr;
284         uint32_t max_rx_pkt_len;
285         uint16_t rq_buf_len;
286
287         if (!rq->in_use)
288                 return 0;
289
290         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
291                   rq->ring.desc_count);
292
293         /*
294          * If *not* using scatter and the mbuf size is greater than the
295          * requested max packet size (max_rx_pkt_len), then reduce the
296          * posted buffer size to max_rx_pkt_len. HW still receives packets
297          * larger than max_rx_pkt_len, but they will be truncated, which we
298          * drop in the rx handler. Not ideal, but better than returning
299          * large packets when the user is not expecting them.
300          */
301         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
302         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
303         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
304                 rq_buf_len = max_rx_pkt_len;
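        /*
         * For example (hypothetical sizes): if the mbuf data room minus
         * headroom is 2048 bytes and max_rx_pkt_len = 1518, non-scatter RQs
         * post 1518-byte buffers, so longer frames arrive marked truncated
         * and are dropped by the Rx handler as described above.
         */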
305         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
306                 mb = rte_mbuf_raw_alloc(rq->mp);
307                 if (mb == NULL) {
308                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
309                         (unsigned)rq->index);
310                         return -ENOMEM;
311                 }
312
313                 mb->data_off = RTE_PKTMBUF_HEADROOM;
314                 dma_addr = (dma_addr_t)(mb->buf_iova
315                            + RTE_PKTMBUF_HEADROOM);
316                 rq_enet_desc_enc(rqd, dma_addr,
317                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
318                                 : RQ_ENET_TYPE_NOT_SOP),
319                                 rq_buf_len);
320                 rq->mbuf_ring[i] = mb;
321         }
322         /*
323          * Do not post the buffers to the NIC until we enable the RQ via
324          * enic_start_rq().
325          */
326         rq->need_initial_post = true;
327         /* Initialize fetch index while RQ is disabled */
328         iowrite32(0, &rq->ctrl->fetch_index);
329         return 0;
330 }
331
332 /*
333  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
334  * allocated the buffers and filled the RQ descriptor ring. Just need to push
335  * the post index to the NIC.
336  */
337 static void
338 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
339 {
340         if (!rq->in_use || !rq->need_initial_post)
341                 return;
342
343         /* make sure all prior writes are complete before doing the PIO write */
344         rte_rmb();
345
346         /* Post all but the last buffer to VIC. */
347         rq->posted_index = rq->ring.desc_count - 1;
348
349         rq->rx_nb_hold = 0;
350
351         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
352                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
353         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
354         rte_rmb();
355         rq->need_initial_post = false;
356 }
357
358 void *
359 enic_alloc_consistent(void *priv, size_t size,
360         dma_addr_t *dma_handle, uint8_t *name)
361 {
362         void *vaddr;
363         const struct rte_memzone *rz;
364         *dma_handle = 0;
365         struct enic *enic = (struct enic *)priv;
366         struct enic_memzone_entry *mze;
367
368         rz = rte_memzone_reserve_aligned((const char *)name, size,
369                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
370         if (!rz) {
371                 pr_err("%s : Failed to allocate memory requested for %s\n",
372                         __func__, name);
373                 return NULL;
374         }
375
376         vaddr = rz->addr;
377         *dma_handle = (dma_addr_t)rz->iova;
378
379         mze = rte_malloc("enic memzone entry",
380                          sizeof(struct enic_memzone_entry), 0);
381
382         if (!mze) {
383                 pr_err("%s : Failed to allocate memory for memzone list\n",
384                        __func__);
385                 rte_memzone_free(rz);
386                 return NULL;
387         }
388
389         mze->rz = rz;
390
391         rte_spinlock_lock(&enic->memzone_list_lock);
392         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
393         rte_spinlock_unlock(&enic->memzone_list_lock);
394
395         return vaddr;
396 }
397
398 void
399 enic_free_consistent(void *priv,
400                      __rte_unused size_t size,
401                      void *vaddr,
402                      dma_addr_t dma_handle)
403 {
404         struct enic_memzone_entry *mze;
405         struct enic *enic = (struct enic *)priv;
406
407         rte_spinlock_lock(&enic->memzone_list_lock);
408         LIST_FOREACH(mze, &enic->memzone_list, entries) {
409                 if (mze->rz->addr == vaddr &&
410                     mze->rz->iova == dma_handle)
411                         break;
412         }
413         if (mze == NULL) {
414                 rte_spinlock_unlock(&enic->memzone_list_lock);
415                 dev_warning(enic,
416                             "Tried to free memory, but couldn't find it in the memzone list\n");
417                 return;
418         }
419         LIST_REMOVE(mze, entries);
420         rte_spinlock_unlock(&enic->memzone_list_lock);
421         rte_memzone_free(mze->rz);
422         rte_free(mze);
423 }
424
425 int enic_link_update(struct rte_eth_dev *eth_dev)
426 {
427         struct enic *enic = pmd_priv(eth_dev);
428         struct rte_eth_link link;
429
430         memset(&link, 0, sizeof(link));
431         link.link_status = enic_get_link_status(enic);
432         link.link_duplex = ETH_LINK_FULL_DUPLEX;
433         link.link_speed = vnic_dev_port_speed(enic->vdev);
434
435         return rte_eth_linkstatus_set(eth_dev, &link);
436 }
437
438 static void
439 enic_intr_handler(void *arg)
440 {
441         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
442         struct enic *enic = pmd_priv(dev);
443
444         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
445
446         enic_link_update(dev);
447         rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
448         enic_log_q_error(enic);
449         /* Re-enable irq in case of INTx */
450         rte_intr_ack(&enic->pdev->intr_handle);
451 }
452
453 static int enic_rxq_intr_init(struct enic *enic)
454 {
455         struct rte_intr_handle *intr_handle;
456         uint32_t rxq_intr_count, i;
457         int err;
458
459         intr_handle = enic->rte_dev->intr_handle;
460         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
461                 return 0;
462         /*
463          * Rx queue interrupts only work when we have MSI-X interrupts,
464          * one per queue. Sharing one interrupt is technically
465          * possible with VIC, but it is not worth the complications it brings.
466          */
467         if (!rte_intr_cap_multiple(intr_handle)) {
468                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469                         " (vfio-pci driver)\n");
470                 return -ENOTSUP;
471         }
472         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
474         if (err) {
475                 dev_err(enic, "Failed to enable event fds for Rx queue"
476                         " interrupts\n");
477                 return err;
478         }
479         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480                                             rxq_intr_count * sizeof(int), 0);
481         if (intr_handle->intr_vec == NULL) {
482                 dev_err(enic, "Failed to allocate intr_vec\n");
483                 return -ENOMEM;
484         }
485         for (i = 0; i < rxq_intr_count; i++)
486                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
487         return 0;
488 }
489
490 static void enic_rxq_intr_deinit(struct enic *enic)
491 {
492         struct rte_intr_handle *intr_handle;
493
494         intr_handle = enic->rte_dev->intr_handle;
495         rte_intr_efd_disable(intr_handle);
496         if (intr_handle->intr_vec != NULL) {
497                 rte_free(intr_handle->intr_vec);
498                 intr_handle->intr_vec = NULL;
499         }
500 }
501
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
503 {
504         struct wq_enet_desc *desc;
505         struct vnic_wq *wq;
506         unsigned int i;
507
508         /*
509          * Fill WQ descriptor fields that never change. Every descriptor is
510          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511          * descriptors (i.e. request one completion update every 32 packets).
512          */
513         wq = &enic->wq[queue_idx];
514         desc = (struct wq_enet_desc *)wq->ring.descs;
515         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518                         desc->header_length_flags |=
519                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
520         }
521 }
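/*
 * Illustration of enic_prep_wq_for_simple_tx() above: with
 * ENIC_WQ_CQ_THRESH == 32, CQ_ENTRY is set on descriptors 31, 63, 95, ...,
 * so the NIC writes roughly one completion per 32 transmitted packets.
 */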
522
523 /*
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used
525  * when that file is not compiled.
526  */
527 __rte_weak bool
528 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
529 {
530         return false;
531 }
532
533 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
534 {
535         struct enic *enic = pmd_priv(eth_dev);
536
537         /*
538          * Preference order:
539          * 1. The vectorized handler if possible and requested.
540          * 2. The non-scatter, simplified handler if scatter Rx is not used.
541          * 3. The default handler as a fallback.
542          */
543         if (enic_use_vector_rx_handler(eth_dev))
544                 return;
545         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
546                 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
547                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
548         } else {
549                 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
550                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
551         }
552 }
553
554 /* Secondary process uses this to set the Tx handler */
555 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
556 {
557         struct enic *enic = pmd_priv(eth_dev);
558
559         if (enic->use_simple_tx_handler) {
560                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
561                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
562         } else {
563                 ENICPMD_LOG(DEBUG, " use the default tx handler");
564                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
565         }
566 }
567
568 int enic_enable(struct enic *enic)
569 {
570         unsigned int index;
571         int err;
572         struct rte_eth_dev *eth_dev = enic->rte_dev;
573         uint64_t simple_tx_offloads;
574         uintptr_t p;
575
576         if (enic->enable_avx2_rx) {
577                 struct rte_mbuf mb_def = { .buf_addr = 0 };
578
579                 /*
580                  * mbuf_initializer contains const-after-init fields of
581                  * receive mbufs (i.e. 64 bits of fields from rearm_data).
582                  * It is currently used by the vectorized handler.
583                  */
584                 mb_def.nb_segs = 1;
585                 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
586                 mb_def.port = enic->port_id;
587                 rte_mbuf_refcnt_set(&mb_def, 1);
588                 rte_compiler_barrier();
589                 p = (uintptr_t)&mb_def.rearm_data;
590                 enic->mbuf_initializer = *(uint64_t *)p;
591         }
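        /*
         * Sketch of how a vectorized Rx path can consume mbuf_initializer
         * (illustrative only, not the actual enic_rxtx_vec_avx2.c code):
         * a single 64-bit store re-initializes data_off, refcnt, nb_segs and
         * port of a just-received mbuf:
         *
         *     *(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;
         */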
592
593         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
594         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
595
596         /* vnic notification of link status has already been turned on in
597          * enic_dev_init(), which is called at probe time. Here we are
598          * just turning on interrupt vector 0 if needed.
599          */
600         if (eth_dev->data->dev_conf.intr_conf.lsc)
601                 vnic_dev_notify_set(enic->vdev, 0);
602
603         err = enic_rxq_intr_init(enic);
604         if (err)
605                 return err;
606
607         /* Initialize flowman if not already initialized during probe */
608         if (enic->fm == NULL && enic_fm_init(enic))
609                 dev_warning(enic, "Init of flowman failed.\n");
610
611         for (index = 0; index < enic->rq_count; index++) {
612                 err = enic_alloc_rx_queue_mbufs(enic,
613                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
614                 if (err) {
615                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
616                         return err;
617                 }
618                 err = enic_alloc_rx_queue_mbufs(enic,
619                         &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
620                 if (err) {
621                         /* release the allocated mbufs for the sop rq */
622                         enic_rxmbuf_queue_release(enic,
623                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
624
625                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
626                         return err;
627                 }
628         }
629
630         /*
631          * Use the simple TX handler if possible. Only checksum offloads
632          * and vlan insertion are supported.
633          */
634         simple_tx_offloads = enic->tx_offload_capa &
635                 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
636                  DEV_TX_OFFLOAD_VLAN_INSERT |
637                  DEV_TX_OFFLOAD_IPV4_CKSUM |
638                  DEV_TX_OFFLOAD_UDP_CKSUM |
639                  DEV_TX_OFFLOAD_TCP_CKSUM);
640         if ((eth_dev->data->dev_conf.txmode.offloads &
641              ~simple_tx_offloads) == 0) {
642                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
643                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
644                 for (index = 0; index < enic->wq_count; index++)
645                         enic_prep_wq_for_simple_tx(enic, index);
646                 enic->use_simple_tx_handler = 1;
647         } else {
648                 ENICPMD_LOG(DEBUG, " use the default tx handler");
649                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
650         }
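        /*
         * For example (hypothetical offload sets): a txmode.offloads value of
         * DEV_TX_OFFLOAD_VLAN_INSERT | DEV_TX_OFFLOAD_IPV4_CKSUM selects the
         * simple handler, while adding DEV_TX_OFFLOAD_TCP_TSO leaves a bit
         * outside simple_tx_offloads set and falls back to the default
         * handler.
         */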
651
652         enic_pick_rx_handler(eth_dev);
653
654         for (index = 0; index < enic->wq_count; index++)
655                 enic_start_wq(enic, index);
656         for (index = 0; index < enic->rq_count; index++)
657                 enic_start_rq(enic, index);
658
659         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
660
661         vnic_dev_enable_wait(enic->vdev);
662
663         /* Register and enable error interrupt */
664         rte_intr_callback_register(&(enic->pdev->intr_handle),
665                 enic_intr_handler, (void *)enic->rte_dev);
666
667         rte_intr_enable(&(enic->pdev->intr_handle));
668         /* Unmask LSC interrupt */
669         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
670
671         return 0;
672 }
673
674 int enic_alloc_intr_resources(struct enic *enic)
675 {
676         int err;
677         unsigned int i;
678
679         dev_info(enic, "vNIC resources used: "
680                 "wq %d rq %d cq %d intr %d\n",
681                 enic->wq_count, enic_vnic_rq_count(enic),
682                 enic->cq_count, enic->intr_count);
683
684         for (i = 0; i < enic->intr_count; i++) {
685                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
686                 if (err) {
687                         enic_free_vnic_resources(enic);
688                         return err;
689                 }
690         }
691         return 0;
692 }
693
694 void enic_free_rq(void *rxq)
695 {
696         struct vnic_rq *rq_sop, *rq_data;
697         struct enic *enic;
698
699         if (rxq == NULL)
700                 return;
701
702         rq_sop = (struct vnic_rq *)rxq;
703         enic = vnic_dev_priv(rq_sop->vdev);
704         rq_data = &enic->rq[rq_sop->data_queue_idx];
705
706         if (rq_sop->free_mbufs) {
707                 struct rte_mbuf **mb;
708                 int i;
709
710                 mb = rq_sop->free_mbufs;
711                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
712                      i < ENIC_RX_BURST_MAX; i++)
713                         rte_pktmbuf_free(mb[i]);
714                 rte_free(rq_sop->free_mbufs);
715                 rq_sop->free_mbufs = NULL;
716                 rq_sop->num_free_mbufs = 0;
717         }
718
719         enic_rxmbuf_queue_release(enic, rq_sop);
720         if (rq_data->in_use)
721                 enic_rxmbuf_queue_release(enic, rq_data);
722
723         rte_free(rq_sop->mbuf_ring);
724         if (rq_data->in_use)
725                 rte_free(rq_data->mbuf_ring);
726
727         rq_sop->mbuf_ring = NULL;
728         rq_data->mbuf_ring = NULL;
729
730         vnic_rq_free(rq_sop);
731         if (rq_data->in_use)
732                 vnic_rq_free(rq_data);
733
734         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
735
736         rq_sop->in_use = 0;
737         rq_data->in_use = 0;
738 }
739
740 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
741 {
742         struct rte_eth_dev_data *data = enic->dev_data;
743         vnic_wq_enable(&enic->wq[queue_idx]);
744         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
745 }
746
747 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
748 {
749         struct rte_eth_dev_data *data = enic->dev_data;
750         int ret;
751
752         ret = vnic_wq_disable(&enic->wq[queue_idx]);
753         if (ret)
754                 return ret;
755
756         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
757         return 0;
758 }
759
760 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
761 {
762         struct rte_eth_dev_data *data = enic->dev_data;
763         struct vnic_rq *rq_sop;
764         struct vnic_rq *rq_data;
765         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
766         rq_data = &enic->rq[rq_sop->data_queue_idx];
767
768         if (rq_data->in_use) {
769                 vnic_rq_enable(rq_data);
770                 enic_initial_post_rx(enic, rq_data);
771         }
772         rte_mb();
773         vnic_rq_enable(rq_sop);
774         enic_initial_post_rx(enic, rq_sop);
775         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
776 }
777
778 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
779 {
780         struct rte_eth_dev_data *data = enic->dev_data;
781         int ret1 = 0, ret2 = 0;
782         struct vnic_rq *rq_sop;
783         struct vnic_rq *rq_data;
784         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
785         rq_data = &enic->rq[rq_sop->data_queue_idx];
786
787         ret2 = vnic_rq_disable(rq_sop);
788         rte_mb();
789         if (rq_data->in_use)
790                 ret1 = vnic_rq_disable(rq_data);
791
792         if (ret2)
793                 return ret2;
794         else if (ret1)
795                 return ret1;
796
797         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
798         return 0;
799 }
800
801 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
802         unsigned int socket_id, struct rte_mempool *mp,
803         uint16_t nb_desc, uint16_t free_thresh)
804 {
805         struct enic_vf_representor *vf;
806         int rc;
807         uint16_t sop_queue_idx;
808         uint16_t data_queue_idx;
809         uint16_t cq_idx;
810         struct vnic_rq *rq_sop;
811         struct vnic_rq *rq_data;
812         unsigned int mbuf_size, mbufs_per_pkt;
813         unsigned int nb_sop_desc, nb_data_desc;
814         uint16_t min_sop, max_sop, min_data, max_data;
815         uint32_t max_rx_pkt_len;
816
817         /*
818          * Representor uses a reserved PF queue. Translate representor
819          * queue number to PF queue number.
820          */
821         if (enic_is_vf_rep(enic)) {
822                 RTE_ASSERT(queue_idx == 0);
823                 vf = VF_ENIC_TO_VF_REP(enic);
824                 sop_queue_idx = vf->pf_rq_sop_idx;
825                 data_queue_idx = vf->pf_rq_data_idx;
826                 enic = vf->pf;
827                 queue_idx = sop_queue_idx;
828         } else {
829                 sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
830                 data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
831         }
832         cq_idx = enic_cq_rq(enic, sop_queue_idx);
833         rq_sop = &enic->rq[sop_queue_idx];
834         rq_data = &enic->rq[data_queue_idx];
835         rq_sop->is_sop = 1;
836         rq_sop->data_queue_idx = data_queue_idx;
837         rq_data->is_sop = 0;
838         rq_data->data_queue_idx = 0;
839         rq_sop->socket_id = socket_id;
840         rq_sop->mp = mp;
841         rq_data->socket_id = socket_id;
842         rq_data->mp = mp;
843         rq_sop->in_use = 1;
844         rq_sop->rx_free_thresh = free_thresh;
845         rq_data->rx_free_thresh = free_thresh;
846         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
847                   free_thresh);
848
849         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
850                                RTE_PKTMBUF_HEADROOM);
851         /* max_rx_pkt_len includes the ethernet header and CRC. */
852         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
853
854         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
855             DEV_RX_OFFLOAD_SCATTER) {
856                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
857                 /* ceil((max pkt len)/mbuf_size) */
858                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
859         } else {
860                 dev_info(enic, "Scatter rx mode disabled\n");
861                 mbufs_per_pkt = 1;
862                 if (max_rx_pkt_len > mbuf_size) {
863                         dev_warning(enic, "The maximum Rx packet size (%u) is"
864                                     " larger than the mbuf size (%u), and"
865                                     " scatter is disabled. Larger packets will"
866                                     " be truncated.\n",
867                                     max_rx_pkt_len, mbuf_size);
868                 }
869         }
870
871         if (mbufs_per_pkt > 1) {
872                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
873                 rq_sop->data_queue_enable = 1;
874                 rq_data->in_use = 1;
875                 /*
876                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
877                  * receives packet sizes up to the "max" MTU.
878                  * If not using scatter, we can achieve the effect of dropping
879                  * larger packets by reducing the size of posted buffers.
880                  * See enic_alloc_rx_queue_mbufs().
881                  */
882                 if (max_rx_pkt_len <
883                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
884                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
885                                     " when scatter rx mode is in use.\n");
886                 }
887         } else {
888                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
889                          queue_idx);
890                 rq_sop->data_queue_enable = 0;
891                 rq_data->in_use = 0;
892         }
893
894         /* number of descriptors has to be a multiple of 32 */
895         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
896         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
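        /*
         * Example (assuming ENIC_ALIGN_DESCS_MASK rounds down to a multiple
         * of 32, per the comment above): nb_desc = 512 with mbufs_per_pkt = 3
         * gives nb_sop_desc = (512 / 3) & mask = 160 and
         * nb_data_desc = (512 - 160) & mask = 352.
         */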
897
898         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
899         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
900
901         if (mbufs_per_pkt > 1) {
902                 min_sop = ENIC_RX_BURST_MAX;
903                 max_sop = ((enic->config.rq_desc_count /
904                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
905                 min_data = min_sop * (mbufs_per_pkt - 1);
906                 max_data = enic->config.rq_desc_count;
907         } else {
908                 min_sop = ENIC_RX_BURST_MAX;
909                 max_sop = enic->config.rq_desc_count;
910                 min_data = 0;
911                 max_data = 0;
912         }
913
914         if (nb_desc < (min_sop + min_data)) {
915                 dev_warning(enic,
916                             "Number of rx descs too low, adjusting to minimum\n");
917                 nb_sop_desc = min_sop;
918                 nb_data_desc = min_data;
919         } else if (nb_desc > (max_sop + max_data)) {
920                 dev_warning(enic,
921                             "Number of rx_descs too high, adjusting to maximum\n");
922                 nb_sop_desc = max_sop;
923                 nb_data_desc = max_data;
924         }
925         if (mbufs_per_pkt > 1) {
926                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
927                          " rx descriptor range is %u to %u\n",
928                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
929                          max_sop + max_data);
930         }
931         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
932                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
933
934         /* Allocate sop queue resources */
935         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
936                 nb_sop_desc, sizeof(struct rq_enet_desc));
937         if (rc) {
938                 dev_err(enic, "error in allocation of sop rq\n");
939                 goto err_exit;
940         }
941         nb_sop_desc = rq_sop->ring.desc_count;
942
943         if (rq_data->in_use) {
944                 /* Allocate data queue resources */
945                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
946                                    nb_data_desc,
947                                    sizeof(struct rq_enet_desc));
948                 if (rc) {
949                         dev_err(enic, "error in allocation of data rq\n");
950                         goto err_free_rq_sop;
951                 }
952                 nb_data_desc = rq_data->ring.desc_count;
953         }
954         rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
955                            socket_id, nb_sop_desc + nb_data_desc,
956                            sizeof(struct cq_enet_rq_desc));
957         if (rc) {
958                 dev_err(enic, "error in allocation of cq for rq\n");
959                 goto err_free_rq_data;
960         }
961
962         /* Allocate the mbuf rings */
963         rq_sop->mbuf_ring = (struct rte_mbuf **)
964                 rte_zmalloc_socket("rq->mbuf_ring",
965                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
966                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
967         if (rq_sop->mbuf_ring == NULL)
968                 goto err_free_cq;
969
970         if (rq_data->in_use) {
971                 rq_data->mbuf_ring = (struct rte_mbuf **)
972                         rte_zmalloc_socket("rq->mbuf_ring",
973                                 sizeof(struct rte_mbuf *) * nb_data_desc,
974                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
975                 if (rq_data->mbuf_ring == NULL)
976                         goto err_free_sop_mbuf;
977         }
978
979         rq_sop->free_mbufs = (struct rte_mbuf **)
980                 rte_zmalloc_socket("rq->free_mbufs",
981                                    sizeof(struct rte_mbuf *) *
982                                    ENIC_RX_BURST_MAX,
983                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
984         if (rq_sop->free_mbufs == NULL)
985                 goto err_free_data_mbuf;
986         rq_sop->num_free_mbufs = 0;
987
988         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
989
990         return 0;
991
992 err_free_data_mbuf:
993         rte_free(rq_data->mbuf_ring);
994 err_free_sop_mbuf:
995         rte_free(rq_sop->mbuf_ring);
996 err_free_cq:
997         /* cleanup on error */
998         vnic_cq_free(&enic->cq[cq_idx]);
999 err_free_rq_data:
1000         if (rq_data->in_use)
1001                 vnic_rq_free(rq_data);
1002 err_free_rq_sop:
1003         vnic_rq_free(rq_sop);
1004 err_exit:
1005         return -ENOMEM;
1006 }
1007
1008 void enic_free_wq(void *txq)
1009 {
1010         struct vnic_wq *wq;
1011         struct enic *enic;
1012
1013         if (txq == NULL)
1014                 return;
1015
1016         wq = (struct vnic_wq *)txq;
1017         enic = vnic_dev_priv(wq->vdev);
1018         rte_memzone_free(wq->cqmsg_rz);
1019         vnic_wq_free(wq);
1020         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1021 }
1022
1023 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1024         unsigned int socket_id, uint16_t nb_desc)
1025 {
1026         struct enic_vf_representor *vf;
1027         int err;
1028         struct vnic_wq *wq;
1029         unsigned int cq_index;
1030         char name[RTE_MEMZONE_NAMESIZE];
1031         static int instance;
1032
1033         /*
1034          * Representor uses a reserved PF queue. Translate representor
1035          * queue number to PF queue number.
1036          */
1037         if (enic_is_vf_rep(enic)) {
1038                 RTE_ASSERT(queue_idx == 0);
1039                 vf = VF_ENIC_TO_VF_REP(enic);
1040                 queue_idx = vf->pf_wq_idx;
1041                 cq_index = vf->pf_wq_cq_idx;
1042                 enic = vf->pf;
1043         } else {
1044                 cq_index = enic_cq_wq(enic, queue_idx);
1045         }
1046         wq = &enic->wq[queue_idx];
1047         wq->socket_id = socket_id;
1048         /*
1049          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1050          * print an info message for diagnostics.
1051          */
1052         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1053
1054         /* Allocate queue resources */
1055         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1056                 nb_desc,
1057                 sizeof(struct wq_enet_desc));
1058         if (err) {
1059                 dev_err(enic, "error in allocation of wq\n");
1060                 return err;
1061         }
1062
1063         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1064                 socket_id, nb_desc,
1065                 sizeof(struct cq_enet_wq_desc));
1066         if (err) {
1067                 vnic_wq_free(wq);
1068                 dev_err(enic, "error in allocation of cq for wq\n");
                     return err;
1069         }
1070
1071         /* set up CQ message */
1072         snprintf((char *)name, sizeof(name),
1073                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1074                 instance++);
1075
1076         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1077                         sizeof(uint32_t), SOCKET_ID_ANY,
1078                         RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1079         if (!wq->cqmsg_rz)
1080                 return -ENOMEM;
1081
1082         return err;
1083 }
1084
1085 int enic_disable(struct enic *enic)
1086 {
1087         unsigned int i;
1088         int err;
1089
1090         for (i = 0; i < enic->intr_count; i++) {
1091                 vnic_intr_mask(&enic->intr[i]);
1092                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1093         }
1094         enic_rxq_intr_deinit(enic);
1095         rte_intr_disable(&enic->pdev->intr_handle);
1096         rte_intr_callback_unregister(&enic->pdev->intr_handle,
1097                                      enic_intr_handler,
1098                                      (void *)enic->rte_dev);
1099
1100         vnic_dev_disable(enic->vdev);
1101
1102         enic_fm_destroy(enic);
1103
1104         if (!enic_is_sriov_vf(enic))
1105                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1106
1107         for (i = 0; i < enic->wq_count; i++) {
1108                 err = vnic_wq_disable(&enic->wq[i]);
1109                 if (err)
1110                         return err;
1111         }
1112         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1113                 if (enic->rq[i].in_use) {
1114                         err = vnic_rq_disable(&enic->rq[i]);
1115                         if (err)
1116                                 return err;
1117                 }
1118         }
1119
1120         /* If we were using interrupts, set the interrupt vector to -1
1121          * to disable interrupts.  We are not disabling link notifications,
1122          * though, as we want the polling of link status to continue working.
1123          */
1124         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1125                 vnic_dev_notify_set(enic->vdev, -1);
1126
1127         vnic_dev_set_reset_flag(enic->vdev, 1);
1128
1129         for (i = 0; i < enic->wq_count; i++)
1130                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1131
1132         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1133                 if (enic->rq[i].in_use)
1134                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1135         for (i = 0; i < enic->cq_count; i++)
1136                 vnic_cq_clean(&enic->cq[i]);
1137         for (i = 0; i < enic->intr_count; i++)
1138                 vnic_intr_clean(&enic->intr[i]);
1139
1140         return 0;
1141 }
1142
1143 static int enic_dev_wait(struct vnic_dev *vdev,
1144         int (*start)(struct vnic_dev *, int),
1145         int (*finished)(struct vnic_dev *, int *),
1146         int arg)
1147 {
1148         int done;
1149         int err;
1150         int i;
1151
1152         err = start(vdev, arg);
1153         if (err)
1154                 return err;
1155
1156         /* Wait for func to complete...2 seconds max */
1157         for (i = 0; i < 2000; i++) {
1158                 err = finished(vdev, &done);
1159                 if (err)
1160                         return err;
1161                 if (done)
1162                         return 0;
1163                 usleep(1000);
1164         }
1165         return -ETIMEDOUT;
1166 }
1167
1168 static int enic_dev_open(struct enic *enic)
1169 {
1170         int err;
1171         int flags = CMD_OPENF_IG_DESCCACHE;
1172
1173         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1174                 vnic_dev_open_done, flags);
1175         if (err)
1176                 dev_err(enic_get_dev(enic),
1177                         "vNIC device open failed, err %d\n", err);
1178
1179         return err;
1180 }
1181
1182 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1183 {
1184         dma_addr_t rss_key_buf_pa;
1185         union vnic_rss_key *rss_key_buf_va = NULL;
1186         int err, i;
1187         uint8_t name[RTE_MEMZONE_NAMESIZE];
1188
1189         RTE_ASSERT(user_key != NULL);
1190         snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1191         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1192                 &rss_key_buf_pa, name);
1193         if (!rss_key_buf_va)
1194                 return -ENOMEM;
1195
1196         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1197                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
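        /*
         * The loop above scatters the 40-byte (ENIC_RSS_HASH_KEY_SIZE) key
         * into four 10-byte sub-keys: byte i of user_key lands in
         * key[i / 10].b[i % 10], e.g. byte 23 goes to key[2].b[3].
         */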
1198
1199         err = enic_set_rss_key(enic,
1200                 rss_key_buf_pa,
1201                 sizeof(union vnic_rss_key));
1202
1203         /* Save for later queries */
1204         if (!err) {
1205                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1206                            sizeof(union vnic_rss_key));
1207         }
1208         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1209                 rss_key_buf_va, rss_key_buf_pa);
1210
1211         return err;
1212 }
1213
1214 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1215 {
1216         dma_addr_t rss_cpu_buf_pa;
1217         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1218         int err;
1219         uint8_t name[RTE_MEMZONE_NAMESIZE];
1220
1221         snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1222         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1223                 &rss_cpu_buf_pa, name);
1224         if (!rss_cpu_buf_va)
1225                 return -ENOMEM;
1226
1227         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1228
1229         err = enic_set_rss_cpu(enic,
1230                 rss_cpu_buf_pa,
1231                 sizeof(union vnic_rss_cpu));
1232
1233         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1234                 rss_cpu_buf_va, rss_cpu_buf_pa);
1235
1236         /* Save for later queries */
1237         if (!err)
1238                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1239         return err;
1240 }
1241
1242 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1243         uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1244         uint8_t rss_enable)
1245 {
1246         const uint8_t tso_ipid_split_en = 0;
1247         int err;
1248
1249         err = enic_set_nic_cfg(enic,
1250                 rss_default_cpu, rss_hash_type,
1251                 rss_hash_bits, rss_base_cpu,
1252                 rss_enable, tso_ipid_split_en,
1253                 enic->ig_vlan_strip_en);
1254
1255         return err;
1256 }
1257
1258 /* Initialize RSS with defaults, called from dev_configure */
1259 int enic_init_rss_nic_cfg(struct enic *enic)
1260 {
1261         static uint8_t default_rss_key[] = {
1262                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1263                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1264                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1265                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1266         };
1267         struct rte_eth_rss_conf rss_conf;
1268         union vnic_rss_cpu rss_cpu;
1269         int ret, i;
1270
1271         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1272         /*
1273          * If setting key for the first time, and the user gives us none, then
1274          * push the default key to NIC.
1275          */
1276         if (rss_conf.rss_key == NULL) {
1277                 rss_conf.rss_key = default_rss_key;
1278                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1279         }
1280         ret = enic_set_rss_conf(enic, &rss_conf);
1281         if (ret) {
1282                 dev_err(enic, "Failed to configure RSS\n");
1283                 return ret;
1284         }
1285         if (enic->rss_enable) {
1286                 /* If enabling RSS, use the default reta */
1287                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1288                         rss_cpu.cpu[i / 4].b[i % 4] =
1289                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1290                 }
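                /*
                 * For example (hypothetical): with rq_count = 3, RETA entry i
                 * maps to the SOP RQ of Rx queue i % 3, i.e. queues
                 * 0, 1, 2, 0, 1, 2, ... across all ENIC_RSS_RETA_SIZE
                 * entries, packed four entries per rss_cpu.cpu[] element.
                 */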
1291                 ret = enic_set_rss_reta(enic, &rss_cpu);
1292                 if (ret)
1293                         dev_err(enic, "Failed to set RSS indirection table\n");
1294         }
1295         return ret;
1296 }
1297
1298 int enic_setup_finish(struct enic *enic)
1299 {
1300         enic_init_soft_stats(enic);
1301
1302         /* switchdev: enable promisc mode on PF */
1303         if (enic->switchdev_mode) {
1304                 vnic_dev_packet_filter(enic->vdev,
1305                                        0 /* directed  */,
1306                                        0 /* multicast */,
1307                                        0 /* broadcast */,
1308                                        1 /* promisc   */,
1309                                        0 /* allmulti  */);
1310                 enic->promisc = 1;
1311                 enic->allmulti = 0;
1312                 return 0;
1313         }
1314         /* Default conf */
1315         vnic_dev_packet_filter(enic->vdev,
1316                 1 /* directed  */,
1317                 1 /* multicast */,
1318                 1 /* broadcast */,
1319                 0 /* promisc   */,
1320                 1 /* allmulti  */);
1321
1322         enic->promisc = 0;
1323         enic->allmulti = 1;
1324
1325         return 0;
1326 }
1327
1328 static int enic_rss_conf_valid(struct enic *enic,
1329                                struct rte_eth_rss_conf *rss_conf)
1330 {
1331         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1332         if (enic->flow_type_rss_offloads == 0)
1333                 return 0;
1334         if (rss_conf->rss_key != NULL &&
1335             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1336                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1337                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1338                 return -EINVAL;
1339         }
1340         if (rss_conf->rss_hf != 0 &&
1341             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1342                 dev_err(enic, "Given rss_hf contains none of the supported"
1343                         " types\n");
1344                 return -EINVAL;
1345         }
1346         return 0;
1347 }
1348
1349 /* Set hash type and key according to rss_conf */
1350 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1351 {
1352         struct rte_eth_dev *eth_dev;
1353         uint64_t rss_hf;
1354         uint8_t rss_hash_type;
1355         uint8_t rss_enable;
1356         int ret;
1357
1358         RTE_ASSERT(rss_conf != NULL);
1359         ret = enic_rss_conf_valid(enic, rss_conf);
1360         if (ret) {
1361                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1362                 return ret;
1363         }
1364
1365         eth_dev = enic->rte_dev;
1366         rss_hash_type = 0;
1367         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1368         if (enic->rq_count > 1 &&
1369             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1370             rss_hf != 0) {
1371                 rss_enable = 1;
1372                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1373                               ETH_RSS_NONFRAG_IPV4_OTHER))
1374                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1375                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1376                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1377                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1378                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1379                         if (enic->udp_rss_weak) {
1380                                 /*
1381                                  * 'TCP' is not a typo. The "weak" version of
1382                                  * UDP RSS requires both the TCP and UDP bits
1383                                  * be set. It does enable TCP RSS as well.
1384                                  */
1385                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1386                         }
1387                 }
1388                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1389                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1390                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1391                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1392                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1393                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1394                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1395                         if (enic->udp_rss_weak)
1396                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1397                 }
1398         } else {
1399                 rss_enable = 0;
1400                 rss_hf = 0;
1401         }
1402
1403         /* Set the hash key if provided */
1404         if (rss_enable && rss_conf->rss_key) {
1405                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1406                 if (ret) {
1407                         dev_err(enic, "Failed to set RSS key\n");
1408                         return ret;
1409                 }
1410         }
1411
1412         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1413                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1414                               rss_enable);
1415         if (!ret) {
1416                 enic->rss_hf = rss_hf;
1417                 enic->rss_hash_type = rss_hash_type;
1418                 enic->rss_enable = rss_enable;
1419         } else {
1420                 dev_err(enic, "Failed to update RSS configurations."
1421                         " hash=0x%x\n", rss_hash_type);
1422         }
1423         return ret;
1424 }
1425
1426 int enic_set_vlan_strip(struct enic *enic)
1427 {
1428         /*
1429          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1430          * together. So, re-do niccfg, preserving the current RSS settings.
1431          */
1432         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1433                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1434                                enic->rss_enable);
1435 }
1436
1437 int enic_add_packet_filter(struct enic *enic)
1438 {
1439         /* switchdev ignores packet filters */
1440         if (enic->switchdev_mode) {
1441                 ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1442                 return 0;
1443         }
1444         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1445         return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1446                 enic->promisc, enic->allmulti);
1447 }
1448
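/* Return the current link state as reported by the vNIC */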
1449 int enic_get_link_status(struct enic *enic)
1450 {
1451         return vnic_dev_link_status(enic->vdev);
1452 }
1453
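/*
 * Undo enic_dev_init(): stop link-status notification and free the CQ,
 * interrupt, RQ, and WQ control arrays.
 */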
1454 static void enic_dev_deinit(struct enic *enic)
1455 {
1456         /* stop link status checking */
1457         vnic_dev_notify_unset(enic->vdev);
1458
1459         /* mac_addrs is freed by rte_eth_dev_release_port() */
1460         rte_free(enic->cq);
1461         rte_free(enic->intr);
1462         rte_free(enic->rq);
1463         rte_free(enic->wq);
1464 }
1465
1466
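/*
 * Verify that the vNIC was provisioned with enough RQs, WQs, CQs, and
 * interrupts for the queue counts requested through the ethdev API
 * (plus any queues needed for VF representors), and record the counts
 * that will actually be used.
 */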
1467 int enic_set_vnic_res(struct enic *enic)
1468 {
1469         struct rte_eth_dev *eth_dev = enic->rte_dev;
1470         int rc = 0;
1471         unsigned int required_rq, required_wq, required_cq, required_intr;
1472
1473         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1474         required_rq = eth_dev->data->nb_rx_queues * 2;
1475         required_wq = eth_dev->data->nb_tx_queues;
1476         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1477         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1478         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1479                 required_intr += eth_dev->data->nb_rx_queues;
1480         }
1481         ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1482                     required_rq, required_wq, required_cq);
1483         if (enic->vf_required_rq) {
1484                 /* Queues needed for VF representors */
1485                 required_rq += enic->vf_required_rq;
1486                 required_wq += enic->vf_required_wq;
1487                 required_cq += enic->vf_required_cq;
1488                 ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1489                             enic->vf_required_rq, enic->vf_required_wq,
1490                             enic->vf_required_cq);
1491         }
1492
1493         if (enic->conf_rq_count < required_rq) {
1494                 dev_err(dev, "Not enough Receive queues. Requested:%u which needs %u RQs on the VIC, Configured:%u\n",
1495                         eth_dev->data->nb_rx_queues,
1496                         required_rq, enic->conf_rq_count);
1497                 rc = -EINVAL;
1498         }
1499         if (enic->conf_wq_count < required_wq) {
1500                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1501                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1502                 rc = -EINVAL;
1503         }
1504
1505         if (enic->conf_cq_count < required_cq) {
1506                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1507                         required_cq, enic->conf_cq_count);
1508                 rc = -EINVAL;
1509         }
1510         if (enic->conf_intr_count < required_intr) {
1511                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1512                         " interrupts. Required:%u, Configured:%u\n",
1513                         required_intr, enic->conf_intr_count);
1514                 rc = -EINVAL;
1515         }
1516
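        /* All resource checks passed: record the queue and interrupt counts to use */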
1517         if (rc == 0) {
1518                 enic->rq_count = eth_dev->data->nb_rx_queues;
1519                 enic->wq_count = eth_dev->data->nb_tx_queues;
1520                 enic->cq_count = enic->rq_count + enic->wq_count;
1521                 enic->intr_count = required_intr;
1522         }
1523
1524         return rc;
1525 }
1526
1527 /* Re-initialize the CQ and the SOP/data RQs for an Rx queue, then refill the RQs with mbufs */
1528 static int
1529 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1530 {
1531         struct vnic_rq *sop_rq, *data_rq;
1532         unsigned int cq_idx;
1533         int rc = 0;
1534
1535         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1536         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1537         cq_idx = enic_cq_rq(enic, rq_idx);
1538
1539         vnic_cq_clean(&enic->cq[cq_idx]);
1540         vnic_cq_init(&enic->cq[cq_idx],
1541                      0 /* flow_control_enable */,
1542                      1 /* color_enable */,
1543                      0 /* cq_head */,
1544                      0 /* cq_tail */,
1545                      1 /* cq_tail_color */,
1546                      0 /* interrupt_enable */,
1547                      1 /* cq_entry_enable */,
1548                      0 /* cq_message_enable */,
1549                      0 /* interrupt offset */,
1550                      0 /* cq_message_addr */);
1551
1552
1553         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1554                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1555                            sop_rq->ring.desc_count - 1, 1, 0);
1556         if (data_rq->in_use) {
1557                 vnic_rq_init_start(data_rq,
1558                                    enic_cq_rq(enic,
1559                                    enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1560                                    0, data_rq->ring.desc_count - 1, 1, 0);
1561         }
1562
1563         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1564         if (rc)
1565                 return rc;
1566
1567         if (data_rq->in_use) {
1568                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1569                 if (rc) {
1570                         enic_rxmbuf_queue_release(enic, sop_rq);
1571                         return rc;
1572                 }
1573         }
1574
1575         return 0;
1576 }
1577
1578 /* The Cisco NIC can send and receive packets up to a max packet size
1579  * determined by the NIC type and firmware. There is also an MTU
1580  * configured into the NIC via the CIMC/UCSM management interface
1581  * which can be overridden by this function (up to the max packet size).
1582  * Depending on the network setup, doing so may cause packet drops
1583  * and unexpected behavior.
1584  */
1585 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1586 {
1587         unsigned int rq_idx;
1588         struct vnic_rq *rq;
1589         int rc = 0;
1590         uint16_t old_mtu;       /* previous setting */
1591         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1592         struct rte_eth_dev *eth_dev = enic->rte_dev;
1593
1594         old_mtu = eth_dev->data->mtu;
1595         config_mtu = enic->config.mtu;
1596
1597         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1598                 return -E_RTE_SECONDARY;
1599
1600         if (new_mtu > enic->max_mtu) {
1601                 dev_err(enic,
1602                         "MTU not updated: requested (%u) greater than max (%u)\n",
1603                         new_mtu, enic->max_mtu);
1604                 return -EINVAL;
1605         }
1606         if (new_mtu < ENIC_MIN_MTU) {
1607                 dev_info(enic,
1608                         "MTU not updated: requested (%u) less than min (%u)\n",
1609                         new_mtu, ENIC_MIN_MTU);
1610                 return -EINVAL;
1611         }
1612         if (new_mtu > config_mtu)
1613                 dev_warning(enic,
1614                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1615                         new_mtu, config_mtu);
1616
1617         /* Update the MTU and maximum packet length */
1618         eth_dev->data->mtu = new_mtu;
1619         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1620                 enic_mtu_to_max_rx_pktlen(new_mtu);
1621
1622         /*
1623          * If the device has not been started yet, there is nothing more to
1624          * do now; enic_enable() will set up the RQs with the new maximum
1625          * packet length when it runs.
1626          */
1627         if (!eth_dev->data->dev_started)
1628                 goto set_mtu_done;
1629
1630         /*
1631          * The device has started, so rebuild the RQs on the fly. In the
1632          * process, we pick up the new maximum packet length.
1633          *
1634          * Some applications rely on the ability to change MTU without stopping
1635          * the device. So keep this behavior for now.
1636          */
1637         rte_spinlock_lock(&enic->mtu_lock);
1638
1639         /* Stop traffic on all RQs */
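        /* Each eth_dev Rx queue maps to two vNIC RQs (SOP + data), hence rq_count * 2 */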
1640         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1641                 rq = &enic->rq[rq_idx];
1642                 if (rq->is_sop && rq->in_use) {
1643                         rc = enic_stop_rq(enic,
1644                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1645                         if (rc) {
1646                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1647                                 goto set_mtu_done;
1648                         }
1649                 }
1650         }
1651
1652         /* replace Rx function with a no-op to avoid getting stale pkts */
1653         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1654         rte_mb();
1655
1656         /* Allow time for threads to exit the real Rx function. */
1657         usleep(100000);
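        /*
         * Note: the 100 ms sleep is a grace period, not a hard guarantee;
         * there is no explicit synchronization with data-path threads.
         */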
1658
1659         /* now it is safe to reconfigure the RQs */
1660
1661
1662         /* free and reallocate RQs with the new MTU */
1663         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1664                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1665                 if (!rq->in_use)
1666                         continue;
1667
1668                 enic_free_rq(rq);
1669                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1670                                    rq->tot_nb_desc, rq->rx_free_thresh);
1671                 if (rc) {
1672                         dev_err(enic,
1673                                 "Fatal MTU alloc error - no traffic will pass\n");
1674                         goto set_mtu_done;
1675                 }
1676
1677                 rc = enic_reinit_rq(enic, rq_idx);
1678                 if (rc) {
1679                         dev_err(enic,
1680                                 "Fatal MTU RQ reinit error - no traffic will pass\n");
1681                         goto set_mtu_done;
1682                 }
1683         }
1684
1685         /* put back the real receive function */
1686         rte_mb();
1687         enic_pick_rx_handler(eth_dev);
1688         rte_mb();
1689
1690         /* restart Rx traffic */
1691         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1692                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1693                 if (rq->is_sop && rq->in_use)
1694                         enic_start_rq(enic, rq_idx);
1695         }
1696
1697 set_mtu_done:
1698         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1699         rte_spinlock_unlock(&enic->mtu_lock);
1700         return rc;
1701 }
1702
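/*
 * One-time device initialization: read the vNIC configuration and resource
 * counts, allocate the per-queue control structures and MAC address table,
 * enable link-status notification, and configure overlay (VXLAN/Geneve)
 * offloads.
 */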
1703 static int enic_dev_init(struct enic *enic)
1704 {
1705         int err;
1706         struct rte_eth_dev *eth_dev = enic->rte_dev;
1707
1708         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1709
1710         /* Get vNIC configuration */
1711
1712         err = enic_get_vnic_config(enic);
1713         if (err) {
1714                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1715                 return err;
1716         }
1717
1718         /* Get available resource counts */
1719         enic_get_res_counts(enic);
1720         if (enic->conf_rq_count == 1) {
1721                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1722                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1723                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1724                 return -EINVAL;
1725         }
1726         /* Queue counts may be zero. rte_zmalloc() returns NULL in that case. */
1727         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1728                                enic->conf_cq_count, 8);
1729         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1730                                  enic->conf_intr_count, 8);
1731         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1732                                enic->conf_rq_count, 8);
1733         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1734                                enic->conf_wq_count, 8);
1735         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1736                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1737                 return -1;
1738         }
1739         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1740                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1741                 return -1;
1742         }
1743         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1744                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1745                 return -1;
1746         }
1747         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1748                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1749                 return -1;
1750         }
1751
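        /*
         * Allocate the unicast perfect-filter MAC table and seed its first
         * entry with the adapter's primary MAC address (enic->mac_addr).
         */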
1752         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1753                                         sizeof(struct rte_ether_addr) *
1754                                         ENIC_UNICAST_PERFECT_FILTERS, 0);
1755         if (!eth_dev->data->mac_addrs) {
1756                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1757                 return -1;
1758         }
1759         rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1760                         eth_dev->data->mac_addrs);
1761
1762         vnic_dev_set_reset_flag(enic->vdev, 0);
1763
1764         LIST_INIT(&enic->flows);
1765
1766         /* set up link status checking */
1767         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1768
1769         /*
1770          * When Geneve with options offload is available, always disable it
1771          * first as it can interfere with user flow rules.
1772          */
1773         if (enic->geneve_opt_avail) {
1774                 /*
1775                  * Disabling fails if the feature is provisioned but
1776                  * not enabled, so ignore the result and do not log an error.
1777                  */
1778                 vnic_dev_overlay_offload_ctrl(enic->vdev,
1779                         OVERLAY_FEATURE_GENEVE,
1780                         OVERLAY_OFFLOAD_DISABLE);
1781         }
1782         enic->overlay_offload = false;
1783         if (enic->disable_overlay && enic->vxlan) {
1784                 /*
1785                  * Explicitly disable overlay offload as the setting is
1786                  * sticky, and resetting the vNIC does not disable it.
1787                  */
1788                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1789                                                   OVERLAY_FEATURE_VXLAN,
1790                                                   OVERLAY_OFFLOAD_DISABLE)) {
1791                         dev_err(enic, "failed to disable overlay offload\n");
1792                 } else {
1793                         dev_info(enic, "Overlay offload is disabled\n");
1794                 }
1795         }
1796         if (!enic->disable_overlay && enic->vxlan &&
1797             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1798             vnic_dev_overlay_offload_ctrl(enic->vdev,
1799                                           OVERLAY_FEATURE_VXLAN,
1800                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1801                 enic->tx_offload_capa |=
1802                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1803                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1804                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1805                 enic->tx_offload_mask |=
1806                         PKT_TX_OUTER_IPV6 |
1807                         PKT_TX_OUTER_IPV4 |
1808                         PKT_TX_OUTER_IP_CKSUM |
1809                         PKT_TX_TUNNEL_MASK;
1810                 enic->overlay_offload = true;
1811                 dev_info(enic, "Overlay offload is enabled\n");
1812         }
1813         /* Geneve with options offload requires overlay offload */
1814         if (enic->overlay_offload && enic->geneve_opt_avail &&
1815             enic->geneve_opt_request) {
1816                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1817                                 OVERLAY_FEATURE_GENEVE,
1818                                 OVERLAY_OFFLOAD_ENABLE)) {
1819                         dev_err(enic, "failed to enable geneve+option\n");
1820                 } else {
1821                         enic->geneve_opt_enabled = 1;
1822                         dev_info(enic, "Geneve with options is enabled\n");
1823                 }
1824         }
1825         /*
1826          * Reset the vxlan port if HW vxlan parsing is available. It
1827          * is always enabled regardless of overlay offload
1828          * enable/disable.
1829          */
1830         if (enic->vxlan) {
1831                 enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1832                 /*
1833                  * Reset the vxlan port to the default, as the NIC firmware
1834                  * does not reset it automatically and keeps the old setting.
1835                  */
1836                 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1837                                                  OVERLAY_CFG_VXLAN_PORT_UPDATE,
1838                                                  RTE_VXLAN_DEFAULT_PORT)) {
1839                         dev_err(enic, "failed to update vxlan port\n");
1840                         return -EINVAL;
1841                 }
1842         }
1843
1844         if (enic_fm_init(enic))
1845                 dev_warning(enic, "Init of flowman failed.\n");
1846         return 0;
1847
1848 }
1849
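/*
 * devcmd lock/unlock callbacks. In switchdev mode the PF and its VF
 * representors share one devcmd channel, so accesses are serialized with
 * the PF's devcmd_lock (see enic_probe()).
 */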
1850 static void lock_devcmd(void *priv)
1851 {
1852         struct enic *enic = priv;
1853
1854         rte_spinlock_lock(&enic->devcmd_lock);
1855 }
1856
1857 static void unlock_devcmd(void *priv)
1858 {
1859         struct enic *enic = priv;
1860
1861         rte_spinlock_unlock(&enic->devcmd_lock);
1862 }
1863
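/*
 * Probe-time initialization: set up access to BAR0, register and open the
 * vNIC device, set the ingress VLAN rewrite mode, and run enic_dev_init().
 * Secondary processes return early since the hardware is already initialized.
 */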
1864 int enic_probe(struct enic *enic)
1865 {
1866         struct rte_pci_device *pdev = enic->pdev;
1867         int err = -1;
1868
1869         dev_debug(enic, "Initializing ENIC PMD\n");
1870
1871         /* If this is a secondary process, the hardware is already initialized */
1872         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1873                 return 0;
1874
1875         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1876         enic->bar0.len = pdev->mem_resource[0].len;
1877
1878         /* Register vNIC device */
1879         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1880         if (!enic->vdev) {
1881                 dev_err(enic, "vNIC registration failed, aborting\n");
1882                 goto err_out;
1883         }
1884
1885         LIST_INIT(&enic->memzone_list);
1886         rte_spinlock_init(&enic->memzone_list_lock);
1887
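        /*
         * Register allocator callbacks so the common vNIC code can obtain
         * DMA-able memory; the handlers track their allocations in the
         * memzone_list initialized above.
         */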
1888         vnic_register_cbacks(enic->vdev,
1889                 enic_alloc_consistent,
1890                 enic_free_consistent);
1891
1892         /*
1893          * Allocate the consistent memory for stats upfront so both primary and
1894          * secondary processes can dump stats.
1895          */
1896         err = vnic_dev_alloc_stats_mem(enic->vdev);
1897         if (err) {
1898                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1899                 goto err_out_unregister;
1900         }
1901         /* Issue device open to get device in known state */
1902         err = enic_dev_open(enic);
1903         if (err) {
1904                 dev_err(enic, "vNIC dev open failed, aborting\n");
1905                 goto err_out_unregister;
1906         }
1907
1908         /* Set ingress vlan rewrite mode before vnic initialization */
1909         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1910                   enic->ig_vlan_rewrite_mode);
1911         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1912                 enic->ig_vlan_rewrite_mode);
1913         if (err) {
1914                 dev_err(enic,
1915                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1916                 goto err_out_dev_close;
1917         }
1918
1919         /* Issue device init to initialize the vnic-to-switch link.
1920          * We'll start with carrier off and wait for link UP
1921          * notification later to turn on carrier.  We don't need
1922          * to wait here for the vnic-to-switch link initialization
1923          * to complete; link UP notification is the indication that
1924          * the process is complete.
1925          */
1926
1927         err = vnic_dev_init(enic->vdev, 0);
1928         if (err) {
1929                 dev_err(enic, "vNIC dev init failed, aborting\n");
1930                 goto err_out_dev_close;
1931         }
1932
1933         err = enic_dev_init(enic);
1934         if (err) {
1935                 dev_err(enic, "Device initialization failed, aborting\n");
1936                 goto err_out_dev_close;
1937         }
1938
1939         /* Use a PF spinlock to serialize devcmd from PF and VF representors */
1940         if (enic->switchdev_mode) {
1941                 rte_spinlock_init(&enic->devcmd_lock);
1942                 vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1943         }
1944         return 0;
1945
1946 err_out_dev_close:
1947         vnic_dev_close(enic->vdev);
1948 err_out_unregister:
1949         vnic_dev_unregister(enic->vdev);
1950 err_out:
1951         return err;
1952 }
1953
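/* Tear down everything set up by enic_probe() and enic_dev_init() */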
1954 void enic_remove(struct enic *enic)
1955 {
1956         enic_dev_deinit(enic);
1957         vnic_dev_close(enic->vdev);
1958         vnic_dev_unregister(enic->vdev);
1959 }