1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11
12 #include <rte_pci.h>
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_string_fns.h>
18 #include <ethdev_driver.h>
19
20 #include "enic_compat.h"
21 #include "enic.h"
22 #include "wq_enet_desc.h"
23 #include "rq_enet_desc.h"
24 #include "cq_enet_desc.h"
25 #include "vnic_enet.h"
26 #include "vnic_dev.h"
27 #include "vnic_wq.h"
28 #include "vnic_rq.h"
29 #include "vnic_cq.h"
30 #include "vnic_intr.h"
31 #include "vnic_nic.h"
32
33 static inline int enic_is_sriov_vf(struct enic *enic)
34 {
35         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
36 }
37
38 static int is_zero_addr(uint8_t *addr)
39 {
40         return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
41 }
42
43 static int is_mcast_addr(uint8_t *addr)
44 {
45         return addr[0] & 1;
46 }
47
48 static int is_eth_addr_valid(uint8_t *addr)
49 {
50         return !is_mcast_addr(addr) && !is_zero_addr(addr);
51 }
52
53 void
54 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
55 {
56         uint16_t i;
57
58         if (!rq || !rq->mbuf_ring) {
59                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
60                 return;
61         }
62
63         for (i = 0; i < rq->ring.desc_count; i++) {
64                 if (rq->mbuf_ring[i]) {
65                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
66                         rq->mbuf_ring[i] = NULL;
67                 }
68         }
69 }
70
71 void enic_free_wq_buf(struct rte_mbuf **buf)
72 {
73         struct rte_mbuf *mbuf = *buf;
74
75         rte_pktmbuf_free_seg(mbuf);
76         *buf = NULL;
77 }
78
79 static void enic_log_q_error(struct enic *enic)
80 {
81         unsigned int i;
82         uint32_t error_status;
83
84         for (i = 0; i < enic->wq_count; i++) {
85                 error_status = vnic_wq_error_status(&enic->wq[i]);
86                 if (error_status)
87                         dev_err(enic, "WQ[%d] error_status %d\n", i,
88                                 error_status);
89         }
90
91         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
92                 if (!enic->rq[i].in_use)
93                         continue;
94                 error_status = vnic_rq_error_status(&enic->rq[i]);
95                 if (error_status)
96                         dev_err(enic, "RQ[%d] error_status %d\n", i,
97                                 error_status);
98         }
99 }
100
101 static void enic_clear_soft_stats(struct enic *enic)
102 {
103         struct enic_soft_stats *soft_stats = &enic->soft_stats;
104         rte_atomic64_clear(&soft_stats->rx_nombuf);
105         rte_atomic64_clear(&soft_stats->rx_packet_errors);
106         rte_atomic64_clear(&soft_stats->tx_oversized);
107 }
108
109 static void enic_init_soft_stats(struct enic *enic)
110 {
111         struct enic_soft_stats *soft_stats = &enic->soft_stats;
112         rte_atomic64_init(&soft_stats->rx_nombuf);
113         rte_atomic64_init(&soft_stats->rx_packet_errors);
114         rte_atomic64_init(&soft_stats->tx_oversized);
115         enic_clear_soft_stats(enic);
116 }
117
118 int enic_dev_stats_clear(struct enic *enic)
119 {
120         int ret;
121
122         ret = vnic_dev_stats_clear(enic->vdev);
123         if (ret != 0) {
124                 dev_err(enic, "Error in clearing stats\n");
125                 return ret;
126         }
127         enic_clear_soft_stats(enic);
128
129         return 0;
130 }
131
132 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
133 {
134         struct vnic_stats *stats;
135         struct enic_soft_stats *soft_stats = &enic->soft_stats;
136         int64_t rx_truncated;
137         uint64_t rx_packet_errors;
138         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
139
140         if (ret) {
141                 dev_err(enic, "Error in getting stats\n");
142                 return ret;
143         }
144
145         /* The number of truncated packets can only be calculated by
146          * subtracting a hardware counter from error packets received by
147          * the driver. Note: this causes transient inaccuracies in the
148          * ipackets count. Also, the lengths of truncated packets are
149          * counted in ibytes even though truncated packets are dropped,
150          * which can make ibytes slightly higher than it should be.
151          */
152         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
153         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
154
155         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
156         r_stats->opackets = stats->tx.tx_frames_ok;
157
158         r_stats->ibytes = stats->rx.rx_bytes_ok;
159         r_stats->obytes = stats->tx.tx_bytes_ok;
160
161         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
162         r_stats->oerrors = stats->tx.tx_errors
163                            + rte_atomic64_read(&soft_stats->tx_oversized);
164
165         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
166
167         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
168         return 0;
169 }
170
171 int enic_del_mac_address(struct enic *enic, int mac_index)
172 {
173         struct rte_eth_dev *eth_dev = enic->rte_dev;
174         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
175
176         return vnic_dev_del_addr(enic->vdev, mac_addr);
177 }
178
179 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
180 {
181         int err;
182
183         if (!is_eth_addr_valid(mac_addr)) {
184                 dev_err(enic, "invalid mac address\n");
185                 return -EINVAL;
186         }
187
188         err = vnic_dev_add_addr(enic->vdev, mac_addr);
189         if (err)
190                 dev_err(enic, "add mac addr failed\n");
191         return err;
192 }
193
194 void enic_free_rq_buf(struct rte_mbuf **mbuf)
195 {
196         if (*mbuf == NULL)
197                 return;
198
199         rte_pktmbuf_free(*mbuf);
200         *mbuf = NULL;
201 }
202
203 void enic_init_vnic_resources(struct enic *enic)
204 {
205         unsigned int error_interrupt_enable = 1;
206         unsigned int error_interrupt_offset = 0;
207         unsigned int rxq_interrupt_enable = 0;
208         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
209         unsigned int index = 0;
210         unsigned int cq_idx;
211         struct vnic_rq *data_rq;
212
213         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
214                 rxq_interrupt_enable = 1;
215
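        /*
         * Each ethdev Rx queue is backed by a start-of-packet (SOP) RQ and,
         * when Rx scatter is in use, a data RQ; both are completed on the
         * same CQ selected by cq_idx below.
         */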
216         for (index = 0; index < enic->rq_count; index++) {
217                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
218
219                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
220                         cq_idx,
221                         error_interrupt_enable,
222                         error_interrupt_offset);
223
224                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
225                 if (data_rq->in_use)
226                         vnic_rq_init(data_rq,
227                                      cq_idx,
228                                      error_interrupt_enable,
229                                      error_interrupt_offset);
230                 vnic_cq_init(&enic->cq[cq_idx],
231                         0 /* flow_control_enable */,
232                         1 /* color_enable */,
233                         0 /* cq_head */,
234                         0 /* cq_tail */,
235                         1 /* cq_tail_color */,
236                         rxq_interrupt_enable,
237                         1 /* cq_entry_enable */,
238                         0 /* cq_message_enable */,
239                         rxq_interrupt_offset,
240                         0 /* cq_message_addr */);
241                 if (rxq_interrupt_enable)
242                         rxq_interrupt_offset++;
243         }
244
245         for (index = 0; index < enic->wq_count; index++) {
246                 vnic_wq_init(&enic->wq[index],
247                         enic_cq_wq(enic, index),
248                         error_interrupt_enable,
249                         error_interrupt_offset);
250                 /* Compute unsupported ol flags for enic_prep_pkts() */
251                 enic->wq[index].tx_offload_notsup_mask =
252                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
253
254                 cq_idx = enic_cq_wq(enic, index);
255                 vnic_cq_init(&enic->cq[cq_idx],
256                         0 /* flow_control_enable */,
257                         1 /* color_enable */,
258                         0 /* cq_head */,
259                         0 /* cq_tail */,
260                         1 /* cq_tail_color */,
261                         0 /* interrupt_enable */,
262                         0 /* cq_entry_enable */,
263                         1 /* cq_message_enable */,
264                         0 /* interrupt offset */,
265                         (uint64_t)enic->wq[index].cqmsg_rz->iova);
266         }
267
268         for (index = 0; index < enic->intr_count; index++) {
269                 vnic_intr_init(&enic->intr[index],
270                                enic->config.intr_timer_usec,
271                                enic->config.intr_timer_type,
272                                /*mask_on_assertion*/1);
273         }
274 }
275
276
277 int
278 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
279 {
280         struct rte_mbuf *mb;
281         struct rq_enet_desc *rqd = rq->ring.descs;
282         unsigned i;
283         dma_addr_t dma_addr;
284         uint32_t max_rx_pkt_len;
285         uint16_t rq_buf_len;
286
287         if (!rq->in_use)
288                 return 0;
289
290         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
291                   rq->ring.desc_count);
292
293         /*
294          * If *not* using scatter and the mbuf size is greater than the
295          * requested max packet size (max_rx_pkt_len), then reduce the
296          * posted buffer size to max_rx_pkt_len. HW still receives packets
297          * larger than max_rx_pkt_len, but they are truncated, and we drop
298          * them in the Rx handler. Not ideal, but better than returning
299          * large packets when the user is not expecting them.
300          */
301         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
302         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
303         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
304                 rq_buf_len = max_rx_pkt_len;
305         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
306                 mb = rte_mbuf_raw_alloc(rq->mp);
307                 if (mb == NULL) {
308                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
309                         (unsigned)rq->index);
310                         return -ENOMEM;
311                 }
312
313                 mb->data_off = RTE_PKTMBUF_HEADROOM;
314                 dma_addr = (dma_addr_t)(mb->buf_iova
315                            + RTE_PKTMBUF_HEADROOM);
316                 rq_enet_desc_enc(rqd, dma_addr,
317                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
318                                 : RQ_ENET_TYPE_NOT_SOP),
319                                 rq_buf_len);
320                 rq->mbuf_ring[i] = mb;
321         }
322         /*
323          * Do not post the buffers to the NIC until we enable the RQ via
324          * enic_start_rq().
325          */
326         rq->need_initial_post = true;
327         /* Initialize fetch index while RQ is disabled */
328         iowrite32(0, &rq->ctrl->fetch_index);
329         return 0;
330 }
331
332 /*
333  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
334  * allocated the buffers and filled the RQ descriptor ring. Just need to push
335  * the post index to the NIC.
336  */
337 static void
338 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
339 {
340         if (!rq->in_use || !rq->need_initial_post)
341                 return;
342
343         /* make sure all prior writes are complete before doing the PIO write */
344         rte_rmb();
345
346         /* Post all but the last buffer to VIC. */
347         rq->posted_index = rq->ring.desc_count - 1;
348
349         rq->rx_nb_hold = 0;
350
351         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
352                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
353         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
354         rte_rmb();
355         rq->need_initial_post = false;
356 }
357
358 void *
359 enic_alloc_consistent(void *priv, size_t size,
360         dma_addr_t *dma_handle, uint8_t *name)
361 {
362         void *vaddr;
363         const struct rte_memzone *rz;
364         *dma_handle = 0;
365         struct enic *enic = (struct enic *)priv;
366         struct enic_memzone_entry *mze;
367
368         rz = rte_memzone_reserve_aligned((const char *)name, size,
369                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
370         if (!rz) {
371                 pr_err("%s : Failed to allocate memory requested for %s\n",
372                         __func__, name);
373                 return NULL;
374         }
375
376         vaddr = rz->addr;
377         *dma_handle = (dma_addr_t)rz->iova;
378
379         mze = rte_malloc("enic memzone entry",
380                          sizeof(struct enic_memzone_entry), 0);
381
382         if (!mze) {
383                 pr_err("%s : Failed to allocate memory for memzone list\n",
384                        __func__);
385                 rte_memzone_free(rz);
386                 return NULL;
387         }
388
389         mze->rz = rz;
390
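        /*
         * Track the memzone on the per-device list so enic_free_consistent()
         * can later find it by virtual address and IOVA.
         */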
391         rte_spinlock_lock(&enic->memzone_list_lock);
392         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
393         rte_spinlock_unlock(&enic->memzone_list_lock);
394
395         return vaddr;
396 }
397
398 void
399 enic_free_consistent(void *priv,
400                      __rte_unused size_t size,
401                      void *vaddr,
402                      dma_addr_t dma_handle)
403 {
404         struct enic_memzone_entry *mze;
405         struct enic *enic = (struct enic *)priv;
406
407         rte_spinlock_lock(&enic->memzone_list_lock);
408         LIST_FOREACH(mze, &enic->memzone_list, entries) {
409                 if (mze->rz->addr == vaddr &&
410                     mze->rz->iova == dma_handle)
411                         break;
412         }
413         if (mze == NULL) {
414                 rte_spinlock_unlock(&enic->memzone_list_lock);
415                 dev_warning(enic,
416                             "Tried to free memory, but couldn't find it in the memzone list\n");
417                 return;
418         }
419         LIST_REMOVE(mze, entries);
420         rte_spinlock_unlock(&enic->memzone_list_lock);
421         rte_memzone_free(mze->rz);
422         rte_free(mze);
423 }
424
425 int enic_link_update(struct rte_eth_dev *eth_dev)
426 {
427         struct enic *enic = pmd_priv(eth_dev);
428         struct rte_eth_link link;
429
430         memset(&link, 0, sizeof(link));
431         link.link_status = enic_get_link_status(enic);
432         link.link_duplex = ETH_LINK_FULL_DUPLEX;
433         link.link_speed = vnic_dev_port_speed(enic->vdev);
434
435         return rte_eth_linkstatus_set(eth_dev, &link);
436 }
437
438 static void
439 enic_intr_handler(void *arg)
440 {
441         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
442         struct enic *enic = pmd_priv(dev);
443
444         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
445
446         enic_link_update(dev);
447         rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
448         enic_log_q_error(enic);
449         /* Re-enable irq in case of INTx */
450         rte_intr_ack(&enic->pdev->intr_handle);
451 }
452
453 static int enic_rxq_intr_init(struct enic *enic)
454 {
455         struct rte_intr_handle *intr_handle;
456         uint32_t rxq_intr_count, i;
457         int err;
458
459         intr_handle = enic->rte_dev->intr_handle;
460         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
461                 return 0;
462         /*
463          * Rx queue interrupts only work when we have MSI-X interrupts,
464          * one per queue. Sharing one interrupt is technically
465          * possible with VIC, but it is not worth the complications it brings.
466          */
467         if (!rte_intr_cap_multiple(intr_handle)) {
468                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
469                         " (vfio-pci driver)\n");
470                 return -ENOTSUP;
471         }
472         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
473         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
474         if (err) {
475                 dev_err(enic, "Failed to enable event fds for Rx queue"
476                         " interrupts\n");
477                 return err;
478         }
479         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
480                                             rxq_intr_count * sizeof(int), 0);
481         if (intr_handle->intr_vec == NULL) {
482                 dev_err(enic, "Failed to allocate intr_vec\n");
483                 return -ENOMEM;
484         }
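        /*
         * Map each Rx queue to its own MSI-X vector. The first
         * ENICPMD_RXQ_INTR_OFFSET vectors are reserved for the LSC/error
         * interrupt, so Rx queue i uses vector i + ENICPMD_RXQ_INTR_OFFSET.
         */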
485         for (i = 0; i < rxq_intr_count; i++)
486                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
487         return 0;
488 }
489
490 static void enic_rxq_intr_deinit(struct enic *enic)
491 {
492         struct rte_intr_handle *intr_handle;
493
494         intr_handle = enic->rte_dev->intr_handle;
495         rte_intr_efd_disable(intr_handle);
496         if (intr_handle->intr_vec != NULL) {
497                 rte_free(intr_handle->intr_vec);
498                 intr_handle->intr_vec = NULL;
499         }
500 }
501
502 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
503 {
504         struct wq_enet_desc *desc;
505         struct vnic_wq *wq;
506         unsigned int i;
507
508         /*
509          * Fill WQ descriptor fields that never change. Every descriptor is
510          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
511          * descriptors (i.e. request one completion update every 32 packets).
512          */
513         wq = &enic->wq[queue_idx];
514         desc = (struct wq_enet_desc *)wq->ring.descs;
515         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
516                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
517                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
518                         desc->header_length_flags |=
519                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
520         }
521 }
522
523 /*
524  * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
525  * used when that file is not compiled.
526  */
527 __rte_weak bool
528 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
529 {
530         return false;
531 }
532
533 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
534 {
535         struct enic *enic = pmd_priv(eth_dev);
536
537         if (enic->cq64) {
538                 ENICPMD_LOG(DEBUG, " use the normal Rx handler for 64B CQ entry");
539                 eth_dev->rx_pkt_burst = &enic_recv_pkts_64;
540                 return;
541         }
542         /*
543          * Preference order:
544          * 1. The vectorized handler if possible and requested.
545          * 2. The non-scatter, simplified handler if scatter Rx is not used.
546          * 3. The default handler as a fallback.
547          */
548         if (enic_use_vector_rx_handler(eth_dev))
549                 return;
550         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
551                 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
552                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
553         } else {
554                 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
555                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
556         }
557 }
558
559 /* Secondary process uses this to set the Tx handler */
560 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
561 {
562         struct enic *enic = pmd_priv(eth_dev);
563
564         if (enic->use_simple_tx_handler) {
565                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
566                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
567         } else {
568                 ENICPMD_LOG(DEBUG, " use the default tx handler");
569                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
570         }
571 }
572
573 int enic_enable(struct enic *enic)
574 {
575         unsigned int index;
576         int err;
577         struct rte_eth_dev *eth_dev = enic->rte_dev;
578         uint64_t simple_tx_offloads;
579         uintptr_t p;
580
581         if (enic->enable_avx2_rx) {
582                 struct rte_mbuf mb_def = { .buf_addr = 0 };
583
584                 /*
585                  * mbuf_initializer contains const-after-init fields of
586                  * receive mbufs (i.e. 64 bits of fields from rearm_data).
587                  * It is currently used by the vectorized handler.
588                  */
589                 mb_def.nb_segs = 1;
590                 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
591                 mb_def.port = enic->port_id;
592                 rte_mbuf_refcnt_set(&mb_def, 1);
593                 rte_compiler_barrier();
594                 p = (uintptr_t)&mb_def.rearm_data;
595                 enic->mbuf_initializer = *(uint64_t *)p;
596         }
597
598         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
599         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
600
601         /* vnic notification of link status has already been turned on in
602          * enic_dev_init() which is called during probe time.  Here we are
603          * just turning on interrupt vector 0 if needed.
604          */
605         if (eth_dev->data->dev_conf.intr_conf.lsc)
606                 vnic_dev_notify_set(enic->vdev, 0);
607
608         err = enic_rxq_intr_init(enic);
609         if (err)
610                 return err;
611
612         /* Initialize flowman if not already initialized during probe */
613         if (enic->fm == NULL && enic_fm_init(enic))
614                 dev_warning(enic, "Init of flowman failed.\n");
615
616         for (index = 0; index < enic->rq_count; index++) {
617                 err = enic_alloc_rx_queue_mbufs(enic,
618                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
619                 if (err) {
620                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
621                         return err;
622                 }
623                 err = enic_alloc_rx_queue_mbufs(enic,
624                         &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
625                 if (err) {
626                         /* release the allocated mbufs for the sop rq */
627                         enic_rxmbuf_queue_release(enic,
628                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
629
630                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
631                         return err;
632                 }
633         }
634
635         /*
636          * Use the simple TX handler if possible. Only checksum offloads
637          * and vlan insertion are supported.
638          */
639         simple_tx_offloads = enic->tx_offload_capa &
640                 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
641                  DEV_TX_OFFLOAD_VLAN_INSERT |
642                  DEV_TX_OFFLOAD_IPV4_CKSUM |
643                  DEV_TX_OFFLOAD_UDP_CKSUM |
644                  DEV_TX_OFFLOAD_TCP_CKSUM);
645         if ((eth_dev->data->dev_conf.txmode.offloads &
646              ~simple_tx_offloads) == 0) {
647                 ENICPMD_LOG(DEBUG, " use the simple tx handler");
648                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
649                 for (index = 0; index < enic->wq_count; index++)
650                         enic_prep_wq_for_simple_tx(enic, index);
651                 enic->use_simple_tx_handler = 1;
652         } else {
653                 ENICPMD_LOG(DEBUG, " use the default tx handler");
654                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
655         }
656
657         enic_pick_rx_handler(eth_dev);
658
659         for (index = 0; index < enic->wq_count; index++)
660                 enic_start_wq(enic, index);
661         for (index = 0; index < enic->rq_count; index++)
662                 enic_start_rq(enic, index);
663
664         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
665
666         vnic_dev_enable_wait(enic->vdev);
667
668         /* Register and enable error interrupt */
669         rte_intr_callback_register(&(enic->pdev->intr_handle),
670                 enic_intr_handler, (void *)enic->rte_dev);
671
672         rte_intr_enable(&(enic->pdev->intr_handle));
673         /* Unmask LSC interrupt */
674         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
675
676         return 0;
677 }
678
679 int enic_alloc_intr_resources(struct enic *enic)
680 {
681         int err;
682         unsigned int i;
683
684         dev_info(enic, "vNIC resources used:  "
685                 "wq %d rq %d cq %d intr %d\n",
686                 enic->wq_count, enic_vnic_rq_count(enic),
687                 enic->cq_count, enic->intr_count);
688
689         for (i = 0; i < enic->intr_count; i++) {
690                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
691                 if (err) {
692                         enic_free_vnic_resources(enic);
693                         return err;
694                 }
695         }
696         return 0;
697 }
698
699 void enic_free_rq(void *rxq)
700 {
701         struct vnic_rq *rq_sop, *rq_data;
702         struct enic *enic;
703
704         if (rxq == NULL)
705                 return;
706
707         rq_sop = (struct vnic_rq *)rxq;
708         enic = vnic_dev_priv(rq_sop->vdev);
709         rq_data = &enic->rq[rq_sop->data_queue_idx];
710
711         if (rq_sop->free_mbufs) {
712                 struct rte_mbuf **mb;
713                 int i;
714
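                /*
                 * The Rx handler keeps its cached mbufs in the last
                 * num_free_mbufs slots of free_mbufs; free only those entries.
                 */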
715                 mb = rq_sop->free_mbufs;
716                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
717                      i < ENIC_RX_BURST_MAX; i++)
718                         rte_pktmbuf_free(mb[i]);
719                 rte_free(rq_sop->free_mbufs);
720                 rq_sop->free_mbufs = NULL;
721                 rq_sop->num_free_mbufs = 0;
722         }
723
724         enic_rxmbuf_queue_release(enic, rq_sop);
725         if (rq_data->in_use)
726                 enic_rxmbuf_queue_release(enic, rq_data);
727
728         rte_free(rq_sop->mbuf_ring);
729         if (rq_data->in_use)
730                 rte_free(rq_data->mbuf_ring);
731
732         rq_sop->mbuf_ring = NULL;
733         rq_data->mbuf_ring = NULL;
734
735         vnic_rq_free(rq_sop);
736         if (rq_data->in_use)
737                 vnic_rq_free(rq_data);
738
739         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
740
741         rq_sop->in_use = 0;
742         rq_data->in_use = 0;
743 }
744
745 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
746 {
747         struct rte_eth_dev_data *data = enic->dev_data;
748         vnic_wq_enable(&enic->wq[queue_idx]);
749         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
750 }
751
752 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
753 {
754         struct rte_eth_dev_data *data = enic->dev_data;
755         int ret;
756
757         ret = vnic_wq_disable(&enic->wq[queue_idx]);
758         if (ret)
759                 return ret;
760
761         data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
762         return 0;
763 }
764
765 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
766 {
767         struct rte_eth_dev_data *data = enic->dev_data;
768         struct vnic_rq *rq_sop;
769         struct vnic_rq *rq_data;
770         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
771         rq_data = &enic->rq[rq_sop->data_queue_idx];
772
773         if (rq_data->in_use) {
774                 vnic_rq_enable(rq_data);
775                 enic_initial_post_rx(enic, rq_data);
776         }
777         rte_mb();
778         vnic_rq_enable(rq_sop);
779         enic_initial_post_rx(enic, rq_sop);
780         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
781 }
782
783 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
784 {
785         struct rte_eth_dev_data *data = enic->dev_data;
786         int ret1 = 0, ret2 = 0;
787         struct vnic_rq *rq_sop;
788         struct vnic_rq *rq_data;
789         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
790         rq_data = &enic->rq[rq_sop->data_queue_idx];
791
792         ret2 = vnic_rq_disable(rq_sop);
793         rte_mb();
794         if (rq_data->in_use)
795                 ret1 = vnic_rq_disable(rq_data);
796
797         if (ret2)
798                 return ret2;
799         else if (ret1)
800                 return ret1;
801
802         data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
803         return 0;
804 }
805
806 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
807         unsigned int socket_id, struct rte_mempool *mp,
808         uint16_t nb_desc, uint16_t free_thresh)
809 {
810         struct enic_vf_representor *vf;
811         int rc;
812         uint16_t sop_queue_idx;
813         uint16_t data_queue_idx;
814         uint16_t cq_idx;
815         struct vnic_rq *rq_sop;
816         struct vnic_rq *rq_data;
817         unsigned int mbuf_size, mbufs_per_pkt;
818         unsigned int nb_sop_desc, nb_data_desc;
819         uint16_t min_sop, max_sop, min_data, max_data;
820         uint32_t max_rx_pkt_len;
821
822         /*
823          * Representor uses a reserved PF queue. Translate representor
824          * queue number to PF queue number.
825          */
826         if (enic_is_vf_rep(enic)) {
827                 RTE_ASSERT(queue_idx == 0);
828                 vf = VF_ENIC_TO_VF_REP(enic);
829                 sop_queue_idx = vf->pf_rq_sop_idx;
830                 data_queue_idx = vf->pf_rq_data_idx;
831                 enic = vf->pf;
832                 queue_idx = sop_queue_idx;
833         } else {
834                 sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
835                 data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
836         }
837         cq_idx = enic_cq_rq(enic, sop_queue_idx);
838         rq_sop = &enic->rq[sop_queue_idx];
839         rq_data = &enic->rq[data_queue_idx];
840         rq_sop->is_sop = 1;
841         rq_sop->data_queue_idx = data_queue_idx;
842         rq_data->is_sop = 0;
843         rq_data->data_queue_idx = 0;
844         rq_sop->socket_id = socket_id;
845         rq_sop->mp = mp;
846         rq_data->socket_id = socket_id;
847         rq_data->mp = mp;
848         rq_sop->in_use = 1;
849         rq_sop->rx_free_thresh = free_thresh;
850         rq_data->rx_free_thresh = free_thresh;
851         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
852                   free_thresh);
853
854         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
855                                RTE_PKTMBUF_HEADROOM);
856         /* max_rx_pkt_len includes the ethernet header and CRC. */
857         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
858
859         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
860             DEV_RX_OFFLOAD_SCATTER) {
861                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
862                 /* ceil((max pkt len)/mbuf_size) */
863                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
864         } else {
865                 dev_info(enic, "Scatter rx mode disabled\n");
866                 mbufs_per_pkt = 1;
867                 if (max_rx_pkt_len > mbuf_size) {
868                         dev_warning(enic, "The maximum Rx packet size (%u) is"
869                                     " larger than the mbuf size (%u), and"
870                                     " scatter is disabled. Larger packets will"
871                                     " be truncated.\n",
872                                     max_rx_pkt_len, mbuf_size);
873                 }
874         }
875
876         if (mbufs_per_pkt > 1) {
877                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
878                 rq_sop->data_queue_enable = 1;
879                 rq_data->in_use = 1;
880                 /*
881                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
882                  * receives packet sizes up to the "max" MTU.
883                  * If not using scatter, we can achieve the effect of dropping
884                  * larger packets by reducing the size of posted buffers.
885                  * See enic_alloc_rx_queue_mbufs().
886                  */
887                 if (max_rx_pkt_len <
888                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
889                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
890                                     " when scatter rx mode is in use.\n");
891                 }
892         } else {
893                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
894                          queue_idx);
895                 rq_sop->data_queue_enable = 0;
896                 rq_data->in_use = 0;
897         }
898
899         /* The number of descriptors has to be a multiple of 32 */
900         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
901         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
902
903         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
904         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
905
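        /*
         * With scatter, each packet uses one SOP descriptor plus up to
         * (mbufs_per_pkt - 1) data descriptors, so size the SOP and data
         * rings relative to each other within the configured rq_desc_count.
         */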
906         if (mbufs_per_pkt > 1) {
907                 min_sop = ENIC_RX_BURST_MAX;
908                 max_sop = ((enic->config.rq_desc_count /
909                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
910                 min_data = min_sop * (mbufs_per_pkt - 1);
911                 max_data = enic->config.rq_desc_count;
912         } else {
913                 min_sop = ENIC_RX_BURST_MAX;
914                 max_sop = enic->config.rq_desc_count;
915                 min_data = 0;
916                 max_data = 0;
917         }
918
919         if (nb_desc < (min_sop + min_data)) {
920                 dev_warning(enic,
921                             "Number of rx descs too low, adjusting to minimum\n");
922                 nb_sop_desc = min_sop;
923                 nb_data_desc = min_data;
924         } else if (nb_desc > (max_sop + max_data)) {
925                 dev_warning(enic,
926                             "Number of rx_descs too high, adjusting to maximum\n");
927                 nb_sop_desc = max_sop;
928                 nb_data_desc = max_data;
929         }
930         if (mbufs_per_pkt > 1) {
931                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
932                          " rx descriptor range is %u to %u\n",
933                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
934                          max_sop + max_data);
935         }
936         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
937                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
938
939         /* Allocate sop queue resources */
940         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
941                 nb_sop_desc, sizeof(struct rq_enet_desc));
942         if (rc) {
943                 dev_err(enic, "error in allocation of sop rq\n");
944                 goto err_exit;
945         }
946         nb_sop_desc = rq_sop->ring.desc_count;
947
948         if (rq_data->in_use) {
949                 /* Allocate data queue resources */
950                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
951                                    nb_data_desc,
952                                    sizeof(struct rq_enet_desc));
953                 if (rc) {
954                         dev_err(enic, "error in allocation of data rq\n");
955                         goto err_free_rq_sop;
956                 }
957                 nb_data_desc = rq_data->ring.desc_count;
958         }
959         /* Enable 64B CQ entry if requested */
960         if (enic->cq64 && vnic_dev_set_cq_entry_size(enic->vdev,
961                                 sop_queue_idx, VNIC_RQ_CQ_ENTRY_SIZE_64)) {
962                 dev_err(enic, "failed to enable 64B CQ entry on sop rq\n");
963                 goto err_free_rq_data;
964         }
965         if (rq_data->in_use && enic->cq64 &&
966             vnic_dev_set_cq_entry_size(enic->vdev, data_queue_idx,
967                 VNIC_RQ_CQ_ENTRY_SIZE_64)) {
968                 dev_err(enic, "failed to enable 64B CQ entry on data rq\n");
969                 goto err_free_rq_data;
970         }
971
972         rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
973                            socket_id, nb_sop_desc + nb_data_desc,
974                            enic->cq64 ? sizeof(struct cq_enet_rq_desc_64) :
975                            sizeof(struct cq_enet_rq_desc));
976         if (rc) {
977                 dev_err(enic, "error in allocation of cq for rq\n");
978                 goto err_free_rq_data;
979         }
980
981         /* Allocate the mbuf rings */
982         rq_sop->mbuf_ring = (struct rte_mbuf **)
983                 rte_zmalloc_socket("rq->mbuf_ring",
984                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
985                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
986         if (rq_sop->mbuf_ring == NULL)
987                 goto err_free_cq;
988
989         if (rq_data->in_use) {
990                 rq_data->mbuf_ring = (struct rte_mbuf **)
991                         rte_zmalloc_socket("rq->mbuf_ring",
992                                 sizeof(struct rte_mbuf *) * nb_data_desc,
993                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
994                 if (rq_data->mbuf_ring == NULL)
995                         goto err_free_sop_mbuf;
996         }
997
998         rq_sop->free_mbufs = (struct rte_mbuf **)
999                 rte_zmalloc_socket("rq->free_mbufs",
1000                                    sizeof(struct rte_mbuf *) *
1001                                    ENIC_RX_BURST_MAX,
1002                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
1003         if (rq_sop->free_mbufs == NULL)
1004                 goto err_free_data_mbuf;
1005         rq_sop->num_free_mbufs = 0;
1006
1007         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
1008
1009         return 0;
1010
1011 err_free_data_mbuf:
1012         rte_free(rq_data->mbuf_ring);
1013 err_free_sop_mbuf:
1014         rte_free(rq_sop->mbuf_ring);
1015 err_free_cq:
1016         /* cleanup on error */
1017         vnic_cq_free(&enic->cq[cq_idx]);
1018 err_free_rq_data:
1019         if (rq_data->in_use)
1020                 vnic_rq_free(rq_data);
1021 err_free_rq_sop:
1022         vnic_rq_free(rq_sop);
1023 err_exit:
1024         return -ENOMEM;
1025 }
1026
1027 void enic_free_wq(void *txq)
1028 {
1029         struct vnic_wq *wq;
1030         struct enic *enic;
1031
1032         if (txq == NULL)
1033                 return;
1034
1035         wq = (struct vnic_wq *)txq;
1036         enic = vnic_dev_priv(wq->vdev);
1037         rte_memzone_free(wq->cqmsg_rz);
1038         vnic_wq_free(wq);
1039         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1040 }
1041
1042 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1043         unsigned int socket_id, uint16_t nb_desc)
1044 {
1045         struct enic_vf_representor *vf;
1046         int err;
1047         struct vnic_wq *wq;
1048         unsigned int cq_index;
1049         char name[RTE_MEMZONE_NAMESIZE];
1050         static int instance;
1051
1052         /*
1053          * Representor uses a reserved PF queue. Translate representor
1054          * queue number to PF queue number.
1055          */
1056         if (enic_is_vf_rep(enic)) {
1057                 RTE_ASSERT(queue_idx == 0);
1058                 vf = VF_ENIC_TO_VF_REP(enic);
1059                 queue_idx = vf->pf_wq_idx;
1060                 cq_index = vf->pf_wq_cq_idx;
1061                 enic = vf->pf;
1062         } else {
1063                 cq_index = enic_cq_wq(enic, queue_idx);
1064         }
1065         wq = &enic->wq[queue_idx];
1066         wq->socket_id = socket_id;
1067         /*
1068          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1069          * print an info message for diagnostics.
1070          */
1071         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1072
1073         /* Allocate queue resources */
1074         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1075                 nb_desc,
1076                 sizeof(struct wq_enet_desc));
1077         if (err) {
1078                 dev_err(enic, "error in allocation of wq\n");
1079                 return err;
1080         }
1081
1082         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1083                 socket_id, nb_desc,
1084                 sizeof(struct cq_enet_wq_desc));
1085         if (err) {
1086                 vnic_wq_free(wq);
1087                 dev_err(enic, "error in allocation of cq for wq\n");
1088         }
1089
1090         /* set up the CQ message area */
1091         snprintf((char *)name, sizeof(name),
1092                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1093                 instance++);
1094
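        /*
         * The WQ's completion queue runs in message mode (see
         * enic_init_vnic_resources): instead of writing CQ entries, the NIC
         * DMAs the completion index into this small memzone, which the Tx
         * cleanup path reads.
         */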
1095         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1096                         sizeof(uint32_t), SOCKET_ID_ANY,
1097                         RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1098         if (!wq->cqmsg_rz)
1099                 return -ENOMEM;
1100
1101         return err;
1102 }
1103
1104 int enic_disable(struct enic *enic)
1105 {
1106         unsigned int i;
1107         int err;
1108
1109         for (i = 0; i < enic->intr_count; i++) {
1110                 vnic_intr_mask(&enic->intr[i]);
1111                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1112         }
1113         enic_rxq_intr_deinit(enic);
1114         rte_intr_disable(&enic->pdev->intr_handle);
1115         rte_intr_callback_unregister(&enic->pdev->intr_handle,
1116                                      enic_intr_handler,
1117                                      (void *)enic->rte_dev);
1118
1119         vnic_dev_disable(enic->vdev);
1120
1121         enic_fm_destroy(enic);
1122
1123         if (!enic_is_sriov_vf(enic))
1124                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1125
1126         for (i = 0; i < enic->wq_count; i++) {
1127                 err = vnic_wq_disable(&enic->wq[i]);
1128                 if (err)
1129                         return err;
1130         }
1131         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1132                 if (enic->rq[i].in_use) {
1133                         err = vnic_rq_disable(&enic->rq[i]);
1134                         if (err)
1135                                 return err;
1136                 }
1137         }
1138
1139         /* If we were using interrupts, set the interrupt vector to -1
1140          * to disable interrupts.  We are not disabling link notifications,
1141          * though, as we want the polling of link status to continue working.
1142          */
1143         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1144                 vnic_dev_notify_set(enic->vdev, -1);
1145
1146         vnic_dev_set_reset_flag(enic->vdev, 1);
1147
1148         for (i = 0; i < enic->wq_count; i++)
1149                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1150
1151         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1152                 if (enic->rq[i].in_use)
1153                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1154         for (i = 0; i < enic->cq_count; i++)
1155                 vnic_cq_clean(&enic->cq[i]);
1156         for (i = 0; i < enic->intr_count; i++)
1157                 vnic_intr_clean(&enic->intr[i]);
1158
1159         return 0;
1160 }
1161
1162 static int enic_dev_wait(struct vnic_dev *vdev,
1163         int (*start)(struct vnic_dev *, int),
1164         int (*finished)(struct vnic_dev *, int *),
1165         int arg)
1166 {
1167         int done;
1168         int err;
1169         int i;
1170
1171         err = start(vdev, arg);
1172         if (err)
1173                 return err;
1174
1175         /* Wait for func to complete...2 seconds max */
1176         for (i = 0; i < 2000; i++) {
1177                 err = finished(vdev, &done);
1178                 if (err)
1179                         return err;
1180                 if (done)
1181                         return 0;
1182                 usleep(1000);
1183         }
1184         return -ETIMEDOUT;
1185 }
1186
1187 static int enic_dev_open(struct enic *enic)
1188 {
1189         int err;
1190         int flags = CMD_OPENF_IG_DESCCACHE;
1191
1192         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1193                 vnic_dev_open_done, flags);
1194         if (err)
1195                 dev_err(enic_get_dev(enic),
1196                         "vNIC device open failed, err %d\n", err);
1197
1198         return err;
1199 }
1200
1201 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1202 {
1203         dma_addr_t rss_key_buf_pa;
1204         union vnic_rss_key *rss_key_buf_va = NULL;
1205         int err, i;
1206         uint8_t name[RTE_MEMZONE_NAMESIZE];
1207
1208         RTE_ASSERT(user_key != NULL);
1209         snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1210         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1211                 &rss_key_buf_pa, name);
1212         if (!rss_key_buf_va)
1213                 return -ENOMEM;
1214
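        /* Scatter the flat key into the VIC layout of 10-byte sub-keys. */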
1215         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1216                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1217
1218         err = enic_set_rss_key(enic,
1219                 rss_key_buf_pa,
1220                 sizeof(union vnic_rss_key));
1221
1222         /* Save for later queries */
1223         if (!err) {
1224                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1225                            sizeof(union vnic_rss_key));
1226         }
1227         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1228                 rss_key_buf_va, rss_key_buf_pa);
1229
1230         return err;
1231 }
1232
1233 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1234 {
1235         dma_addr_t rss_cpu_buf_pa;
1236         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1237         int err;
1238         uint8_t name[RTE_MEMZONE_NAMESIZE];
1239
1240         snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1241         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1242                 &rss_cpu_buf_pa, name);
1243         if (!rss_cpu_buf_va)
1244                 return -ENOMEM;
1245
1246         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1247
1248         err = enic_set_rss_cpu(enic,
1249                 rss_cpu_buf_pa,
1250                 sizeof(union vnic_rss_cpu));
1251
1252         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1253                 rss_cpu_buf_va, rss_cpu_buf_pa);
1254
1255         /* Save for later queries */
1256         if (!err)
1257                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1258         return err;
1259 }
1260
1261 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1262         uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1263         uint8_t rss_enable)
1264 {
1265         const uint8_t tso_ipid_split_en = 0;
1266         int err;
1267
1268         err = enic_set_nic_cfg(enic,
1269                 rss_default_cpu, rss_hash_type,
1270                 rss_hash_bits, rss_base_cpu,
1271                 rss_enable, tso_ipid_split_en,
1272                 enic->ig_vlan_strip_en);
1273
1274         return err;
1275 }
1276
1277 /* Initialize RSS with defaults, called from dev_configure */
1278 int enic_init_rss_nic_cfg(struct enic *enic)
1279 {
1280         static uint8_t default_rss_key[] = {
1281                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1282                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1283                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1284                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1285         };
1286         struct rte_eth_rss_conf rss_conf;
1287         union vnic_rss_cpu rss_cpu;
1288         int ret, i;
1289
1290         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1291         /*
1292          * If setting key for the first time, and the user gives us none, then
1293          * push the default key to NIC.
1294          */
1295         if (rss_conf.rss_key == NULL) {
1296                 rss_conf.rss_key = default_rss_key;
1297                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1298         }
1299         ret = enic_set_rss_conf(enic, &rss_conf);
1300         if (ret) {
1301                 dev_err(enic, "Failed to configure RSS\n");
1302                 return ret;
1303         }
1304         if (enic->rss_enable) {
1305                 /* If enabling RSS, use the default reta */
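                /*
                 * Each rss_cpu.cpu[] element packs four RETA entries; spread
                 * them round-robin across the SOP RQ indices.
                 */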
1306                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1307                         rss_cpu.cpu[i / 4].b[i % 4] =
1308                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1309                 }
1310                 ret = enic_set_rss_reta(enic, &rss_cpu);
1311                 if (ret)
1312                         dev_err(enic, "Failed to set RSS indirection table\n");
1313         }
1314         return ret;
1315 }
1316
1317 int enic_setup_finish(struct enic *enic)
1318 {
1319         enic_init_soft_stats(enic);
1320
1321         /* switchdev: enable promisc mode on PF */
1322         if (enic->switchdev_mode) {
1323                 vnic_dev_packet_filter(enic->vdev,
1324                                        0 /* directed  */,
1325                                        0 /* multicast */,
1326                                        0 /* broadcast */,
1327                                        1 /* promisc   */,
1328                                        0 /* allmulti  */);
1329                 enic->promisc = 1;
1330                 enic->allmulti = 0;
1331                 return 0;
1332         }
1333         /* Default conf */
1334         vnic_dev_packet_filter(enic->vdev,
1335                 1 /* directed  */,
1336                 1 /* multicast */,
1337                 1 /* broadcast */,
1338                 0 /* promisc   */,
1339                 1 /* allmulti  */);
1340
1341         enic->promisc = 0;
1342         enic->allmulti = 1;
1343
1344         return 0;
1345 }
1346
1347 static int enic_rss_conf_valid(struct enic *enic,
1348                                struct rte_eth_rss_conf *rss_conf)
1349 {
1350         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1351         if (enic->flow_type_rss_offloads == 0)
1352                 return 0;
1353         if (rss_conf->rss_key != NULL &&
1354             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1355                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1356                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1357                 return -EINVAL;
1358         }
1359         if (rss_conf->rss_hf != 0 &&
1360             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1361                 dev_err(enic, "Given rss_hf contains none of the supported"
1362                         " types\n");
1363                 return -EINVAL;
1364         }
1365         return 0;
1366 }
1367
1368 /* Set hash type and key according to rss_conf */
1369 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1370 {
1371         struct rte_eth_dev *eth_dev;
1372         uint64_t rss_hf;
1373         uint8_t rss_hash_type;
1374         uint8_t rss_enable;
1375         int ret;
1376
1377         RTE_ASSERT(rss_conf != NULL);
1378         ret = enic_rss_conf_valid(enic, rss_conf);
1379         if (ret) {
1380                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1381                 return ret;
1382         }
1383
1384         eth_dev = enic->rte_dev;
1385         rss_hash_type = 0;
1386         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1387         if (enic->rq_count > 1 &&
1388             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1389             rss_hf != 0) {
1390                 rss_enable = 1;
1391                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1392                               ETH_RSS_NONFRAG_IPV4_OTHER))
1393                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1394                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1395                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1396                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1397                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1398                         if (enic->udp_rss_weak) {
1399                                 /*
1400                                  * 'TCP' is not a typo. The "weak" version of
1401                                  * UDP RSS requires both the TCP and UDP bits
1402                                  * to be set. It does enable TCP RSS as well.
1403                                  */
1404                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1405                         }
1406                 }
1407                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1408                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1409                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1410                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1411                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1412                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1413                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1414                         if (enic->udp_rss_weak)
1415                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1416                 }
1417         } else {
1418                 rss_enable = 0;
1419                 rss_hf = 0;
1420         }
1421
1422         /* Set the hash key if provided */
1423         if (rss_enable && rss_conf->rss_key) {
1424                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1425                 if (ret) {
1426                         dev_err(enic, "Failed to set RSS key\n");
1427                         return ret;
1428                 }
1429         }
1430
1431         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1432                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1433                               rss_enable);
1434         if (!ret) {
1435                 enic->rss_hf = rss_hf;
1436                 enic->rss_hash_type = rss_hash_type;
1437                 enic->rss_enable = rss_enable;
1438         } else {
1439                 dev_err(enic, "Failed to update RSS configurations."
1440                         " hash=0x%x\n", rss_hash_type);
1441         }
1442         return ret;
1443 }
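
/*
 * Illustration only (not driver code): the checks above translate the ethdev
 * rss_hf bits into vNIC hash-type bits. For example, a request of
 * rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP yields
 * rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | NIC_CFG_RSS_HASH_TYPE_TCP_IPV4.
 * A typical caller reaches this path through the ethdev API; a minimal
 * sketch, assuming an application-side port_id (rss_key = NULL keeps the
 * current key):
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     if (rte_eth_dev_rss_hash_update(port_id, &conf) != 0)
 *             rte_exit(EXIT_FAILURE, "RSS update failed\n");
 */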
1444
1445 int enic_set_vlan_strip(struct enic *enic)
1446 {
1447         /*
1448          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1449          * together. So, re-do niccfg, preserving the current RSS settings.
1450          */
1451         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1452                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1453                                enic->rss_enable);
1454 }
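
/*
 * Illustration only: VLAN stripping is normally requested through the ethdev
 * offload API, and the PMD then applies it via enic_set_vlan_strip() together
 * with the preserved RSS state. A minimal sketch, assuming application-side
 * port_conf, nb_rxq, and nb_txq variables:
 *
 *     port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
 *     ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 */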
1455
1456 int enic_add_packet_filter(struct enic *enic)
1457 {
1458         /* switchdev ignores packet filters */
1459         if (enic->switchdev_mode) {
1460                 ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1461                 return 0;
1462         }
1463         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1464         return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1465                 enic->promisc, enic->allmulti);
1466 }
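
/*
 * Illustration only: directed, multicast, and broadcast reception are always
 * left enabled; only the promiscuous and all-multicast flags vary. In the
 * usual ethdev flow (sketch, port_id assumed), e.g.
 *
 *     rte_eth_promiscuous_enable(port_id);
 *
 * the PMD records the new state in enic->promisc before re-applying the
 * filter through this function.
 */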
1467
1468 int enic_get_link_status(struct enic *enic)
1469 {
1470         return vnic_dev_link_status(enic->vdev);
1471 }
1472
1473 static void enic_dev_deinit(struct enic *enic)
1474 {
1475         /* stop link status checking */
1476         vnic_dev_notify_unset(enic->vdev);
1477
1478         /* mac_addrs is freed by rte_eth_dev_release_port() */
1479         rte_free(enic->cq);
1480         rte_free(enic->intr);
1481         rte_free(enic->rq);
1482         rte_free(enic->wq);
1483 }
1484
1485
1486 int enic_set_vnic_res(struct enic *enic)
1487 {
1488         struct rte_eth_dev *eth_dev = enic->rte_dev;
1489         int rc = 0;
1490         unsigned int required_rq, required_wq, required_cq, required_intr;
1491
1492         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1493         required_rq = eth_dev->data->nb_rx_queues * 2;
1494         required_wq = eth_dev->data->nb_tx_queues;
1495         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1496         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1497         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1498                 required_intr += eth_dev->data->nb_rx_queues;
1499         }
1500         ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1501                     required_rq, required_wq, required_cq);
1502         if (enic->vf_required_rq) {
1503                 /* Queues needed for VF representors */
1504                 required_rq += enic->vf_required_rq;
1505                 required_wq += enic->vf_required_wq;
1506                 required_cq += enic->vf_required_cq;
1507                 ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1508                             enic->vf_required_rq, enic->vf_required_wq,
1509                             enic->vf_required_cq);
1510         }
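
        /*
         * Worked example (illustration only): with nb_rx_queues = 2,
         * nb_tx_queues = 2, and no VF representors, the arithmetic above
         * gives required_rq = 4 (two vNIC RQs per Rx queue), required_wq = 2,
         * required_cq = 4, and required_intr = 1, or 3 when per-Rx-queue
         * interrupts (intr_conf.rxq) are requested.
         */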
1511
1512         if (enic->conf_rq_count < required_rq) {
1513                 dev_err(enic, "Not enough Receive queues. Requested:%u which uses %u RQs on VIC, Configured:%u\n",
1514                         eth_dev->data->nb_rx_queues,
1515                         required_rq, enic->conf_rq_count);
1516                 rc = -EINVAL;
1517         }
1518         if (enic->conf_wq_count < required_wq) {
1519                 dev_err(enic, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1520                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1521                 rc = -EINVAL;
1522         }
1523
1524         if (enic->conf_cq_count < required_cq) {
1525                 dev_err(enic, "Not enough Completion queues. Required:%u, Configured:%u\n",
1526                         required_cq, enic->conf_cq_count);
1527                 rc = -EINVAL;
1528         }
1529         if (enic->conf_intr_count < required_intr) {
1530                 dev_err(enic, "Not enough Interrupts to support Rx queue"
1531                         " interrupts. Required:%u, Configured:%u\n",
1532                         required_intr, enic->conf_intr_count);
1533                 rc = -EINVAL;
1534         }
1535
1536         if (rc == 0) {
1537                 enic->rq_count = eth_dev->data->nb_rx_queues;
1538                 enic->wq_count = eth_dev->data->nb_tx_queues;
1539                 enic->cq_count = enic->rq_count + enic->wq_count;
1540                 enic->intr_count = required_intr;
1541         }
1542
1543         return rc;
1544 }
1545
1546 /* Reinitialize the CQ and the SOP/data RQs backing an eth_dev Rx queue */
1547 static int
1548 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1549 {
1550         struct vnic_rq *sop_rq, *data_rq;
1551         unsigned int cq_idx;
1552         int rc = 0;
1553
1554         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1555         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1556         cq_idx = enic_cq_rq(enic, rq_idx);
1557
1558         vnic_cq_clean(&enic->cq[cq_idx]);
1559         vnic_cq_init(&enic->cq[cq_idx],
1560                      0 /* flow_control_enable */,
1561                      1 /* color_enable */,
1562                      0 /* cq_head */,
1563                      0 /* cq_tail */,
1564                      1 /* cq_tail_color */,
1565                      0 /* interrupt_enable */,
1566                      1 /* cq_entry_enable */,
1567                      0 /* cq_message_enable */,
1568                      0 /* interrupt offset */,
1569                      0 /* cq_message_addr */);
1570
1571
1572         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1573                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1574                            sop_rq->ring.desc_count - 1, 1, 0);
1575         if (data_rq->in_use) {
1576                 vnic_rq_init_start(data_rq,
1577                                    enic_cq_rq(enic,
1578                                    enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1579                                    0, data_rq->ring.desc_count - 1, 1, 0);
1580         }
1581
1582         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1583         if (rc)
1584                 return rc;
1585
1586         if (data_rq->in_use) {
1587                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1588                 if (rc) {
1589                         enic_rxmbuf_queue_release(enic, sop_rq);
1590                         return rc;
1591                 }
1592         }
1593
1594         return 0;
1595 }
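
/*
 * Illustration only: enic_set_mtu() below relies on this helper to rebuild
 * each Rx queue after reallocating it for the new maximum packet length,
 * roughly per queue:
 *
 *     enic_free_rq(rq);
 *     enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
 *                   rq->tot_nb_desc, rq->rx_free_thresh);
 *     enic_reinit_rq(enic, rq_idx);
 */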
1596
1597 /* The Cisco NIC can send and receive packets up to a max packet size
1598  * determined by the NIC type and firmware. There is also an MTU
1599  * configured into the NIC via the CIMC/UCSM management interface
1600  * which can be overridden by this function (up to the max packet size).
1601  * Depending on the network setup, doing so may cause packet drops
1602  * and unexpected behavior.
1603  */
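/*
 * Illustration only: an application normally reaches this path through the
 * ethdev API; a minimal sketch, assuming an application-side port_id and a
 * 9000-byte jumbo MTU as an example value:
 *
 *     if (rte_eth_dev_set_mtu(port_id, 9000) != 0)
 *             printf("MTU update rejected\n");
 *
 * The request is rejected here if it exceeds enic->max_mtu or is below
 * ENIC_MIN_MTU.
 */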
1604 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1605 {
1606         unsigned int rq_idx;
1607         struct vnic_rq *rq;
1608         int rc = 0;
1609         uint16_t old_mtu;       /* previous setting */
1610         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1611         struct rte_eth_dev *eth_dev = enic->rte_dev;
1612
1613         old_mtu = eth_dev->data->mtu;
1614         config_mtu = enic->config.mtu;
1615
1616         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1617                 return -E_RTE_SECONDARY;
1618
1619         if (new_mtu > enic->max_mtu) {
1620                 dev_err(enic,
1621                         "MTU not updated: requested (%u) greater than max (%u)\n",
1622                         new_mtu, enic->max_mtu);
1623                 return -EINVAL;
1624         }
1625         if (new_mtu < ENIC_MIN_MTU) {
1626                 dev_info(enic,
1627                         "MTU not updated: requested (%u) less than min (%u)\n",
1628                         new_mtu, ENIC_MIN_MTU);
1629                 return -EINVAL;
1630         }
1631         if (new_mtu > config_mtu)
1632                 dev_warning(enic,
1633                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1634                         new_mtu, config_mtu);
1635
1636         /* Update the MTU and maximum packet length */
1637         eth_dev->data->mtu = new_mtu;
1638         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1639                 enic_mtu_to_max_rx_pktlen(new_mtu);
1640
1641         /*
1642          * If the device has not started (enic_enable), nothing to do.
1643          * Later, enic_enable() will set up RQs reflecting the new maximum
1644          * packet length.
1645          */
1646         if (!eth_dev->data->dev_started)
1647                 goto set_mtu_done;
1648
1649         /*
1650          * The device has started, re-do RQs on the fly. In the process, we
1651          * pick up the new maximum packet length.
1652          *
1653          * Some applications rely on the ability to change MTU without stopping
1654          * the device. So keep this behavior for now.
1655          */
1656         rte_spinlock_lock(&enic->mtu_lock);
1657
1658         /* Stop traffic on all RQs */
1659         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1660                 rq = &enic->rq[rq_idx];
1661                 if (rq->is_sop && rq->in_use) {
1662                         rc = enic_stop_rq(enic,
1663                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1664                         if (rc) {
1665                                 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1666                                 goto set_mtu_done;
1667                         }
1668                 }
1669         }
1670
1671         /* replace Rx function with a no-op to avoid getting stale pkts */
1672         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1673         rte_mb();
1674
1675         /* Allow time for threads to exit the real Rx function. */
1676         usleep(100000);
1677
1678         /* now it is safe to reconfigure the RQs */
1679
1680
1681         /* free and reallocate RQs with the new MTU */
1682         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1683                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1684                 if (!rq->in_use)
1685                         continue;
1686
1687                 enic_free_rq(rq);
1688                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1689                                    rq->tot_nb_desc, rq->rx_free_thresh);
1690                 if (rc) {
1691                         dev_err(enic,
1692                                 "Fatal MTU alloc error- No traffic will pass\n");
1693                         goto set_mtu_done;
1694                 }
1695
1696                 rc = enic_reinit_rq(enic, rq_idx);
1697                 if (rc) {
1698                         dev_err(enic,
1699                                 "Fatal MTU RQ reinit- No traffic will pass\n");
1700                         goto set_mtu_done;
1701                 }
1702         }
1703
1704         /* put back the real receive function */
1705         rte_mb();
1706         enic_pick_rx_handler(eth_dev);
1707         rte_mb();
1708
1709         /* restart Rx traffic */
1710         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1711                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1712                 if (rq->is_sop && rq->in_use)
1713                         enic_start_rq(enic, rq_idx);
1714         }
1715
1716 set_mtu_done:
1717         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1718         rte_spinlock_unlock(&enic->mtu_lock);
1719         return rc;
1720 }
1721
1722 static int enic_dev_init(struct enic *enic)
1723 {
1724         int err;
1725         struct rte_eth_dev *eth_dev = enic->rte_dev;
1726
1727         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1728
1729
1730         /* Get vNIC configuration */
1731         err = enic_get_vnic_config(enic);
1732         if (err) {
1733                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1734                 return err;
1735         }
1736
1737         /* Get available resource counts */
1738         enic_get_res_counts(enic);
1739         if (enic->conf_rq_count == 1) {
1740                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1741                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1742                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1743                 return -EINVAL;
1744         }
1745         /* Queue counts may be zero; rte_zmalloc returns NULL in that case. */
1746         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1747                                enic->conf_cq_count, 8);
1748         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1749                                  enic->conf_intr_count, 8);
1750         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1751                                enic->conf_rq_count, 8);
1752         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1753                                enic->conf_wq_count, 8);
1754         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1755                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1756                 return -1;
1757         }
1758         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1759                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1760                 return -1;
1761         }
1762         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1763                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1764                 return -1;
1765         }
1766         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1767                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1768                 return -1;
1769         }
1770
1771         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1772                                         sizeof(struct rte_ether_addr) *
1773                                         ENIC_UNICAST_PERFECT_FILTERS, 0);
1774         if (!eth_dev->data->mac_addrs) {
1775                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1776                 return -1;
1777         }
1778         rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1779                         eth_dev->data->mac_addrs);
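
        /*
         * Illustration only: the table above has room for
         * ENIC_UNICAST_PERFECT_FILTERS entries, so an application may add
         * further unicast addresses at runtime, e.g. (addr assumed to be a
         * populated struct rte_ether_addr):
         *
         *     ret = rte_eth_dev_mac_addr_add(port_id, &addr, 0);
         */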
1780
1781         vnic_dev_set_reset_flag(enic->vdev, 0);
1782
1783         LIST_INIT(&enic->flows);
1784
1785         /* set up link status checking */
1786         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1787
1788         /*
1789          * When Geneve with options offload is available, always disable it
1790          * first as it can interfere with user flow rules.
1791          */
1792         if (enic->geneve_opt_avail) {
1793                 /*
1794                  * Disabling fails if the feature is provisioned but
1795                  * not enabled. So ignore result and do not log error.
1796                  */
1797                 vnic_dev_overlay_offload_ctrl(enic->vdev,
1798                         OVERLAY_FEATURE_GENEVE,
1799                         OVERLAY_OFFLOAD_DISABLE);
1800         }
1801         enic->overlay_offload = false;
1802         if (enic->disable_overlay && enic->vxlan) {
1803                 /*
1804                  * Explicitly disable overlay offload as the setting is
1805                  * sticky, and resetting vNIC does not disable it.
1806                  */
1807                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1808                                                   OVERLAY_FEATURE_VXLAN,
1809                                                   OVERLAY_OFFLOAD_DISABLE)) {
1810                         dev_err(enic, "failed to disable overlay offload\n");
1811                 } else {
1812                         dev_info(enic, "Overlay offload is disabled\n");
1813                 }
1814         }
1815         if (!enic->disable_overlay && enic->vxlan &&
1816             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1817             vnic_dev_overlay_offload_ctrl(enic->vdev,
1818                                           OVERLAY_FEATURE_VXLAN,
1819                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1820                 enic->tx_offload_capa |=
1821                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1822                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1823                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1824                 enic->tx_offload_mask |=
1825                         PKT_TX_OUTER_IPV6 |
1826                         PKT_TX_OUTER_IPV4 |
1827                         PKT_TX_OUTER_IP_CKSUM |
1828                         PKT_TX_TUNNEL_MASK;
1829                 enic->overlay_offload = true;
1830                 dev_info(enic, "Overlay offload is enabled\n");
1831         }
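        /*
         * Illustration only: once overlay offload is enabled, the
         * capabilities added above are advertised via dev_info, and an
         * application may request them at configure time, e.g. (port_conf
         * assumed):
         *
         *     port_conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
         *                                  DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
         */
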
1832         /* Geneve with options offload requires overlay offload */
1833         if (enic->overlay_offload && enic->geneve_opt_avail &&
1834             enic->geneve_opt_request) {
1835                 if (vnic_dev_overlay_offload_ctrl(enic->vdev,
1836                                 OVERLAY_FEATURE_GENEVE,
1837                                 OVERLAY_OFFLOAD_ENABLE)) {
1838                         dev_err(enic, "failed to enable geneve+option\n");
1839                 } else {
1840                         enic->geneve_opt_enabled = 1;
1841                         dev_info(enic, "Geneve with options is enabled\n");
1842                 }
1843         }
1844         /*
1845          * Reset the vxlan port if HW vxlan parsing is available. It
1846          * is always enabled regardless of overlay offload
1847          * enable/disable.
1848          */
1849         if (enic->vxlan) {
1850                 enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1851                 /*
1852                  * Reset the vxlan port to the default, as the NIC firmware
1853                  * does not reset it automatically and keeps the old setting.
1854                  */
1855                 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1856                                                  OVERLAY_CFG_VXLAN_PORT_UPDATE,
1857                                                  RTE_VXLAN_DEFAULT_PORT)) {
1858                         dev_err(enic, "failed to update vxlan port\n");
1859                         return -EINVAL;
1860                 }
1861         }
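
        /*
         * Illustration only: applications that use a non-default VXLAN UDP
         * port can still change it at runtime through
         * rte_eth_dev_udp_tunnel_port_add(); the reset above only ensures a
         * known starting value after probe.
         */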
1862
1863         if (enic_fm_init(enic))
1864                 dev_warning(enic, "Init of flowman failed.\n");
1865         return 0;
1866
1867 }
1868
1869 static void lock_devcmd(void *priv)
1870 {
1871         struct enic *enic = priv;
1872
1873         rte_spinlock_lock(&enic->devcmd_lock);
1874 }
1875
1876 static void unlock_devcmd(void *priv)
1877 {
1878         struct enic *enic = priv;
1879
1880         rte_spinlock_unlock(&enic->devcmd_lock);
1881 }
1882
1883 int enic_probe(struct enic *enic)
1884 {
1885         struct rte_pci_device *pdev = enic->pdev;
1886         int err = -1;
1887
1888         dev_debug(enic, "Initializing ENIC PMD\n");
1889
1890         /* if this is a secondary process the hardware is already initialized */
1891         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1892                 return 0;
1893
1894         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1895         enic->bar0.len = pdev->mem_resource[0].len;
1896
1897         /* Register vNIC device */
1898         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1899         if (!enic->vdev) {
1900                 dev_err(enic, "vNIC registration failed, aborting\n");
1901                 goto err_out;
1902         }
1903
1904         LIST_INIT(&enic->memzone_list);
1905         rte_spinlock_init(&enic->memzone_list_lock);
1906
1907         vnic_register_cbacks(enic->vdev,
1908                 enic_alloc_consistent,
1909                 enic_free_consistent);
1910
1911         /*
1912          * Allocate the consistent memory for stats upfront so both primary and
1913          * secondary processes can dump stats.
1914          */
1915         err = vnic_dev_alloc_stats_mem(enic->vdev);
1916         if (err) {
1917                 dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1918                 goto err_out_unregister;
1919         }
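        /*
         * Illustration only: with the stats memory shared, e.g.
         * rte_eth_stats_get(port_id, &stats) also works when called from a
         * secondary process (port_id and stats are application-side).
         */
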
1920         /* Issue device open to get device in known state */
1921         err = enic_dev_open(enic);
1922         if (err) {
1923                 dev_err(enic, "vNIC dev open failed, aborting\n");
1924                 goto err_out_unregister;
1925         }
1926
1927         /* Set ingress vlan rewrite mode before vnic initialization */
1928         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1929                   enic->ig_vlan_rewrite_mode);
1930         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1931                 enic->ig_vlan_rewrite_mode);
1932         if (err) {
1933                 dev_err(enic,
1934                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1935                 goto err_out_dev_close;
1936         }
1937
1938         /* Issue device init to initialize the vnic-to-switch link.
1939          * We'll start with carrier off and wait for link UP
1940          * notification later to turn on carrier.  We don't need
1941          * to wait here for the vnic-to-switch link initialization
1942          * to complete; link UP notification is the indication that
1943          * the process is complete.
1944          */
1945
1946         err = vnic_dev_init(enic->vdev, 0);
1947         if (err) {
1948                 dev_err(enic, "vNIC dev init failed, aborting\n");
1949                 goto err_out_dev_close;
1950         }
1951
1952         err = enic_dev_init(enic);
1953         if (err) {
1954                 dev_err(enic, "Device initialization failed, aborting\n");
1955                 goto err_out_dev_close;
1956         }
1957
1958         /* Use a PF spinlock to serialize devcmd from PF and VF representors */
1959         if (enic->switchdev_mode) {
1960                 rte_spinlock_init(&enic->devcmd_lock);
1961                 vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1962         }
1963         return 0;
1964
1965 err_out_dev_close:
1966         vnic_dev_close(enic->vdev);
1967 err_out_unregister:
1968         vnic_dev_unregister(enic->vdev);
1969 err_out:
1970         return err;
1971 }
1972
1973 void enic_remove(struct enic *enic)
1974 {
1975         enic_dev_deinit(enic);
1976         vnic_dev_close(enic->vdev);
1977         vnic_dev_unregister(enic->vdev);
1978 }