net/enic: add simple Rx handler
drivers/net/enic/enic_main.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
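/* An Ethernet address is multicast if the group bit (LSB of the first octet) is set */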
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 {
74         struct rte_mbuf *mbuf = *buf;
75
76         rte_pktmbuf_free_seg(mbuf);
77         *buf = NULL;
78 }
79
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
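        /*
         * Leave one descriptor unposted: with fetch_index at 0, posting all
         * desc_count buffers would wrap posted_index back onto fetch_index
         * and the ring would appear empty.
         */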
343         rq->posted_index = rq->ring.desc_count - 1;
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
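        /* Track the zone so enic_free_consistent() can find it by vaddr/iova */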
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
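        /*
         * Vector 0 is reserved for link status (LSC); Rx queue i uses
         * MSI-X vector i + ENICPMD_RXQ_INTR_OFFSET.
         */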
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
496 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
497 {
498         struct wq_enet_desc *desc;
499         struct vnic_wq *wq;
500         unsigned int i;
501
502         /*
503          * Fill WQ descriptor fields that never change. Every descriptor is
504          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
505          * descriptors (i.e. request one completion update every 32 packets).
506          */
507         wq = &enic->wq[queue_idx];
508         desc = (struct wq_enet_desc *)wq->ring.descs;
509         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
510                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
511                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
512                         desc->header_length_flags |=
513                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
514         }
515 }
516
517 int enic_enable(struct enic *enic)
518 {
519         unsigned int index;
520         int err;
521         struct rte_eth_dev *eth_dev = enic->rte_dev;
522
523         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
524         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
525
526         /* vnic notification of link status has already been turned on in
527          * enic_dev_init() which is called during probe time.  Here we are
528          * just turning on interrupt vector 0 if needed.
529          */
530         if (eth_dev->data->dev_conf.intr_conf.lsc)
531                 vnic_dev_notify_set(enic->vdev, 0);
532
533         err = enic_rxq_intr_init(enic);
534         if (err)
535                 return err;
536         if (enic_clsf_init(enic))
537                 dev_warning(enic, "Init of hash table for clsf failed. "\
538                         "Flow director feature will not work\n");
539
540         for (index = 0; index < enic->rq_count; index++) {
541                 err = enic_alloc_rx_queue_mbufs(enic,
542                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
543                 if (err) {
544                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
545                         return err;
546                 }
547                 err = enic_alloc_rx_queue_mbufs(enic,
548                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
549                 if (err) {
550                         /* release the allocated mbufs for the sop rq */
551                         enic_rxmbuf_queue_release(enic,
552                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
553
554                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
555                         return err;
556                 }
557         }
558
559         /*
560          * Use the simple TX handler if possible. All offloads must be disabled
561          * except mbuf fast free.
562          */
563         if ((eth_dev->data->dev_conf.txmode.offloads &
564              ~DEV_TX_OFFLOAD_MBUF_FAST_FREE) == 0) {
565                 PMD_INIT_LOG(DEBUG, " use the simple tx handler");
566                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
567                 for (index = 0; index < enic->wq_count; index++)
568                         enic_prep_wq_for_simple_tx(enic, index);
569         } else {
570                 PMD_INIT_LOG(DEBUG, " use the default tx handler");
571                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
572         }
573
574         /* Use the non-scatter, simplified RX handler if possible. */
575         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
576                 PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler");
577                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
578         } else {
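                /* keep the previously installed, scatter-capable Rx handler */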
579                 PMD_INIT_LOG(DEBUG, " use the normal Rx handler");
580         }
581
582         for (index = 0; index < enic->wq_count; index++)
583                 enic_start_wq(enic, index);
584         for (index = 0; index < enic->rq_count; index++)
585                 enic_start_rq(enic, index);
586
587         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
588
589         vnic_dev_enable_wait(enic->vdev);
590
591         /* Register and enable error interrupt */
592         rte_intr_callback_register(&(enic->pdev->intr_handle),
593                 enic_intr_handler, (void *)enic->rte_dev);
594
595         rte_intr_enable(&(enic->pdev->intr_handle));
596         /* Unmask LSC interrupt */
597         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
598
599         return 0;
600 }
601
602 int enic_alloc_intr_resources(struct enic *enic)
603 {
604         int err;
605         unsigned int i;
606
607         dev_info(enic, "vNIC resources used:  "\
608                 "wq %d rq %d cq %d intr %d\n",
609                 enic->wq_count, enic_vnic_rq_count(enic),
610                 enic->cq_count, enic->intr_count);
611
612         for (i = 0; i < enic->intr_count; i++) {
613                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
614                 if (err) {
615                         enic_free_vnic_resources(enic);
616                         return err;
617                 }
618         }
619         return 0;
620 }
621
622 void enic_free_rq(void *rxq)
623 {
624         struct vnic_rq *rq_sop, *rq_data;
625         struct enic *enic;
626
627         if (rxq == NULL)
628                 return;
629
630         rq_sop = (struct vnic_rq *)rxq;
631         enic = vnic_dev_priv(rq_sop->vdev);
632         rq_data = &enic->rq[rq_sop->data_queue_idx];
633
634         if (rq_sop->free_mbufs) {
635                 struct rte_mbuf **mb;
636                 int i;
637
638                 mb = rq_sop->free_mbufs;
639                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
640                      i < ENIC_RX_BURST_MAX; i++)
641                         rte_pktmbuf_free(mb[i]);
642                 rte_free(rq_sop->free_mbufs);
643                 rq_sop->free_mbufs = NULL;
644                 rq_sop->num_free_mbufs = 0;
645         }
646
647         enic_rxmbuf_queue_release(enic, rq_sop);
648         if (rq_data->in_use)
649                 enic_rxmbuf_queue_release(enic, rq_data);
650
651         rte_free(rq_sop->mbuf_ring);
652         if (rq_data->in_use)
653                 rte_free(rq_data->mbuf_ring);
654
655         rq_sop->mbuf_ring = NULL;
656         rq_data->mbuf_ring = NULL;
657
658         vnic_rq_free(rq_sop);
659         if (rq_data->in_use)
660                 vnic_rq_free(rq_data);
661
662         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
663
664         rq_sop->in_use = 0;
665         rq_data->in_use = 0;
666 }
667
668 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
669 {
670         struct rte_eth_dev *eth_dev = enic->rte_dev;
671         vnic_wq_enable(&enic->wq[queue_idx]);
672         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
673 }
674
675 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
676 {
677         struct rte_eth_dev *eth_dev = enic->rte_dev;
678         int ret;
679
680         ret = vnic_wq_disable(&enic->wq[queue_idx]);
681         if (ret)
682                 return ret;
683
684         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
685         return 0;
686 }
687
688 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
689 {
690         struct vnic_rq *rq_sop;
691         struct vnic_rq *rq_data;
692         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
693         rq_data = &enic->rq[rq_sop->data_queue_idx];
694         struct rte_eth_dev *eth_dev = enic->rte_dev;
695
696         if (rq_data->in_use) {
697                 vnic_rq_enable(rq_data);
698                 enic_initial_post_rx(enic, rq_data);
699         }
700         rte_mb();
701         vnic_rq_enable(rq_sop);
702         enic_initial_post_rx(enic, rq_sop);
703         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
704 }
705
706 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
707 {
708         int ret1 = 0, ret2 = 0;
709         struct rte_eth_dev *eth_dev = enic->rte_dev;
710         struct vnic_rq *rq_sop;
711         struct vnic_rq *rq_data;
712         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
713         rq_data = &enic->rq[rq_sop->data_queue_idx];
714
715         ret2 = vnic_rq_disable(rq_sop);
716         rte_mb();
717         if (rq_data->in_use)
718                 ret1 = vnic_rq_disable(rq_data);
719
720         if (ret2)
721                 return ret2;
722         else if (ret1)
723                 return ret1;
724
725         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
726         return 0;
727 }
728
729 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
730         unsigned int socket_id, struct rte_mempool *mp,
731         uint16_t nb_desc, uint16_t free_thresh)
732 {
733         int rc;
734         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
735         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
736         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
737         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
738         unsigned int mbuf_size, mbufs_per_pkt;
739         unsigned int nb_sop_desc, nb_data_desc;
740         uint16_t min_sop, max_sop, min_data, max_data;
741         uint32_t max_rx_pkt_len;
742
743         rq_sop->is_sop = 1;
744         rq_sop->data_queue_idx = data_queue_idx;
745         rq_data->is_sop = 0;
746         rq_data->data_queue_idx = 0;
747         rq_sop->socket_id = socket_id;
748         rq_sop->mp = mp;
749         rq_data->socket_id = socket_id;
750         rq_data->mp = mp;
751         rq_sop->in_use = 1;
752         rq_sop->rx_free_thresh = free_thresh;
753         rq_data->rx_free_thresh = free_thresh;
754         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
755                   free_thresh);
756
757         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
758                                RTE_PKTMBUF_HEADROOM);
759         /* max_rx_pkt_len includes the Ethernet header and CRC. */
760         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
761
762         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
763             DEV_RX_OFFLOAD_SCATTER) {
764                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
765                 /* ceil((max pkt len)/mbuf_size) */
766                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
767         } else {
768                 dev_info(enic, "Scatter rx mode disabled\n");
769                 mbufs_per_pkt = 1;
770                 if (max_rx_pkt_len > mbuf_size) {
771                         dev_warning(enic, "The maximum Rx packet size (%u) is"
772                                     " larger than the mbuf size (%u), and"
773                                     " scatter is disabled. Larger packets will"
774                                     " be truncated.\n",
775                                     max_rx_pkt_len, mbuf_size);
776                 }
777         }
778
779         if (mbufs_per_pkt > 1) {
780                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
781                 rq_sop->data_queue_enable = 1;
782                 rq_data->in_use = 1;
783                 /*
784                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
785                  * receives packet sizes up to the "max" MTU.
786                  * If not using scatter, we can achieve the effect of dropping
787                  * larger packets by reducing the size of posted buffers.
788                  * See enic_alloc_rx_queue_mbufs().
789                  */
790                 if (max_rx_pkt_len <
791                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
792                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
793                                     " when scatter rx mode is in use.\n");
794                 }
795         } else {
796                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
797                          queue_idx);
798                 rq_sop->data_queue_enable = 0;
799                 rq_data->in_use = 0;
800         }
801
802         /* number of descriptors has to be a multiple of 32 */
803         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
804         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
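        /* e.g. nb_desc = 512 with mbufs_per_pkt = 4 yields 128 sop and 384 data descriptors */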
805
806         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
807         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
808
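        /*
         * With scatter, each packet needs one sop descriptor plus up to
         * (mbufs_per_pkt - 1) data descriptors, so the sop count is bounded
         * by how many worst-case packets the data RQ can hold.
         */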
809         if (mbufs_per_pkt > 1) {
810                 min_sop = ENIC_RX_BURST_MAX;
811                 max_sop = ((enic->config.rq_desc_count /
812                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
813                 min_data = min_sop * (mbufs_per_pkt - 1);
814                 max_data = enic->config.rq_desc_count;
815         } else {
816                 min_sop = ENIC_RX_BURST_MAX;
817                 max_sop = enic->config.rq_desc_count;
818                 min_data = 0;
819                 max_data = 0;
820         }
821
822         if (nb_desc < (min_sop + min_data)) {
823                 dev_warning(enic,
824                             "Number of rx descs too low, adjusting to minimum\n");
825                 nb_sop_desc = min_sop;
826                 nb_data_desc = min_data;
827         } else if (nb_desc > (max_sop + max_data)) {
828                 dev_warning(enic,
829                             "Number of rx_descs too high, adjusting to maximum\n");
830                 nb_sop_desc = max_sop;
831                 nb_data_desc = max_data;
832         }
833         if (mbufs_per_pkt > 1) {
834                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
835                          " rx descriptor range is %u to %u\n",
836                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
837                          max_sop + max_data);
838         }
839         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
840                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
841
842         /* Allocate sop queue resources */
843         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
844                 nb_sop_desc, sizeof(struct rq_enet_desc));
845         if (rc) {
846                 dev_err(enic, "error in allocation of sop rq\n");
847                 goto err_exit;
848         }
849         nb_sop_desc = rq_sop->ring.desc_count;
850
851         if (rq_data->in_use) {
852                 /* Allocate data queue resources */
853                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
854                                    nb_data_desc,
855                                    sizeof(struct rq_enet_desc));
856                 if (rc) {
857                         dev_err(enic, "error in allocation of data rq\n");
858                         goto err_free_rq_sop;
859                 }
860                 nb_data_desc = rq_data->ring.desc_count;
861         }
862         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
863                            socket_id, nb_sop_desc + nb_data_desc,
864                            sizeof(struct cq_enet_rq_desc));
865         if (rc) {
866                 dev_err(enic, "error in allocation of cq for rq\n");
867                 goto err_free_rq_data;
868         }
869
870         /* Allocate the mbuf rings */
871         rq_sop->mbuf_ring = (struct rte_mbuf **)
872                 rte_zmalloc_socket("rq->mbuf_ring",
873                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
874                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
875         if (rq_sop->mbuf_ring == NULL)
876                 goto err_free_cq;
877
878         if (rq_data->in_use) {
879                 rq_data->mbuf_ring = (struct rte_mbuf **)
880                         rte_zmalloc_socket("rq->mbuf_ring",
881                                 sizeof(struct rte_mbuf *) * nb_data_desc,
882                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
883                 if (rq_data->mbuf_ring == NULL)
884                         goto err_free_sop_mbuf;
885         }
886
887         rq_sop->free_mbufs = (struct rte_mbuf **)
888                 rte_zmalloc_socket("rq->free_mbufs",
889                                    sizeof(struct rte_mbuf *) *
890                                    ENIC_RX_BURST_MAX,
891                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
892         if (rq_sop->free_mbufs == NULL)
893                 goto err_free_data_mbuf;
894         rq_sop->num_free_mbufs = 0;
895
896         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
897
898         return 0;
899
900 err_free_data_mbuf:
901         rte_free(rq_data->mbuf_ring);
902 err_free_sop_mbuf:
903         rte_free(rq_sop->mbuf_ring);
904 err_free_cq:
905         /* cleanup on error */
906         vnic_cq_free(&enic->cq[queue_idx]);
907 err_free_rq_data:
908         if (rq_data->in_use)
909                 vnic_rq_free(rq_data);
910 err_free_rq_sop:
911         vnic_rq_free(rq_sop);
912 err_exit:
913         return -ENOMEM;
914 }
915
916 void enic_free_wq(void *txq)
917 {
918         struct vnic_wq *wq;
919         struct enic *enic;
920
921         if (txq == NULL)
922                 return;
923
924         wq = (struct vnic_wq *)txq;
925         enic = vnic_dev_priv(wq->vdev);
926         rte_memzone_free(wq->cqmsg_rz);
927         vnic_wq_free(wq);
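        /* CQs for WQs are placed after the RQ CQs in enic->cq[] */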
928         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
929 }
930
931 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
932         unsigned int socket_id, uint16_t nb_desc)
933 {
934         int err;
935         struct vnic_wq *wq = &enic->wq[queue_idx];
936         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
937         char name[NAME_MAX];
938         static int instance;
939
940         wq->socket_id = socket_id;
941         /*
942          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
943          * print an info message for diagnostics.
944          */
945         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
946
947         /* Allocate queue resources */
948         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
949                 nb_desc,
950                 sizeof(struct wq_enet_desc));
951         if (err) {
952                 dev_err(enic, "error in allocation of wq\n");
953                 return err;
954         }
955
956         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
957                 socket_id, nb_desc,
958                 sizeof(struct cq_enet_wq_desc));
959         if (err) {
960                 vnic_wq_free(wq);
961                 dev_err(enic, "error in allocation of cq for wq\n");
962         }
963
964         /* set up CQ message */
965         snprintf((char *)name, sizeof(name),
966                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
967                 instance++);
968
969         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
970                         sizeof(uint32_t), SOCKET_ID_ANY,
971                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
972         if (!wq->cqmsg_rz)
973                 return -ENOMEM;
974
975         return err;
976 }
977
978 int enic_disable(struct enic *enic)
979 {
980         unsigned int i;
981         int err;
982
983         for (i = 0; i < enic->intr_count; i++) {
984                 vnic_intr_mask(&enic->intr[i]);
985                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
986         }
987         enic_rxq_intr_deinit(enic);
988         rte_intr_disable(&enic->pdev->intr_handle);
989         rte_intr_callback_unregister(&enic->pdev->intr_handle,
990                                      enic_intr_handler,
991                                      (void *)enic->rte_dev);
992
993         vnic_dev_disable(enic->vdev);
994
995         enic_clsf_destroy(enic);
996
997         if (!enic_is_sriov_vf(enic))
998                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
999
1000         for (i = 0; i < enic->wq_count; i++) {
1001                 err = vnic_wq_disable(&enic->wq[i]);
1002                 if (err)
1003                         return err;
1004         }
1005         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1006                 if (enic->rq[i].in_use) {
1007                         err = vnic_rq_disable(&enic->rq[i]);
1008                         if (err)
1009                                 return err;
1010                 }
1011         }
1012
1013         /* If we were using interrupts, set the interrupt vector to -1
1014          * to disable interrupts.  We are not disabling link notifications,
1015          * though, as we want the polling of link status to continue working.
1016          */
1017         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1018                 vnic_dev_notify_set(enic->vdev, -1);
1019
1020         vnic_dev_set_reset_flag(enic->vdev, 1);
1021
1022         for (i = 0; i < enic->wq_count; i++)
1023                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1024
1025         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1026                 if (enic->rq[i].in_use)
1027                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1028         for (i = 0; i < enic->cq_count; i++)
1029                 vnic_cq_clean(&enic->cq[i]);
1030         for (i = 0; i < enic->intr_count; i++)
1031                 vnic_intr_clean(&enic->intr[i]);
1032
1033         return 0;
1034 }
1035
1036 static int enic_dev_wait(struct vnic_dev *vdev,
1037         int (*start)(struct vnic_dev *, int),
1038         int (*finished)(struct vnic_dev *, int *),
1039         int arg)
1040 {
1041         int done;
1042         int err;
1043         int i;
1044
1045         err = start(vdev, arg);
1046         if (err)
1047                 return err;
1048
1049         /* Wait for func to complete...2 seconds max */
1050         for (i = 0; i < 2000; i++) {
1051                 err = finished(vdev, &done);
1052                 if (err)
1053                         return err;
1054                 if (done)
1055                         return 0;
1056                 usleep(1000);
1057         }
1058         return -ETIMEDOUT;
1059 }
1060
1061 static int enic_dev_open(struct enic *enic)
1062 {
1063         int err;
1064         int flags = CMD_OPENF_IG_DESCCACHE;
1065
1066         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1067                 vnic_dev_open_done, flags);
1068         if (err)
1069                 dev_err(enic_get_dev(enic),
1070                         "vNIC device open failed, err %d\n", err);
1071
1072         return err;
1073 }
1074
1075 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1076 {
1077         dma_addr_t rss_key_buf_pa;
1078         union vnic_rss_key *rss_key_buf_va = NULL;
1079         int err, i;
1080         u8 name[NAME_MAX];
1081
1082         RTE_ASSERT(user_key != NULL);
1083         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1084         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1085                 &rss_key_buf_pa, name);
1086         if (!rss_key_buf_va)
1087                 return -ENOMEM;
1088
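        /* The 40-byte key is stored as four 10-byte sub-keys in vnic_rss_key */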
1089         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1090                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1091
1092         err = enic_set_rss_key(enic,
1093                 rss_key_buf_pa,
1094                 sizeof(union vnic_rss_key));
1095
1096         /* Save for later queries */
1097         if (!err) {
1098                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1099                            sizeof(union vnic_rss_key));
1100         }
1101         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1102                 rss_key_buf_va, rss_key_buf_pa);
1103
1104         return err;
1105 }
1106
1107 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1108 {
1109         dma_addr_t rss_cpu_buf_pa;
1110         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1111         int err;
1112         u8 name[NAME_MAX];
1113
1114         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1115         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1116                 &rss_cpu_buf_pa, name);
1117         if (!rss_cpu_buf_va)
1118                 return -ENOMEM;
1119
1120         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1121
1122         err = enic_set_rss_cpu(enic,
1123                 rss_cpu_buf_pa,
1124                 sizeof(union vnic_rss_cpu));
1125
1126         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1127                 rss_cpu_buf_va, rss_cpu_buf_pa);
1128
1129         /* Save for later queries */
1130         if (!err)
1131                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1132         return err;
1133 }
1134
1135 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1136         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1137 {
1138         const u8 tso_ipid_split_en = 0;
1139         int err;
1140
1141         err = enic_set_nic_cfg(enic,
1142                 rss_default_cpu, rss_hash_type,
1143                 rss_hash_bits, rss_base_cpu,
1144                 rss_enable, tso_ipid_split_en,
1145                 enic->ig_vlan_strip_en);
1146
1147         return err;
1148 }
1149
1150 /* Initialize RSS with defaults, called from dev_configure */
1151 int enic_init_rss_nic_cfg(struct enic *enic)
1152 {
1153         static uint8_t default_rss_key[] = {
1154                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1155                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1156                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1157                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1158         };
1159         struct rte_eth_rss_conf rss_conf;
1160         union vnic_rss_cpu rss_cpu;
1161         int ret, i;
1162
1163         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1164         /*
1165          * If setting key for the first time, and the user gives us none, then
1166          * push the default key to NIC.
1167          */
1168         if (rss_conf.rss_key == NULL) {
1169                 rss_conf.rss_key = default_rss_key;
1170                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1171         }
1172         ret = enic_set_rss_conf(enic, &rss_conf);
1173         if (ret) {
1174                 dev_err(enic, "Failed to configure RSS\n");
1175                 return ret;
1176         }
1177         if (enic->rss_enable) {
1178                 /* If enabling RSS, use the default reta */
1179                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1180                         rss_cpu.cpu[i / 4].b[i % 4] =
1181                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1182                 }
1183                 ret = enic_set_rss_reta(enic, &rss_cpu);
1184                 if (ret)
1185                         dev_err(enic, "Failed to set RSS indirection table\n");
1186         }
1187         return ret;
1188 }
1189
1190 int enic_setup_finish(struct enic *enic)
1191 {
1192         enic_init_soft_stats(enic);
1193
1194         /* Default conf */
1195         vnic_dev_packet_filter(enic->vdev,
1196                 1 /* directed  */,
1197                 1 /* multicast */,
1198                 1 /* broadcast */,
1199                 0 /* promisc   */,
1200                 1 /* allmulti  */);
1201
1202         enic->promisc = 0;
1203         enic->allmulti = 1;
1204
1205         return 0;
1206 }
1207
1208 static int enic_rss_conf_valid(struct enic *enic,
1209                                struct rte_eth_rss_conf *rss_conf)
1210 {
1211         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1212         if (enic->flow_type_rss_offloads == 0)
1213                 return 0;
1214         if (rss_conf->rss_key != NULL &&
1215             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1216                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1217                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1218                 return -EINVAL;
1219         }
1220         if (rss_conf->rss_hf != 0 &&
1221             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1222                 dev_err(enic, "Given rss_hf contains none of the supported"
1223                         " types\n");
1224                 return -EINVAL;
1225         }
1226         return 0;
1227 }
1228
1229 /* Set hash type and key according to rss_conf */
1230 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1231 {
1232         struct rte_eth_dev *eth_dev;
1233         uint64_t rss_hf;
1234         u8 rss_hash_type;
1235         u8 rss_enable;
1236         int ret;
1237
1238         RTE_ASSERT(rss_conf != NULL);
1239         ret = enic_rss_conf_valid(enic, rss_conf);
1240         if (ret) {
1241                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1242                 return ret;
1243         }
1244
1245         eth_dev = enic->rte_dev;
1246         rss_hash_type = 0;
1247         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1248         if (enic->rq_count > 1 &&
1249             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1250             rss_hf != 0) {
1251                 rss_enable = 1;
1252                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1253                               ETH_RSS_NONFRAG_IPV4_OTHER))
1254                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1255                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1256                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1257                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1258                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1259                         if (enic->udp_rss_weak) {
1260                                 /*
1261                                  * 'TCP' is not a typo. The "weak" version of
1262                                  * UDP RSS requires both the TCP and UDP bits
1263                                  * be set. It does enable TCP RSS as well.
1264                                  */
1265                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1266                         }
1267                 }
1268                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1269                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1270                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1271                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1272                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1273                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1274                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1275                         if (enic->udp_rss_weak)
1276                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1277                 }
1278         } else {
1279                 rss_enable = 0;
1280                 rss_hf = 0;
1281         }
1282
1283         /* Set the hash key if provided */
1284         if (rss_enable && rss_conf->rss_key) {
1285                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1286                 if (ret) {
1287                         dev_err(enic, "Failed to set RSS key\n");
1288                         return ret;
1289                 }
1290         }
1291
1292         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1293                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1294                               rss_enable);
1295         if (!ret) {
1296                 enic->rss_hf = rss_hf;
1297                 enic->rss_hash_type = rss_hash_type;
1298                 enic->rss_enable = rss_enable;
1299         } else {
1300                 dev_err(enic, "Failed to update RSS configurations."
1301                         " hash=0x%x\n", rss_hash_type);
1302         }
1303         return ret;
1304 }
1305
1306 int enic_set_vlan_strip(struct enic *enic)
1307 {
1308         /*
1309          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1310          * together. So, re-do niccfg, preserving the current RSS settings.
1311          */
1312         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1313                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1314                                enic->rss_enable);
1315 }
1316
1317 void enic_add_packet_filter(struct enic *enic)
1318 {
1319         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1320         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1321                 enic->promisc, enic->allmulti);
1322 }
1323
1324 int enic_get_link_status(struct enic *enic)
1325 {
1326         return vnic_dev_link_status(enic->vdev);
1327 }
1328
1329 static void enic_dev_deinit(struct enic *enic)
1330 {
1331         struct rte_eth_dev *eth_dev = enic->rte_dev;
1332
1333         /* stop link status checking */
1334         vnic_dev_notify_unset(enic->vdev);
1335
1336         rte_free(eth_dev->data->mac_addrs);
1337         rte_free(enic->cq);
1338         rte_free(enic->intr);
1339         rte_free(enic->rq);
1340         rte_free(enic->wq);
1341 }
1342
1343
1344 int enic_set_vnic_res(struct enic *enic)
1345 {
1346         struct rte_eth_dev *eth_dev = enic->rte_dev;
1347         int rc = 0;
1348         unsigned int required_rq, required_wq, required_cq, required_intr;
1349
1350         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
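        /* (one start-of-packet RQ paired with one data RQ used for Rx scatter) */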
1351         required_rq = eth_dev->data->nb_rx_queues * 2;
1352         required_wq = eth_dev->data->nb_tx_queues;
1353         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1354         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1355         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1356                 required_intr += eth_dev->data->nb_rx_queues;
1357         }
1358
1359         if (enic->conf_rq_count < required_rq) {
1360                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1361                         eth_dev->data->nb_rx_queues,
1362                         required_rq, enic->conf_rq_count);
1363                 rc = -EINVAL;
1364         }
1365         if (enic->conf_wq_count < required_wq) {
1366                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1367                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1368                 rc = -EINVAL;
1369         }
1370
1371         if (enic->conf_cq_count < required_cq) {
1372                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1373                         required_cq, enic->conf_cq_count);
1374                 rc = -EINVAL;
1375         }
1376         if (enic->conf_intr_count < required_intr) {
1377                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1378                         " interrupts. Required:%u, Configured:%u\n",
1379                         required_intr, enic->conf_intr_count);
1380                 rc = -EINVAL;
1381         }
1382
1383         if (rc == 0) {
1384                 enic->rq_count = eth_dev->data->nb_rx_queues;
1385                 enic->wq_count = eth_dev->data->nb_tx_queues;
1386                 enic->cq_count = enic->rq_count + enic->wq_count;
1387                 enic->intr_count = required_intr;
1388         }
1389
1390         return rc;
1391 }
1392
1393 /* Re-initialize the completion queue and RQs for an Rx queue */
1394 static int
1395 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1396 {
1397         struct vnic_rq *sop_rq, *data_rq;
1398         unsigned int cq_idx;
1399         int rc = 0;
1400
1401         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1402         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1403         cq_idx = rq_idx;
1404
1405         vnic_cq_clean(&enic->cq[cq_idx]);
1406         vnic_cq_init(&enic->cq[cq_idx],
1407                      0 /* flow_control_enable */,
1408                      1 /* color_enable */,
1409                      0 /* cq_head */,
1410                      0 /* cq_tail */,
1411                      1 /* cq_tail_color */,
1412                      0 /* interrupt_enable */,
1413                      1 /* cq_entry_enable */,
1414                      0 /* cq_message_enable */,
1415                      0 /* interrupt offset */,
1416                      0 /* cq_message_addr */);
1417
1418
1419         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1420                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1421                            sop_rq->ring.desc_count - 1, 1, 0);
1422         if (data_rq->in_use) {
1423                 vnic_rq_init_start(data_rq,
1424                                    enic_cq_rq(enic,
1425                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1426                                    data_rq->ring.desc_count - 1, 1, 0);
1427         }
1428
1429         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1430         if (rc)
1431                 return rc;
1432
1433         if (data_rq->in_use) {
1434                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1435                 if (rc) {
1436                         enic_rxmbuf_queue_release(enic, sop_rq);
1437                         return rc;
1438                 }
1439         }
1440
1441         return 0;
1442 }
1443
1444 /* The Cisco NIC can send and receive packets up to a max packet size
1445  * determined by the NIC type and firmware. There is also an MTU
1446  * configured into the NIC via the CIMC/UCSM management interface
1447  * which can be overridden by this function (up to the max packet size).
1448  * Depending on the network setup, doing so may cause packet drops
1449  * and unexpected behavior.
1450  */
1451 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1452 {
1453         unsigned int rq_idx;
1454         struct vnic_rq *rq;
1455         int rc = 0;
1456         uint16_t old_mtu;       /* previous setting */
1457         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1458         struct rte_eth_dev *eth_dev = enic->rte_dev;
1459
1460         old_mtu = eth_dev->data->mtu;
1461         config_mtu = enic->config.mtu;
1462
1463         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1464                 return -E_RTE_SECONDARY;
1465
1466         if (new_mtu > enic->max_mtu) {
1467                 dev_err(enic,
1468                         "MTU not updated: requested (%u) greater than max (%u)\n",
1469                         new_mtu, enic->max_mtu);
1470                 return -EINVAL;
1471         }
1472         if (new_mtu < ENIC_MIN_MTU) {
1473                 dev_info(enic,
1474                         "MTU not updated: requested (%u) less than min (%u)\n",
1475                         new_mtu, ENIC_MIN_MTU);
1476                 return -EINVAL;
1477         }
1478         if (new_mtu > config_mtu)
1479                 dev_warning(enic,
1480                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1481                         new_mtu, config_mtu);
1482
1483         /* Update the MTU and maximum packet length */
1484         eth_dev->data->mtu = new_mtu;
1485         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1486                 enic_mtu_to_max_rx_pktlen(new_mtu);
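        /*
         * Sketch of the conversion (illustrative; enic_mtu_to_max_rx_pktlen()
         * in enic.h is authoritative): the maximum Rx packet length is
         * approximately the MTU plus the L2 framing overhead, i.e.
         *
         *   max_rx_pkt_len ~= mtu + Ethernet header + CRC
         */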
1487
1488         /*
1489          * If the device has not started (enic_enable() not yet called),
1490          * there is nothing to do; enic_enable() will later set up the RQs
1491          * using the new maximum packet length.
1492          */
1493         if (!eth_dev->data->dev_started)
1494                 goto set_mtu_done;
1495
1496         /*
1497          * The device has started, so redo the RQs on the fly; in the
1498          * process they pick up the new maximum packet length.
1499          *
1500          * Some applications rely on changing the MTU without stopping
1501          * the device, so keep this behavior for now.
1502          */
1503         rte_spinlock_lock(&enic->mtu_lock);
1504
1505         /* Stop traffic on all RQs */
1506         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1507                 rq = &enic->rq[rq_idx];
1508                 if (rq->is_sop && rq->in_use) {
1509                         rc = enic_stop_rq(enic,
1510                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1511                         if (rc) {
1512                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1513                                 goto set_mtu_done;
1514                         }
1515                 }
1516         }
1517
1518         /* replace Rx function with a no-op to avoid getting stale pkts */
1519         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1520         rte_mb();
1521
1522         /* Allow time for threads to exit the real Rx function. */
1523         usleep(100000);
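        /*
         * For reference, the no-op burst handler installed above is expected
         * to look roughly like this (enic_rxtx.c has the real definition);
         * it receives nothing while the RQs are being rebuilt:
         *
         *   uint16_t enic_dummy_recv_pkts(void *rxq, struct rte_mbuf **pkts,
         *                                 uint16_t nb_pkts)
         *   {
         *           return 0;
         *   }
         */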
1524
1525         /* Now it is safe to reconfigure the RQs */
1526
1528         /* free and reallocate RQs with the new MTU */
1529         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1530                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1531                 if (!rq->in_use)
1532                         continue;
1533
1534                 enic_free_rq(rq);
1535                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1536                                    rq->tot_nb_desc, rq->rx_free_thresh);
1537                 if (rc) {
1538                         dev_err(enic,
1539                                 "Fatal MTU alloc error - no traffic will pass\n");
1540                         goto set_mtu_done;
1541                 }
1542
1543                 rc = enic_reinit_rq(enic, rq_idx);
1544                 if (rc) {
1545                         dev_err(enic,
1546                                 "Fatal MTU RQ reinit error - no traffic will pass\n");
1547                         goto set_mtu_done;
1548                 }
1549         }
1550
1551         /* put back the real receive function */
1552         rte_mb();
1553         eth_dev->rx_pkt_burst = enic_recv_pkts;
1554         rte_mb();
1555
1556         /* restart Rx traffic */
1557         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1558                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1559                 if (rq->is_sop && rq->in_use)
1560                         enic_start_rq(enic, rq_idx);
1561         }
1562
1563 set_mtu_done:
1564         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1565         rte_spinlock_unlock(&enic->mtu_lock);
1566         return rc;
1567 }
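
/*
 * Illustrative only, not part of this file: the ethdev .mtu_set callback in
 * enic_ethdev.c is expected to be a thin wrapper around enic_set_mtu(),
 * roughly as below, assuming the usual pmd_priv() private-data accessor.
 */
#if 0
static int enicpmd_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu)
{
        struct enic *enic = pmd_priv(eth_dev);

        return enic_set_mtu(enic, mtu);
}
#endif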
1568
1569 static int enic_dev_init(struct enic *enic)
1570 {
1571         int err;
1572         struct rte_eth_dev *eth_dev = enic->rte_dev;
1573
1574         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1575
1576         /* Get vNIC configuration */
1578         err = enic_get_vnic_config(enic);
1579         if (err) {
1580                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1581                 return err;
1582         }
1583
1584         /* Get available resource counts */
1585         enic_get_res_counts(enic);
1586         if (enic->conf_rq_count == 1) {
1587                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1588                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1589                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1590                 return -EINVAL;
1591         }
1592         /* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1593         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1594                                enic->conf_cq_count, 8);
1595         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1596                                  enic->conf_intr_count, 8);
1597         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1598                                enic->conf_rq_count, 8);
1599         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1600                                enic->conf_wq_count, 8);
1601         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1602                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1603                 return -1;
1604         }
1605         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1606                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1607                 return -1;
1608         }
1609         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1610                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1611                 return -1;
1612         }
1613         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1614                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1615                 return -1;
1616         }
1617
1618         /* Get the supported filters */
1619         enic_fdir_info(enic);
1620
1621         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1622                                                 * ENIC_MAX_MAC_ADDR, 0);
1623         if (!eth_dev->data->mac_addrs) {
1624                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1625                 return -1;
1626         }
1627         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1628                         eth_dev->data->mac_addrs);
1629
1630         vnic_dev_set_reset_flag(enic->vdev, 0);
1631
1632         LIST_INIT(&enic->flows);
1633         rte_spinlock_init(&enic->flows_lock);
1634
1635         /* set up link status checking */
1636         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1637
1638         enic->overlay_offload = false;
1639         if (!enic->disable_overlay && enic->vxlan &&
1640             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1641             vnic_dev_overlay_offload_ctrl(enic->vdev,
1642                                           OVERLAY_FEATURE_VXLAN,
1643                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1644                 enic->tx_offload_capa |=
1645                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1646                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1647                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1648                 /*
1649                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1650                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1651                  */
1652                 enic->tx_offload_mask |=
1653                         PKT_TX_OUTER_IP_CKSUM |
1654                         PKT_TX_TUNNEL_MASK;
1655                 enic->overlay_offload = true;
1656                 enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1657                 dev_info(enic, "Overlay offload is enabled\n");
1658         }
1659
1660         return 0;
1662 }
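
/*
 * Illustrative only, not part of this file: an application can check whether
 * the tunnel Tx offloads negotiated in enic_dev_init() are exposed by using
 * the standard ethdev info query, for example:
 */
#if 0
static int example_port_has_vxlan_tso(uint16_t port_id)
{
        struct rte_eth_dev_info dev_info;

        rte_eth_dev_info_get(port_id, &dev_info);
        return (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO) != 0;
}
#endif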
1663
1664 int enic_probe(struct enic *enic)
1665 {
1666         struct rte_pci_device *pdev = enic->pdev;
1667         int err = -1;
1668
1669         dev_debug(enic, "Initializing ENIC PMD\n");
1670
1671         /* If this is a secondary process, the hardware is already initialized */
1672         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1673                 return 0;
1674
1675         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1676         enic->bar0.len = pdev->mem_resource[0].len;
1677
1678         /* Register vNIC device */
1679         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1680         if (!enic->vdev) {
1681                 dev_err(enic, "vNIC registration failed, aborting\n");
1682                 goto err_out;
1683         }
1684
1685         LIST_INIT(&enic->memzone_list);
1686         rte_spinlock_init(&enic->memzone_list_lock);
1687
1688         vnic_register_cbacks(enic->vdev,
1689                 enic_alloc_consistent,
1690                 enic_free_consistent);
1691
1692         /*
1693          * Allocate the consistent memory for stats upfront so both primary and
1694          * secondary processes can dump stats.
1695          */
1696         err = vnic_dev_alloc_stats_mem(enic->vdev);
1697         if (err) {
1698                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1699                 goto err_out_unregister;
1700         }
1701         /* Issue device open to get device in known state */
1702         err = enic_dev_open(enic);
1703         if (err) {
1704                 dev_err(enic, "vNIC dev open failed, aborting\n");
1705                 goto err_out_unregister;
1706         }
1707
1708         /* Set ingress vlan rewrite mode before vnic initialization */
1709         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1710                   enic->ig_vlan_rewrite_mode);
1711         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1712                 enic->ig_vlan_rewrite_mode);
1713         if (err) {
1714                 dev_err(enic,
1715                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1716                 goto err_out_dev_close;
1717         }
1718
1719         /* Issue device init to initialize the vnic-to-switch link.
1720          * We'll start with carrier off and wait for link UP
1721          * notification later to turn on carrier.  We don't need
1722          * to wait here for the vnic-to-switch link initialization
1723          * to complete; link UP notification is the indication that
1724          * the process is complete.
1725          */
1726
1727         err = vnic_dev_init(enic->vdev, 0);
1728         if (err) {
1729                 dev_err(enic, "vNIC dev init failed, aborting\n");
1730                 goto err_out_dev_close;
1731         }
1732
1733         err = enic_dev_init(enic);
1734         if (err) {
1735                 dev_err(enic, "Device initialization failed, aborting\n");
1736                 goto err_out_dev_close;
1737         }
1738
1739         return 0;
1740
1741 err_out_dev_close:
1742         vnic_dev_close(enic->vdev);
1743 err_out_unregister:
1744         vnic_dev_unregister(enic->vdev);
1745 err_out:
1746         return err;
1747 }
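
/*
 * Illustrative only, not part of this file: once the vnic-to-switch link
 * initialization started in enic_probe() completes, an application sees the
 * link come up through the usual ethdev query, for example:
 */
#if 0
static int example_port_link_is_up(uint16_t port_id)
{
        struct rte_eth_link link;

        rte_eth_link_get_nowait(port_id, &link);
        return link.link_status == ETH_LINK_UP;
}
#endif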
1748
1749 void enic_remove(struct enic *enic)
1750 {
1751         enic_dev_deinit(enic);
1752         vnic_dev_close(enic->vdev);
1753         vnic_dev_unregister(enic->vdev);
1754 }