net/enic: enable overlay offload for VXLAN and GENEVE
dpdk.git: drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct vnic_wq_buf *buf)
73 {
74         struct rte_mbuf *mbuf = (struct rte_mbuf *)buf->mb;
75
76         rte_pktmbuf_free_seg(mbuf);
77         buf->mb = NULL;
78 }
79
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the length of truncated packets is
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
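        /*
         * For example, if the driver has counted 12 rx_packet_errors but
         * the NIC reports rx_errors = 10, the remaining 2 are treated as
         * truncated frames: they are subtracted from ipackets and added
         * to imissed below.
         */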
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq) {
209                 rxq_interrupt_enable = 1;
210                 rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
211         }
212         for (index = 0; index < enic->rq_count; index++) {
213                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
214
215                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
216                         cq_idx,
217                         error_interrupt_enable,
218                         error_interrupt_offset);
219
220                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
221                 if (data_rq->in_use)
222                         vnic_rq_init(data_rq,
223                                      cq_idx,
224                                      error_interrupt_enable,
225                                      error_interrupt_offset);
226
227                 vnic_cq_init(&enic->cq[cq_idx],
228                         0 /* flow_control_enable */,
229                         1 /* color_enable */,
230                         0 /* cq_head */,
231                         0 /* cq_tail */,
232                         1 /* cq_tail_color */,
233                         rxq_interrupt_enable,
234                         1 /* cq_entry_enable */,
235                         0 /* cq_message_enable */,
236                         rxq_interrupt_offset,
237                         0 /* cq_message_addr */);
238                 if (rxq_interrupt_enable)
239                         rxq_interrupt_offset++;
240         }
241
242         for (index = 0; index < enic->wq_count; index++) {
243                 vnic_wq_init(&enic->wq[index],
244                         enic_cq_wq(enic, index),
245                         error_interrupt_enable,
246                         error_interrupt_offset);
247                 /* Compute unsupported ol flags for enic_prep_pkts() */
248                 enic->wq[index].tx_offload_notsup_mask =
249                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
250
251                 cq_idx = enic_cq_wq(enic, index);
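                /*
                 * WQ completions use message mode: cq_entry_enable is 0 and
                 * cq_message_enable is 1 below, so the VIC writes the
                 * completion index into the cqmsg_rz memzone rather than
                 * posting CQ descriptors.
                 */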
252                 vnic_cq_init(&enic->cq[cq_idx],
253                         0 /* flow_control_enable */,
254                         1 /* color_enable */,
255                         0 /* cq_head */,
256                         0 /* cq_tail */,
257                         1 /* cq_tail_color */,
258                         0 /* interrupt_enable */,
259                         0 /* cq_entry_enable */,
260                         1 /* cq_message_enable */,
261                         0 /* interrupt offset */,
262                         (u64)enic->wq[index].cqmsg_rz->iova);
263         }
264
265         for (index = 0; index < enic->intr_count; index++) {
266                 vnic_intr_init(&enic->intr[index],
267                                enic->config.intr_timer_usec,
268                                enic->config.intr_timer_type,
269                                /*mask_on_assertion*/1);
270         }
271 }
272
273
274 static int
275 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
276 {
277         struct rte_mbuf *mb;
278         struct rq_enet_desc *rqd = rq->ring.descs;
279         unsigned i;
280         dma_addr_t dma_addr;
281         uint32_t max_rx_pkt_len;
282         uint16_t rq_buf_len;
283
284         if (!rq->in_use)
285                 return 0;
286
287         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
288                   rq->ring.desc_count);
289
290         /*
291          * If *not* using scatter and the mbuf size is smaller than the
292          * requested max packet size (max_rx_pkt_len), then reduce the
293          * posted buffer size to max_rx_pkt_len. HW still receives packets
294          * larger than max_rx_pkt_len, but they will be truncated and then
295          * dropped in the rx handler. Not ideal, but better than returning
296          * large packets when the user is not expecting them.
297          */
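        /*
         * For example, if the mbuf data room leaves 2048 bytes but
         * max_rx_pkt_len is 1518, buffers are posted as 1518 bytes, so
         * longer frames arrive truncated and are dropped by the Rx
         * handler.
         */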
298         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
299         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
300         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
301                 rq_buf_len = max_rx_pkt_len;
302         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
303                 mb = rte_mbuf_raw_alloc(rq->mp);
304                 if (mb == NULL) {
305                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
306                         (unsigned)rq->index);
307                         return -ENOMEM;
308                 }
309
310                 mb->data_off = RTE_PKTMBUF_HEADROOM;
311                 dma_addr = (dma_addr_t)(mb->buf_iova
312                            + RTE_PKTMBUF_HEADROOM);
313                 rq_enet_desc_enc(rqd, dma_addr,
314                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
315                                 : RQ_ENET_TYPE_NOT_SOP),
316                                 rq_buf_len);
317                 rq->mbuf_ring[i] = mb;
318         }
319
320         /* make sure all prior writes are complete before doing the PIO write */
321         rte_rmb();
322
323         /* Post all but the last buffer to VIC. */
324         rq->posted_index = rq->ring.desc_count - 1;
325
326         rq->rx_nb_hold = 0;
327
328         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
329                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
330         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
331         iowrite32(0, &rq->ctrl->fetch_index);
332         rte_rmb();
333
334         return 0;
335
336 }
337
338 static void *
339 enic_alloc_consistent(void *priv, size_t size,
340         dma_addr_t *dma_handle, u8 *name)
341 {
342         void *vaddr;
343         const struct rte_memzone *rz;
344         *dma_handle = 0;
345         struct enic *enic = (struct enic *)priv;
346         struct enic_memzone_entry *mze;
347
348         rz = rte_memzone_reserve_aligned((const char *)name, size,
349                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
350         if (!rz) {
351                 pr_err("%s : Failed to allocate memory requested for %s\n",
352                         __func__, name);
353                 return NULL;
354         }
355
356         vaddr = rz->addr;
357         *dma_handle = (dma_addr_t)rz->iova;
358
359         mze = rte_malloc("enic memzone entry",
360                          sizeof(struct enic_memzone_entry), 0);
361
362         if (!mze) {
363                 pr_err("%s : Failed to allocate memory for memzone list\n",
364                        __func__);
365                 rte_memzone_free(rz);
366                 return NULL;
367         }
368
369         mze->rz = rz;
370
371         rte_spinlock_lock(&enic->memzone_list_lock);
372         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
373         rte_spinlock_unlock(&enic->memzone_list_lock);
374
375         return vaddr;
376 }
377
378 static void
379 enic_free_consistent(void *priv,
380                      __rte_unused size_t size,
381                      void *vaddr,
382                      dma_addr_t dma_handle)
383 {
384         struct enic_memzone_entry *mze;
385         struct enic *enic = (struct enic *)priv;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_FOREACH(mze, &enic->memzone_list, entries) {
389                 if (mze->rz->addr == vaddr &&
390                     mze->rz->iova == dma_handle)
391                         break;
392         }
393         if (mze == NULL) {
394                 rte_spinlock_unlock(&enic->memzone_list_lock);
395                 dev_warning(enic,
396                             "Tried to free memory, but couldn't find it in the memzone list\n");
397                 return;
398         }
399         LIST_REMOVE(mze, entries);
400         rte_spinlock_unlock(&enic->memzone_list_lock);
401         rte_memzone_free(mze->rz);
402         rte_free(mze);
403 }
404
405 int enic_link_update(struct enic *enic)
406 {
407         struct rte_eth_dev *eth_dev = enic->rte_dev;
408         struct rte_eth_link link;
409
410         memset(&link, 0, sizeof(link));
411         link.link_status = enic_get_link_status(enic);
412         link.link_duplex = ETH_LINK_FULL_DUPLEX;
413         link.link_speed = vnic_dev_port_speed(enic->vdev);
414
415         return rte_eth_linkstatus_set(eth_dev, &link);
416 }
417
418 static void
419 enic_intr_handler(void *arg)
420 {
421         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
422         struct enic *enic = pmd_priv(dev);
423
424         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
425
426         enic_link_update(enic);
427         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
428         enic_log_q_error(enic);
429 }
430
431 static int enic_rxq_intr_init(struct enic *enic)
432 {
433         struct rte_intr_handle *intr_handle;
434         uint32_t rxq_intr_count, i;
435         int err;
436
437         intr_handle = enic->rte_dev->intr_handle;
438         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
439                 return 0;
440         /*
441          * Rx queue interrupts only work when we have MSI-X interrupts,
442          * one per queue. Sharing one interrupt is technically
443          * possible with VIC, but it is not worth the complications it brings.
444          */
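        /*
         * Vector layout when Rx interrupts are on: ENICPMD_LSC_INTR_OFFSET
         * stays reserved for link-state and error notifications, and each
         * Rx queue gets its own vector starting at ENICPMD_RXQ_INTR_OFFSET
         * (see the intr_vec fill below and enic_init_vnic_resources()).
         */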
445         if (!rte_intr_cap_multiple(intr_handle)) {
446                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
447                         " (vfio-pci driver)\n");
448                 return -ENOTSUP;
449         }
450         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
451         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
452         if (err) {
453                 dev_err(enic, "Failed to enable event fds for Rx queue"
454                         " interrupts\n");
455                 return err;
456         }
457         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
458                                             rxq_intr_count * sizeof(int), 0);
459         if (intr_handle->intr_vec == NULL) {
460                 dev_err(enic, "Failed to allocate intr_vec\n");
461                 return -ENOMEM;
462         }
463         for (i = 0; i < rxq_intr_count; i++)
464                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
465         return 0;
466 }
467
468 static void enic_rxq_intr_deinit(struct enic *enic)
469 {
470         struct rte_intr_handle *intr_handle;
471
472         intr_handle = enic->rte_dev->intr_handle;
473         rte_intr_efd_disable(intr_handle);
474         if (intr_handle->intr_vec != NULL) {
475                 rte_free(intr_handle->intr_vec);
476                 intr_handle->intr_vec = NULL;
477         }
478 }
479
480 int enic_enable(struct enic *enic)
481 {
482         unsigned int index;
483         int err;
484         struct rte_eth_dev *eth_dev = enic->rte_dev;
485
486         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
487         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
488
489         /* vnic notification of link status has already been turned on in
490          * enic_dev_init() which is called during probe time.  Here we are
491          * just turning on interrupt vector 0 if needed.
492          */
493         if (eth_dev->data->dev_conf.intr_conf.lsc)
494                 vnic_dev_notify_set(enic->vdev, 0);
495
496         err = enic_rxq_intr_init(enic);
497         if (err)
498                 return err;
499         if (enic_clsf_init(enic))
500                 dev_warning(enic, "Init of hash table for clsf failed."\
501                         "Flow director feature will not work\n");
502
503         for (index = 0; index < enic->rq_count; index++) {
504                 err = enic_alloc_rx_queue_mbufs(enic,
505                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
506                 if (err) {
507                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
508                         return err;
509                 }
510                 err = enic_alloc_rx_queue_mbufs(enic,
511                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
512                 if (err) {
513                         /* release the allocated mbufs for the sop rq*/
514                         enic_rxmbuf_queue_release(enic,
515                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
516
517                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
518                         return err;
519                 }
520         }
521
522         for (index = 0; index < enic->wq_count; index++)
523                 enic_start_wq(enic, index);
524         for (index = 0; index < enic->rq_count; index++)
525                 enic_start_rq(enic, index);
526
527         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
528
529         vnic_dev_enable_wait(enic->vdev);
530
531         /* Register and enable error interrupt */
532         rte_intr_callback_register(&(enic->pdev->intr_handle),
533                 enic_intr_handler, (void *)enic->rte_dev);
534
535         rte_intr_enable(&(enic->pdev->intr_handle));
536         /* Unmask LSC interrupt */
537         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
538
539         return 0;
540 }
541
542 int enic_alloc_intr_resources(struct enic *enic)
543 {
544         int err;
545         unsigned int i;
546
547         dev_info(enic, "vNIC resources used:  "\
548                 "wq %d rq %d cq %d intr %d\n",
549                 enic->wq_count, enic_vnic_rq_count(enic),
550                 enic->cq_count, enic->intr_count);
551
552         for (i = 0; i < enic->intr_count; i++) {
553                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
554                 if (err) {
555                         enic_free_vnic_resources(enic);
556                         return err;
557                 }
558         }
559         return 0;
560 }
561
562 void enic_free_rq(void *rxq)
563 {
564         struct vnic_rq *rq_sop, *rq_data;
565         struct enic *enic;
566
567         if (rxq == NULL)
568                 return;
569
570         rq_sop = (struct vnic_rq *)rxq;
571         enic = vnic_dev_priv(rq_sop->vdev);
572         rq_data = &enic->rq[rq_sop->data_queue_idx];
573
574         enic_rxmbuf_queue_release(enic, rq_sop);
575         if (rq_data->in_use)
576                 enic_rxmbuf_queue_release(enic, rq_data);
577
578         rte_free(rq_sop->mbuf_ring);
579         if (rq_data->in_use)
580                 rte_free(rq_data->mbuf_ring);
581
582         rq_sop->mbuf_ring = NULL;
583         rq_data->mbuf_ring = NULL;
584
585         vnic_rq_free(rq_sop);
586         if (rq_data->in_use)
587                 vnic_rq_free(rq_data);
588
589         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
590
591         rq_sop->in_use = 0;
592         rq_data->in_use = 0;
593 }
594
595 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
596 {
597         struct rte_eth_dev *eth_dev = enic->rte_dev;
598         vnic_wq_enable(&enic->wq[queue_idx]);
599         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
600 }
601
602 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
603 {
604         struct rte_eth_dev *eth_dev = enic->rte_dev;
605         int ret;
606
607         ret = vnic_wq_disable(&enic->wq[queue_idx]);
608         if (ret)
609                 return ret;
610
611         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
612         return 0;
613 }
614
615 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
616 {
617         struct vnic_rq *rq_sop;
618         struct vnic_rq *rq_data;
619         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
620         rq_data = &enic->rq[rq_sop->data_queue_idx];
621         struct rte_eth_dev *eth_dev = enic->rte_dev;
622
623         if (rq_data->in_use)
624                 vnic_rq_enable(rq_data);
625         rte_mb();
626         vnic_rq_enable(rq_sop);
627         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
628 }
629
630 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
631 {
632         int ret1 = 0, ret2 = 0;
633         struct rte_eth_dev *eth_dev = enic->rte_dev;
634         struct vnic_rq *rq_sop;
635         struct vnic_rq *rq_data;
636         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
637         rq_data = &enic->rq[rq_sop->data_queue_idx];
638
639         ret2 = vnic_rq_disable(rq_sop);
640         rte_mb();
641         if (rq_data->in_use)
642                 ret1 = vnic_rq_disable(rq_data);
643
644         if (ret2)
645                 return ret2;
646         else if (ret1)
647                 return ret1;
648
649         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
650         return 0;
651 }
652
653 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
654         unsigned int socket_id, struct rte_mempool *mp,
655         uint16_t nb_desc, uint16_t free_thresh)
656 {
657         int rc;
658         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
659         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
660         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
661         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
662         unsigned int mbuf_size, mbufs_per_pkt;
663         unsigned int nb_sop_desc, nb_data_desc;
664         uint16_t min_sop, max_sop, min_data, max_data;
665         uint32_t max_rx_pkt_len;
666
667         rq_sop->is_sop = 1;
668         rq_sop->data_queue_idx = data_queue_idx;
669         rq_data->is_sop = 0;
670         rq_data->data_queue_idx = 0;
671         rq_sop->socket_id = socket_id;
672         rq_sop->mp = mp;
673         rq_data->socket_id = socket_id;
674         rq_data->mp = mp;
675         rq_sop->in_use = 1;
676         rq_sop->rx_free_thresh = free_thresh;
677         rq_data->rx_free_thresh = free_thresh;
678         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
679                   free_thresh);
680
681         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
682                                RTE_PKTMBUF_HEADROOM);
683         /* max_rx_pkt_len includes the ethernet header and CRC. */
684         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
685
686         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
687             DEV_RX_OFFLOAD_SCATTER) {
688                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
689                 /* ceil((max pkt len)/mbuf_size) */
690                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
691         } else {
692                 dev_info(enic, "Scatter rx mode disabled\n");
693                 mbufs_per_pkt = 1;
694                 if (max_rx_pkt_len > mbuf_size) {
695                         dev_warning(enic, "The maximum Rx packet size (%u) is"
696                                     " larger than the mbuf size (%u), and"
697                                     " scatter is disabled. Larger packets will"
698                                     " be truncated.\n",
699                                     max_rx_pkt_len, mbuf_size);
700                 }
701         }
702
703         if (mbufs_per_pkt > 1) {
704                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
705                 rq_sop->data_queue_enable = 1;
706                 rq_data->in_use = 1;
707                 /*
708                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
709                  * receives packet sizes up to the "max" MTU.
710                  * If not using scatter, we can achieve the effect of dropping
711                  * larger packets by reducing the size of posted buffers.
712                  * See enic_alloc_rx_queue_mbufs().
713                  */
714                 if (max_rx_pkt_len <
715                     enic_mtu_to_max_rx_pktlen(enic->rte_dev->data->mtu)) {
716                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
717                                     " when scatter rx mode is in use.\n");
718                 }
719         } else {
720                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
721                          queue_idx);
722                 rq_sop->data_queue_enable = 0;
723                 rq_data->in_use = 0;
724         }
725
726         /* number of descriptors must be a multiple of 32 */
727         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ~0x1F;
728         nb_data_desc = (nb_desc - nb_sop_desc) & ~0x1F;
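        /*
         * For example, nb_desc = 512 with mbufs_per_pkt = 2 gives
         * nb_sop_desc = 256 and nb_data_desc = 256; with mbufs_per_pkt = 3
         * it gives 160 and 352 (both rounded down to multiples of 32 by
         * the ~0x1F mask).
         */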
729
730         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
731         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
732
733         if (mbufs_per_pkt > 1) {
734                 min_sop = 64;
735                 max_sop = ((enic->config.rq_desc_count /
736                             (mbufs_per_pkt - 1)) & ~0x1F);
737                 min_data = min_sop * (mbufs_per_pkt - 1);
738                 max_data = enic->config.rq_desc_count;
739         } else {
740                 min_sop = 64;
741                 max_sop = enic->config.rq_desc_count;
742                 min_data = 0;
743                 max_data = 0;
744         }
745
746         if (nb_desc < (min_sop + min_data)) {
747                 dev_warning(enic,
748                             "Number of rx descs too low, adjusting to minimum\n");
749                 nb_sop_desc = min_sop;
750                 nb_data_desc = min_data;
751         } else if (nb_desc > (max_sop + max_data)) {
752                 dev_warning(enic,
753                             "Number of rx_descs too high, adjusting to maximum\n");
754                 nb_sop_desc = max_sop;
755                 nb_data_desc = max_data;
756         }
757         if (mbufs_per_pkt > 1) {
758                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
759                          " rx descriptor range is %u to %u\n",
760                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
761                          max_sop + max_data);
762         }
763         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
764                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
765
766         /* Allocate sop queue resources */
767         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
768                 nb_sop_desc, sizeof(struct rq_enet_desc));
769         if (rc) {
770                 dev_err(enic, "error in allocation of sop rq\n");
771                 goto err_exit;
772         }
773         nb_sop_desc = rq_sop->ring.desc_count;
774
775         if (rq_data->in_use) {
776                 /* Allocate data queue resources */
777                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
778                                    nb_data_desc,
779                                    sizeof(struct rq_enet_desc));
780                 if (rc) {
781                         dev_err(enic, "error in allocation of data rq\n");
782                         goto err_free_rq_sop;
783                 }
784                 nb_data_desc = rq_data->ring.desc_count;
785         }
786         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
787                            socket_id, nb_sop_desc + nb_data_desc,
788                            sizeof(struct cq_enet_rq_desc));
789         if (rc) {
790                 dev_err(enic, "error in allocation of cq for rq\n");
791                 goto err_free_rq_data;
792         }
793
794         /* Allocate the mbuf rings */
795         rq_sop->mbuf_ring = (struct rte_mbuf **)
796                 rte_zmalloc_socket("rq->mbuf_ring",
797                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
798                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
799         if (rq_sop->mbuf_ring == NULL)
800                 goto err_free_cq;
801
802         if (rq_data->in_use) {
803                 rq_data->mbuf_ring = (struct rte_mbuf **)
804                         rte_zmalloc_socket("rq->mbuf_ring",
805                                 sizeof(struct rte_mbuf *) * nb_data_desc,
806                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
807                 if (rq_data->mbuf_ring == NULL)
808                         goto err_free_sop_mbuf;
809         }
810
811         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
812
813         return 0;
814
815 err_free_sop_mbuf:
816         rte_free(rq_sop->mbuf_ring);
817 err_free_cq:
818         /* cleanup on error */
819         vnic_cq_free(&enic->cq[queue_idx]);
820 err_free_rq_data:
821         if (rq_data->in_use)
822                 vnic_rq_free(rq_data);
823 err_free_rq_sop:
824         vnic_rq_free(rq_sop);
825 err_exit:
826         return -ENOMEM;
827 }
828
829 void enic_free_wq(void *txq)
830 {
831         struct vnic_wq *wq;
832         struct enic *enic;
833
834         if (txq == NULL)
835                 return;
836
837         wq = (struct vnic_wq *)txq;
838         enic = vnic_dev_priv(wq->vdev);
839         rte_memzone_free(wq->cqmsg_rz);
840         vnic_wq_free(wq);
841         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
842 }
843
844 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
845         unsigned int socket_id, uint16_t nb_desc)
846 {
847         int err;
848         struct vnic_wq *wq = &enic->wq[queue_idx];
849         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
850         char name[NAME_MAX];
851         static int instance;
852
853         wq->socket_id = socket_id;
854         if (nb_desc) {
855                 if (nb_desc > enic->config.wq_desc_count) {
856                         dev_warning(enic,
857                                 "WQ %d - number of tx desc in cmd line (%d)"\
858                                 "is greater than that in the UCSM/CIMC adapter"\
859                                 "policy.  Applying the value in the adapter "\
860                                 "policy (%d)\n",
861                                 queue_idx, nb_desc, enic->config.wq_desc_count);
862                 } else if (nb_desc != enic->config.wq_desc_count) {
863                         enic->config.wq_desc_count = nb_desc;
864                         dev_info(enic,
865                                 "TX Queues - effective number of descs:%d\n",
866                                 nb_desc);
867                 }
868         }
869
870         /* Allocate queue resources */
871         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
872                 enic->config.wq_desc_count,
873                 sizeof(struct wq_enet_desc));
874         if (err) {
875                 dev_err(enic, "error in allocation of wq\n");
876                 return err;
877         }
878
879         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
880                 socket_id, enic->config.wq_desc_count,
881                 sizeof(struct cq_enet_wq_desc));
882         if (err) {
883                 vnic_wq_free(wq);
884                 dev_err(enic, "error in allocation of cq for wq\n");
885         }
886
887         /* set up CQ message */
888         snprintf((char *)name, sizeof(name),
889                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
890                 instance++);
891
892         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
893                         sizeof(uint32_t), SOCKET_ID_ANY,
894                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
895         if (!wq->cqmsg_rz)
896                 return -ENOMEM;
897
898         return err;
899 }
900
901 int enic_disable(struct enic *enic)
902 {
903         unsigned int i;
904         int err;
905
906         for (i = 0; i < enic->intr_count; i++) {
907                 vnic_intr_mask(&enic->intr[i]);
908                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
909         }
910         enic_rxq_intr_deinit(enic);
911         rte_intr_disable(&enic->pdev->intr_handle);
912         rte_intr_callback_unregister(&enic->pdev->intr_handle,
913                                      enic_intr_handler,
914                                      (void *)enic->rte_dev);
915
916         vnic_dev_disable(enic->vdev);
917
918         enic_clsf_destroy(enic);
919
920         if (!enic_is_sriov_vf(enic))
921                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
922
923         for (i = 0; i < enic->wq_count; i++) {
924                 err = vnic_wq_disable(&enic->wq[i]);
925                 if (err)
926                         return err;
927         }
928         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
929                 if (enic->rq[i].in_use) {
930                         err = vnic_rq_disable(&enic->rq[i]);
931                         if (err)
932                                 return err;
933                 }
934         }
935
936         /* If we were using interrupts, set the interrupt vector to -1
937          * to disable interrupts.  We are not disabling link notifications,
938          * though, as we want the polling of link status to continue working.
939          */
940         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
941                 vnic_dev_notify_set(enic->vdev, -1);
942
943         vnic_dev_set_reset_flag(enic->vdev, 1);
944
945         for (i = 0; i < enic->wq_count; i++)
946                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
947
948         for (i = 0; i < enic_vnic_rq_count(enic); i++)
949                 if (enic->rq[i].in_use)
950                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
951         for (i = 0; i < enic->cq_count; i++)
952                 vnic_cq_clean(&enic->cq[i]);
953         for (i = 0; i < enic->intr_count; i++)
954                 vnic_intr_clean(&enic->intr[i]);
955
956         return 0;
957 }
958
959 static int enic_dev_wait(struct vnic_dev *vdev,
960         int (*start)(struct vnic_dev *, int),
961         int (*finished)(struct vnic_dev *, int *),
962         int arg)
963 {
964         int done;
965         int err;
966         int i;
967
968         err = start(vdev, arg);
969         if (err)
970                 return err;
971
972         /* Wait for func to complete...2 seconds max */
973         for (i = 0; i < 2000; i++) {
974                 err = finished(vdev, &done);
975                 if (err)
976                         return err;
977                 if (done)
978                         return 0;
979                 usleep(1000);
980         }
981         return -ETIMEDOUT;
982 }
983
984 static int enic_dev_open(struct enic *enic)
985 {
986         int err;
987         int flags = CMD_OPENF_IG_DESCCACHE;
988
989         err = enic_dev_wait(enic->vdev, vnic_dev_open,
990                 vnic_dev_open_done, flags);
991         if (err)
992                 dev_err(enic_get_dev(enic),
993                         "vNIC device open failed, err %d\n", err);
994
995         return err;
996 }
997
998 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
999 {
1000         dma_addr_t rss_key_buf_pa;
1001         union vnic_rss_key *rss_key_buf_va = NULL;
1002         int err, i;
1003         u8 name[NAME_MAX];
1004
1005         RTE_ASSERT(user_key != NULL);
1006         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1007         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1008                 &rss_key_buf_pa, name);
1009         if (!rss_key_buf_va)
1010                 return -ENOMEM;
1011
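        /*
         * The 40-byte key is handed to the VIC as four 10-byte sub-keys
         * (union vnic_rss_key), hence the key[i / 10].b[i % 10] scatter
         * below.
         */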
1012         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1013                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1014
1015         err = enic_set_rss_key(enic,
1016                 rss_key_buf_pa,
1017                 sizeof(union vnic_rss_key));
1018
1019         /* Save for later queries */
1020         if (!err) {
1021                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1022                            sizeof(union vnic_rss_key));
1023         }
1024         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1025                 rss_key_buf_va, rss_key_buf_pa);
1026
1027         return err;
1028 }
1029
1030 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1031 {
1032         dma_addr_t rss_cpu_buf_pa;
1033         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1034         int err;
1035         u8 name[NAME_MAX];
1036
1037         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1038         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1039                 &rss_cpu_buf_pa, name);
1040         if (!rss_cpu_buf_va)
1041                 return -ENOMEM;
1042
1043         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1044
1045         err = enic_set_rss_cpu(enic,
1046                 rss_cpu_buf_pa,
1047                 sizeof(union vnic_rss_cpu));
1048
1049         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1050                 rss_cpu_buf_va, rss_cpu_buf_pa);
1051
1052         /* Save for later queries */
1053         if (!err)
1054                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1055         return err;
1056 }
1057
1058 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1059         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1060 {
1061         const u8 tso_ipid_split_en = 0;
1062         int err;
1063
1064         err = enic_set_nic_cfg(enic,
1065                 rss_default_cpu, rss_hash_type,
1066                 rss_hash_bits, rss_base_cpu,
1067                 rss_enable, tso_ipid_split_en,
1068                 enic->ig_vlan_strip_en);
1069
1070         return err;
1071 }
1072
1073 /* Initialize RSS with defaults, called from dev_configure */
1074 int enic_init_rss_nic_cfg(struct enic *enic)
1075 {
1076         static uint8_t default_rss_key[] = {
1077                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1078                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1079                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1080                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1081         };
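        /*
         * The bytes above are the ASCII string
         * "UCSawesomePALOuniqueLINUXrocksENICiscool", i.e. a fixed
         * ENIC_RSS_HASH_KEY_SIZE (40-byte) default key.
         */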
1082         struct rte_eth_rss_conf rss_conf;
1083         union vnic_rss_cpu rss_cpu;
1084         int ret, i;
1085
1086         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1087         /*
1088          * If setting key for the first time, and the user gives us none, then
1089          * push the default key to NIC.
1090          */
1091         if (rss_conf.rss_key == NULL) {
1092                 rss_conf.rss_key = default_rss_key;
1093                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1094         }
1095         ret = enic_set_rss_conf(enic, &rss_conf);
1096         if (ret) {
1097                 dev_err(enic, "Failed to configure RSS\n");
1098                 return ret;
1099         }
1100         if (enic->rss_enable) {
1101                 /* If enabling RSS, use the default reta */
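                /*
                 * Each RETA entry is packed four to a cpu[] element
                 * (cpu[i / 4].b[i % 4]) and filled round-robin with the SOP
                 * RQ index of each configured Rx queue, spreading flows
                 * evenly across enic->rq_count queues by default.
                 */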
1102                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1103                         rss_cpu.cpu[i / 4].b[i % 4] =
1104                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1105                 }
1106                 ret = enic_set_rss_reta(enic, &rss_cpu);
1107                 if (ret)
1108                         dev_err(enic, "Failed to set RSS indirection table\n");
1109         }
1110         return ret;
1111 }
1112
1113 int enic_setup_finish(struct enic *enic)
1114 {
1115         enic_init_soft_stats(enic);
1116
1117         /* Default conf */
1118         vnic_dev_packet_filter(enic->vdev,
1119                 1 /* directed  */,
1120                 1 /* multicast */,
1121                 1 /* broadcast */,
1122                 0 /* promisc   */,
1123                 1 /* allmulti  */);
1124
1125         enic->promisc = 0;
1126         enic->allmulti = 1;
1127
1128         return 0;
1129 }
1130
1131 static int enic_rss_conf_valid(struct enic *enic,
1132                                struct rte_eth_rss_conf *rss_conf)
1133 {
1134         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1135         if (enic->flow_type_rss_offloads == 0)
1136                 return 0;
1137         if (rss_conf->rss_key != NULL &&
1138             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1139                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1140                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1141                 return -EINVAL;
1142         }
1143         if (rss_conf->rss_hf != 0 &&
1144             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1145                 dev_err(enic, "Given rss_hf contains none of the supported"
1146                         " types\n");
1147                 return -EINVAL;
1148         }
1149         return 0;
1150 }
1151
1152 /* Set hash type and key according to rss_conf */
1153 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1154 {
1155         struct rte_eth_dev *eth_dev;
1156         uint64_t rss_hf;
1157         u8 rss_hash_type;
1158         u8 rss_enable;
1159         int ret;
1160
1161         RTE_ASSERT(rss_conf != NULL);
1162         ret = enic_rss_conf_valid(enic, rss_conf);
1163         if (ret) {
1164                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1165                 return ret;
1166         }
1167
1168         eth_dev = enic->rte_dev;
1169         rss_hash_type = 0;
1170         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1171         if (enic->rq_count > 1 &&
1172             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1173             rss_hf != 0) {
1174                 rss_enable = 1;
1175                 if (rss_hf & ETH_RSS_IPV4)
1176                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1177                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1178                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1179                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1180                         /*
1181                          * 'TCP' is not a typo. HW does not have a separate
1182                          * enable bit for UDP RSS. The TCP bit enables both TCP
1183                          * and UDP RSS.
1184                          */
1185                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1186                 }
1187                 if (rss_hf & ETH_RSS_IPV6)
1188                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1189                 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1190                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1191                 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) {
1192                         /* Again, 'TCP' is not a typo. */
1193                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1194                 }
1195                 if (rss_hf & ETH_RSS_IPV6_EX)
1196                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6_EX;
1197                 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1198                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX;
1199         } else {
1200                 rss_enable = 0;
1201                 rss_hf = 0;
1202         }
1203
1204         /* Set the hash key if provided */
1205         if (rss_enable && rss_conf->rss_key) {
1206                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1207                 if (ret) {
1208                         dev_err(enic, "Failed to set RSS key\n");
1209                         return ret;
1210                 }
1211         }
1212
1213         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1214                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1215                               rss_enable);
1216         if (!ret) {
1217                 enic->rss_hf = rss_hf;
1218                 enic->rss_hash_type = rss_hash_type;
1219                 enic->rss_enable = rss_enable;
1220         }
1221         return ret;
1222 }
1223
1224 int enic_set_vlan_strip(struct enic *enic)
1225 {
1226         /*
1227          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1228          * together. So, re-do niccfg, preserving the current RSS settings.
1229          */
1230         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1231                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1232                                enic->rss_enable);
1233 }
1234
1235 void enic_add_packet_filter(struct enic *enic)
1236 {
1237         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1238         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1239                 enic->promisc, enic->allmulti);
1240 }
1241
1242 int enic_get_link_status(struct enic *enic)
1243 {
1244         return vnic_dev_link_status(enic->vdev);
1245 }
1246
1247 static void enic_dev_deinit(struct enic *enic)
1248 {
1249         struct rte_eth_dev *eth_dev = enic->rte_dev;
1250
1251         /* stop link status checking */
1252         vnic_dev_notify_unset(enic->vdev);
1253
1254         rte_free(eth_dev->data->mac_addrs);
1255         rte_free(enic->cq);
1256         rte_free(enic->intr);
1257         rte_free(enic->rq);
1258         rte_free(enic->wq);
1259 }
1260
1261
1262 int enic_set_vnic_res(struct enic *enic)
1263 {
1264         struct rte_eth_dev *eth_dev = enic->rte_dev;
1265         int rc = 0;
1266         unsigned int required_rq, required_wq, required_cq, required_intr;
1267
1268         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
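        /*
         * For example, 4 eth_dev Rx queues require 8 vNIC RQs (one SOP plus
         * one data RQ each), even if the data RQs end up unused because Rx
         * scatter is disabled.
         */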
1269         required_rq = eth_dev->data->nb_rx_queues * 2;
1270         required_wq = eth_dev->data->nb_tx_queues;
1271         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1272         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1273         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1274                 required_intr += eth_dev->data->nb_rx_queues;
1275         }
1276
1277         if (enic->conf_rq_count < required_rq) {
1278                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1279                         eth_dev->data->nb_rx_queues,
1280                         required_rq, enic->conf_rq_count);
1281                 rc = -EINVAL;
1282         }
1283         if (enic->conf_wq_count < required_wq) {
1284                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1285                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1286                 rc = -EINVAL;
1287         }
1288
1289         if (enic->conf_cq_count < required_cq) {
1290                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1291                         required_cq, enic->conf_cq_count);
1292                 rc = -EINVAL;
1293         }
1294         if (enic->conf_intr_count < required_intr) {
1295                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1296                         " interrupts. Required:%u, Configured:%u\n",
1297                         required_intr, enic->conf_intr_count);
1298                 rc = -EINVAL;
1299         }
1300
1301         if (rc == 0) {
1302                 enic->rq_count = eth_dev->data->nb_rx_queues;
1303                 enic->wq_count = eth_dev->data->nb_tx_queues;
1304                 enic->cq_count = enic->rq_count + enic->wq_count;
1305                 enic->intr_count = required_intr;
1306         }
1307
1308         return rc;
1309 }
1310
1311 /* Initialize the completion queue for an RQ */
1312 static int
1313 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1314 {
1315         struct vnic_rq *sop_rq, *data_rq;
1316         unsigned int cq_idx;
1317         int rc = 0;
1318
1319         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1320         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1321         cq_idx = rq_idx;
1322
1323         vnic_cq_clean(&enic->cq[cq_idx]);
1324         vnic_cq_init(&enic->cq[cq_idx],
1325                      0 /* flow_control_enable */,
1326                      1 /* color_enable */,
1327                      0 /* cq_head */,
1328                      0 /* cq_tail */,
1329                      1 /* cq_tail_color */,
1330                      0 /* interrupt_enable */,
1331                      1 /* cq_entry_enable */,
1332                      0 /* cq_message_enable */,
1333                      0 /* interrupt offset */,
1334                      0 /* cq_message_addr */);
1335
1336
1337         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1338                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1339                            sop_rq->ring.desc_count - 1, 1, 0);
1340         if (data_rq->in_use) {
1341                 vnic_rq_init_start(data_rq,
1342                                    enic_cq_rq(enic,
1343                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1344                                    data_rq->ring.desc_count - 1, 1, 0);
1345         }
1346
1347         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1348         if (rc)
1349                 return rc;
1350
1351         if (data_rq->in_use) {
1352                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1353                 if (rc) {
1354                         enic_rxmbuf_queue_release(enic, sop_rq);
1355                         return rc;
1356                 }
1357         }
1358
1359         return 0;
1360 }
1361
1362 /* The Cisco NIC can send and receive packets up to a max packet size
1363  * determined by the NIC type and firmware. There is also an MTU
1364  * configured into the NIC via the CIMC/UCSM management interface
1365  * which can be overridden by this function (up to the max packet size).
1366  * Depending on the network setup, doing so may cause packet drops
1367  * and unexpected behavior.
1368  */
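/*
 * This is most likely reached through the PMD's mtu_set dev op (i.e. via
 * rte_eth_dev_set_mtu() from the application), so it can run while the port
 * is started; hence the care taken below to quiesce the RQs first.
 */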
1369 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1370 {
1371         unsigned int rq_idx;
1372         struct vnic_rq *rq;
1373         int rc = 0;
1374         uint16_t old_mtu;       /* previous setting */
1375         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1376         struct rte_eth_dev *eth_dev = enic->rte_dev;
1377
1378         old_mtu = eth_dev->data->mtu;
1379         config_mtu = enic->config.mtu;
1380
1381         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1382                 return -E_RTE_SECONDARY;
1383
1384         if (new_mtu > enic->max_mtu) {
1385                 dev_err(enic,
1386                         "MTU not updated: requested (%u) greater than max (%u)\n",
1387                         new_mtu, enic->max_mtu);
1388                 return -EINVAL;
1389         }
1390         if (new_mtu < ENIC_MIN_MTU) {
1391                 dev_info(enic,
1392                         "MTU not updated: requested (%u) less than min (%u)\n",
1393                         new_mtu, ENIC_MIN_MTU);
1394                 return -EINVAL;
1395         }
1396         if (new_mtu > config_mtu)
1397                 dev_warning(enic,
1398                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1399                         new_mtu, config_mtu);
1400
1401         /* The easy case is when scatter is disabled. However if the MTU
1402          * becomes greater than the mbuf data size, packet drops will ensue.
1403          */
1404         if (!(enic->rte_dev->data->dev_conf.rxmode.offloads &
1405               DEV_RX_OFFLOAD_SCATTER)) {
1406                 eth_dev->data->mtu = new_mtu;
1407                 goto set_mtu_done;
1408         }
1409
1410         /* Rx scatter is enabled so reconfigure RQ's on the fly. The point is to
1411          * change Rx scatter mode if necessary for better performance. I.e. if
1412          * MTU was greater than the mbuf size and now it's less, scatter Rx
1413          * doesn't have to be used and vice versa.
1414           */
1415         rte_spinlock_lock(&enic->mtu_lock);
1416
1417         /* Stop traffic on all RQs */
1418         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1419                 rq = &enic->rq[rq_idx];
1420                 if (rq->is_sop && rq->in_use) {
1421                         rc = enic_stop_rq(enic,
1422                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1423                         if (rc) {
1424                                 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1425                                 goto set_mtu_done;
1426                         }
1427                 }
1428         }
1429
1430         /* replace Rx function with a no-op to avoid getting stale pkts */
1431         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1432         rte_mb();
1433
1434         /* Allow time for threads to exit the real Rx function. */
1435         usleep(100000);
1436
1437         /* now it is safe to reconfigure the RQs */
1438
1439         /* update the mtu */
1440         eth_dev->data->mtu = new_mtu;
1441
1442         /* free and reallocate RQs with the new MTU */
1443         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1444                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1445                 if (!rq->in_use)
1446                         continue;
1447
1448                 enic_free_rq(rq);
1449                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1450                                    rq->tot_nb_desc, rq->rx_free_thresh);
1451                 if (rc) {
1452                         dev_err(enic,
1453                                 "Fatal MTU alloc error - no traffic will pass\n");
1454                         goto set_mtu_done;
1455                 }
1456
1457                 rc = enic_reinit_rq(enic, rq_idx);
1458                 if (rc) {
1459                         dev_err(enic,
1460                                 "Fatal MTU RQ reinit failed - no traffic will pass\n");
1461                         goto set_mtu_done;
1462                 }
1463         }
1464
1465         /* put back the real receive function */
1466         rte_mb();
1467         eth_dev->rx_pkt_burst = enic_recv_pkts;
1468         rte_mb();
1469
1470         /* restart Rx traffic */
1471         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1472                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1473                 if (rq->is_sop && rq->in_use)
1474                         enic_start_rq(enic, rq_idx);
1475         }
1476
1477 set_mtu_done:
1478         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1479         rte_spinlock_unlock(&enic->mtu_lock);
1480         return rc;
1481 }
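
/*
 * Illustrative sketch (not part of the driver): applications do not call
 * enic_set_mtu() directly.  They use the generic ethdev API, and the ethdev
 * layer dispatches to the PMD's mtu_set callback, which ends up here.  The
 * port id and MTU value below are hypothetical.
 *
 *     uint16_t port_id = 0;
 *     int ret = rte_eth_dev_set_mtu(port_id, 1600);
 *
 *     if (ret != 0)
 *             printf("MTU update failed: %d\n", ret);
 */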
1482
1483 static int enic_dev_init(struct enic *enic)
1484 {
1485         int err;
1486         struct rte_eth_dev *eth_dev = enic->rte_dev;
1487
1488         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1489
1490         /* Get vNIC configuration */
1492         err = enic_get_vnic_config(enic);
1493         if (err) {
1494                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1495                 return err;
1496         }
1497
1498         /* Get available resource counts */
1499         enic_get_res_counts(enic);
1500         if (enic->conf_rq_count == 1) {
1501                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1502                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1503                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1504                 return -EINVAL;
1505         }
1506         /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1507         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1508                                enic->conf_cq_count, 8);
1509         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1510                                  enic->conf_intr_count, 8);
1511         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1512                                enic->conf_rq_count, 8);
1513         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1514                                enic->conf_wq_count, 8);
1515         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1516                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1517                 return -1;
1518         }
1519         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1520                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1521                 return -1;
1522         }
1523         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1524                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1525                 return -1;
1526         }
1527         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1528                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1529                 return -1;
1530         }
1531
1532         /* Get the supported filters */
1533         enic_fdir_info(enic);
1534
1535         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1536                                                 * ENIC_MAX_MAC_ADDR, 0);
1537         if (!eth_dev->data->mac_addrs) {
1538                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1539                 return -1;
1540         }
1541         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1542                         eth_dev->data->mac_addrs);
1543
1544         vnic_dev_set_reset_flag(enic->vdev, 0);
1545
1546         LIST_INIT(&enic->flows);
1547         rte_spinlock_init(&enic->flows_lock);
1548
1549         /* set up link status checking */
1550         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1551
1552         enic->overlay_offload = false;
1553         if (!enic->disable_overlay && enic->vxlan &&
1554             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1555             vnic_dev_overlay_offload_ctrl(enic->vdev,
1556                                           OVERLAY_FEATURE_VXLAN,
1557                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1558                 enic->tx_offload_capa |=
1559                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1560                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1561                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1562                 /*
1563                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1564                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1565                  */
1566                 enic->tx_offload_mask |=
1567                         PKT_TX_OUTER_IP_CKSUM |
1568                         PKT_TX_TUNNEL_MASK;
1569                 enic->overlay_offload = true;
1570                 dev_info(enic, "Overlay offload is enabled\n");
1571         }
1572
1573         return 0;
1574
1575 }
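
/*
 * Illustrative sketch (not part of the driver): when overlay offload is
 * enabled above, the extra Tx capabilities are reported through the ethdev
 * info and can be requested at configure time.  The port id and queue counts
 * below are hypothetical.
 *
 *     struct rte_eth_dev_info dev_info;
 *     struct rte_eth_conf conf = { 0 };
 *
 *     rte_eth_dev_info_get(0, &dev_info);
 *     if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
 *             conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
 *     rte_eth_dev_configure(0, 1, 1, &conf);
 */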
1576
1577 int enic_probe(struct enic *enic)
1578 {
1579         struct rte_pci_device *pdev = enic->pdev;
1580         int err = -1;
1581
1582         dev_debug(enic, "Initializing ENIC PMD\n");
1583
1584         /* If this is a secondary process, the hardware is already initialized */
1585         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1586                 return 0;
1587
1588         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1589         enic->bar0.len = pdev->mem_resource[0].len;
1590
1591         /* Register vNIC device */
1592         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1593         if (!enic->vdev) {
1594                 dev_err(enic, "vNIC registration failed, aborting\n");
1595                 goto err_out;
1596         }
1597
1598         LIST_INIT(&enic->memzone_list);
1599         rte_spinlock_init(&enic->memzone_list_lock);
1600
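        /* Back the vNIC's "consistent" (DMA-able) allocations with rte
         * memzones; they are tracked on the memzone_list initialized above.
         */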
1601         vnic_register_cbacks(enic->vdev,
1602                 enic_alloc_consistent,
1603                 enic_free_consistent);
1604
1605         /*
1606          * Allocate the consistent memory for stats upfront so both primary and
1607          * secondary processes can dump stats.
1608          */
1609         err = vnic_dev_alloc_stats_mem(enic->vdev);
1610         if (err) {
1611                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1612                 goto err_out_unregister;
1613         }
1614         /* Issue device open to get device in known state */
1615         err = enic_dev_open(enic);
1616         if (err) {
1617                 dev_err(enic, "vNIC dev open failed, aborting\n");
1618                 goto err_out_unregister;
1619         }
1620
1621         /* Set ingress VLAN rewrite mode before vNIC initialization. */
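        /* Pass-through mode leaves the VLAN header of received packets
         * as-is: the vNIC performs no tag stripping or rewrite on ingress.
         */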
1622         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1623                 IG_VLAN_REWRITE_MODE_PASS_THRU);
1624         if (err) {
1625                 dev_err(enic,
1626                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1627                 goto err_out_dev_close;
1628         }
1629
1630         /* Issue device init to initialize the vnic-to-switch link.
1631          * We'll start with carrier off and wait for link UP
1632          * notification later to turn on carrier.  We don't need
1633          * to wait here for the vnic-to-switch link initialization
1634          * to complete; link UP notification is the indication that
1635          * the process is complete.
1636          */
1637
1638         err = vnic_dev_init(enic->vdev, 0);
1639         if (err) {
1640                 dev_err(enic, "vNIC dev init failed, aborting\n");
1641                 goto err_out_dev_close;
1642         }
1643
1644         err = enic_dev_init(enic);
1645         if (err) {
1646                 dev_err(enic, "Device initialization failed, aborting\n");
1647                 goto err_out_dev_close;
1648         }
1649
1650         return 0;
1651
1652 err_out_dev_close:
1653         vnic_dev_close(enic->vdev);
1654 err_out_unregister:
1655         vnic_dev_unregister(enic->vdev);
1656 err_out:
1657         return err;
1658 }
1659
1660 void enic_remove(struct enic *enic)
1661 {
1662         enic_dev_deinit(enic);
1663         vnic_dev_close(enic->vdev);
1664         vnic_dev_unregister(enic->vdev);
1665 }