net/enic: enable RQ first and then post Rx buffers
drivers/net/enic/enic_main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] |  addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct vnic_wq_buf *buf)
73 {
74         struct rte_mbuf *mbuf = (struct rte_mbuf *)buf->mb;
75
76         rte_pktmbuf_free_seg(mbuf);
77         buf->mb = NULL;
78 }
79
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%d] error_status %d\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%d] error_status %d\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is smaller than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated, which we
294          * drop in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         return 0;
324 }
325
326 /*
327  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
328  * allocated the buffers and filled the RQ descriptor ring. Just need to push
329  * the post index to the NIC.
330  */
331 static void
332 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
333 {
334         if (!rq->in_use || !rq->need_initial_post)
335                 return;
336
337         /* make sure all prior writes are complete before doing the PIO write */
338         rte_rmb();
339
340         /* Post all but the last buffer to VIC. */
341         rq->posted_index = rq->ring.desc_count - 1;
342
343         rq->rx_nb_hold = 0;
344
345         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
346                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
347         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
348         iowrite32(0, &rq->ctrl->fetch_index);
349         rte_rmb();
350         rq->need_initial_post = false;
351 }
352
353 static void *
354 enic_alloc_consistent(void *priv, size_t size,
355         dma_addr_t *dma_handle, u8 *name)
356 {
357         void *vaddr;
358         const struct rte_memzone *rz;
359         *dma_handle = 0;
360         struct enic *enic = (struct enic *)priv;
361         struct enic_memzone_entry *mze;
362
363         rz = rte_memzone_reserve_aligned((const char *)name, size,
364                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
365         if (!rz) {
366                 pr_err("%s : Failed to allocate memory requested for %s\n",
367                         __func__, name);
368                 return NULL;
369         }
370
371         vaddr = rz->addr;
372         *dma_handle = (dma_addr_t)rz->iova;
373
374         mze = rte_malloc("enic memzone entry",
375                          sizeof(struct enic_memzone_entry), 0);
376
377         if (!mze) {
378                 pr_err("%s : Failed to allocate memory for memzone list\n",
379                        __func__);
380                 rte_memzone_free(rz);
381                 return NULL;
382         }
383
384         mze->rz = rz;
385
386         rte_spinlock_lock(&enic->memzone_list_lock);
387         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
388         rte_spinlock_unlock(&enic->memzone_list_lock);
389
390         return vaddr;
391 }
392
393 static void
394 enic_free_consistent(void *priv,
395                      __rte_unused size_t size,
396                      void *vaddr,
397                      dma_addr_t dma_handle)
398 {
399         struct enic_memzone_entry *mze;
400         struct enic *enic = (struct enic *)priv;
401
402         rte_spinlock_lock(&enic->memzone_list_lock);
403         LIST_FOREACH(mze, &enic->memzone_list, entries) {
404                 if (mze->rz->addr == vaddr &&
405                     mze->rz->iova == dma_handle)
406                         break;
407         }
408         if (mze == NULL) {
409                 rte_spinlock_unlock(&enic->memzone_list_lock);
410                 dev_warning(enic,
411                             "Tried to free memory, but couldn't find it in the memzone list\n");
412                 return;
413         }
414         LIST_REMOVE(mze, entries);
415         rte_spinlock_unlock(&enic->memzone_list_lock);
416         rte_memzone_free(mze->rz);
417         rte_free(mze);
418 }
419
420 int enic_link_update(struct enic *enic)
421 {
422         struct rte_eth_dev *eth_dev = enic->rte_dev;
423         struct rte_eth_link link;
424
425         memset(&link, 0, sizeof(link));
426         link.link_status = enic_get_link_status(enic);
427         link.link_duplex = ETH_LINK_FULL_DUPLEX;
428         link.link_speed = vnic_dev_port_speed(enic->vdev);
429
430         return rte_eth_linkstatus_set(eth_dev, &link);
431 }
432
433 static void
434 enic_intr_handler(void *arg)
435 {
436         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
437         struct enic *enic = pmd_priv(dev);
438
439         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
440
441         enic_link_update(enic);
442         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
443         enic_log_q_error(enic);
444 }
445
446 static int enic_rxq_intr_init(struct enic *enic)
447 {
448         struct rte_intr_handle *intr_handle;
449         uint32_t rxq_intr_count, i;
450         int err;
451
452         intr_handle = enic->rte_dev->intr_handle;
453         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
454                 return 0;
455         /*
456          * Rx queue interrupts only work when we have MSI-X interrupts,
457          * one per queue. Sharing one interrupt is technically
458          * possible with VIC, but it is not worth the complications it brings.
459          */
460         if (!rte_intr_cap_multiple(intr_handle)) {
461                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
462                         " (vfio-pci driver)\n");
463                 return -ENOTSUP;
464         }
465         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
466         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
467         if (err) {
468                 dev_err(enic, "Failed to enable event fds for Rx queue"
469                         " interrupts\n");
470                 return err;
471         }
472         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
473                                             rxq_intr_count * sizeof(int), 0);
474         if (intr_handle->intr_vec == NULL) {
475                 dev_err(enic, "Failed to allocate intr_vec\n");
476                 return -ENOMEM;
477         }
478         for (i = 0; i < rxq_intr_count; i++)
479                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
480         return 0;
481 }
482
483 static void enic_rxq_intr_deinit(struct enic *enic)
484 {
485         struct rte_intr_handle *intr_handle;
486
487         intr_handle = enic->rte_dev->intr_handle;
488         rte_intr_efd_disable(intr_handle);
489         if (intr_handle->intr_vec != NULL) {
490                 rte_free(intr_handle->intr_vec);
491                 intr_handle->intr_vec = NULL;
492         }
493 }
494
495 int enic_enable(struct enic *enic)
496 {
497         unsigned int index;
498         int err;
499         struct rte_eth_dev *eth_dev = enic->rte_dev;
500
501         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
502         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
503
504         /* vnic notification of link status has already been turned on in
505          * enic_dev_init(), which is called at probe time.  Here we are
506          * just turning on interrupt vector 0 if needed.
507          */
508         if (eth_dev->data->dev_conf.intr_conf.lsc)
509                 vnic_dev_notify_set(enic->vdev, 0);
510
511         err = enic_rxq_intr_init(enic);
512         if (err)
513                 return err;
514         if (enic_clsf_init(enic))
515                 dev_warning(enic, "Init of hash table for clsf failed. "\
516                         "Flow director feature will not work\n");
517
518         for (index = 0; index < enic->rq_count; index++) {
519                 err = enic_alloc_rx_queue_mbufs(enic,
520                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
521                 if (err) {
522                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
523                         return err;
524                 }
525                 err = enic_alloc_rx_queue_mbufs(enic,
526                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
527                 if (err) {
528                         /* release the allocated mbufs for the sop rq */
529                         enic_rxmbuf_queue_release(enic,
530                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
531
532                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
533                         return err;
534                 }
535         }
536
537         for (index = 0; index < enic->wq_count; index++)
538                 enic_start_wq(enic, index);
539         for (index = 0; index < enic->rq_count; index++)
540                 enic_start_rq(enic, index);
541
542         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
543
544         vnic_dev_enable_wait(enic->vdev);
545
546         /* Register and enable error interrupt */
547         rte_intr_callback_register(&(enic->pdev->intr_handle),
548                 enic_intr_handler, (void *)enic->rte_dev);
549
550         rte_intr_enable(&(enic->pdev->intr_handle));
551         /* Unmask LSC interrupt */
552         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
553
554         return 0;
555 }
556
557 int enic_alloc_intr_resources(struct enic *enic)
558 {
559         int err;
560         unsigned int i;
561
562         dev_info(enic, "vNIC resources used:  "\
563                 "wq %d rq %d cq %d intr %d\n",
564                 enic->wq_count, enic_vnic_rq_count(enic),
565                 enic->cq_count, enic->intr_count);
566
567         for (i = 0; i < enic->intr_count; i++) {
568                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
569                 if (err) {
570                         enic_free_vnic_resources(enic);
571                         return err;
572                 }
573         }
574         return 0;
575 }
576
577 void enic_free_rq(void *rxq)
578 {
579         struct vnic_rq *rq_sop, *rq_data;
580         struct enic *enic;
581
582         if (rxq == NULL)
583                 return;
584
585         rq_sop = (struct vnic_rq *)rxq;
586         enic = vnic_dev_priv(rq_sop->vdev);
587         rq_data = &enic->rq[rq_sop->data_queue_idx];
588
589         enic_rxmbuf_queue_release(enic, rq_sop);
590         if (rq_data->in_use)
591                 enic_rxmbuf_queue_release(enic, rq_data);
592
593         rte_free(rq_sop->mbuf_ring);
594         if (rq_data->in_use)
595                 rte_free(rq_data->mbuf_ring);
596
597         rq_sop->mbuf_ring = NULL;
598         rq_data->mbuf_ring = NULL;
599
600         vnic_rq_free(rq_sop);
601         if (rq_data->in_use)
602                 vnic_rq_free(rq_data);
603
604         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
605
606         rq_sop->in_use = 0;
607         rq_data->in_use = 0;
608 }
609
610 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
611 {
612         struct rte_eth_dev *eth_dev = enic->rte_dev;
613         vnic_wq_enable(&enic->wq[queue_idx]);
614         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
615 }
616
617 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
618 {
619         struct rte_eth_dev *eth_dev = enic->rte_dev;
620         int ret;
621
622         ret = vnic_wq_disable(&enic->wq[queue_idx]);
623         if (ret)
624                 return ret;
625
626         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
627         return 0;
628 }
629
630 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
631 {
632         struct vnic_rq *rq_sop;
633         struct vnic_rq *rq_data;
634         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
635         rq_data = &enic->rq[rq_sop->data_queue_idx];
636         struct rte_eth_dev *eth_dev = enic->rte_dev;
637
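            /*
             * Enable each RQ before posting its initial Rx buffers
             * (enic_initial_post_rx()); the data RQ, when in use, is brought
             * up before the sop RQ.
             */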
638         if (rq_data->in_use) {
639                 vnic_rq_enable(rq_data);
640                 enic_initial_post_rx(enic, rq_data);
641         }
642         rte_mb();
643         vnic_rq_enable(rq_sop);
644         enic_initial_post_rx(enic, rq_sop);
645         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
646 }
647
648 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
649 {
650         int ret1 = 0, ret2 = 0;
651         struct rte_eth_dev *eth_dev = enic->rte_dev;
652         struct vnic_rq *rq_sop;
653         struct vnic_rq *rq_data;
654         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
655         rq_data = &enic->rq[rq_sop->data_queue_idx];
656
657         ret2 = vnic_rq_disable(rq_sop);
658         rte_mb();
659         if (rq_data->in_use)
660                 ret1 = vnic_rq_disable(rq_data);
661
662         if (ret2)
663                 return ret2;
664         else if (ret1)
665                 return ret1;
666
667         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
668         return 0;
669 }
670
671 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
672         unsigned int socket_id, struct rte_mempool *mp,
673         uint16_t nb_desc, uint16_t free_thresh)
674 {
675         int rc;
676         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
677         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
678         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
679         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
680         unsigned int mbuf_size, mbufs_per_pkt;
681         unsigned int nb_sop_desc, nb_data_desc;
682         uint16_t min_sop, max_sop, min_data, max_data;
683         uint32_t max_rx_pkt_len;
684
685         rq_sop->is_sop = 1;
686         rq_sop->data_queue_idx = data_queue_idx;
687         rq_data->is_sop = 0;
688         rq_data->data_queue_idx = 0;
689         rq_sop->socket_id = socket_id;
690         rq_sop->mp = mp;
691         rq_data->socket_id = socket_id;
692         rq_data->mp = mp;
693         rq_sop->in_use = 1;
694         rq_sop->rx_free_thresh = free_thresh;
695         rq_data->rx_free_thresh = free_thresh;
696         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
697                   free_thresh);
698
699         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
700                                RTE_PKTMBUF_HEADROOM);
701         /* max_rx_pkt_len includes the ethernet header and CRC. */
702         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
703
704         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
705             DEV_RX_OFFLOAD_SCATTER) {
706                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
707                 /* ceil((max pkt len)/mbuf_size) */
708                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
709         } else {
710                 dev_info(enic, "Scatter rx mode disabled\n");
711                 mbufs_per_pkt = 1;
712                 if (max_rx_pkt_len > mbuf_size) {
713                         dev_warning(enic, "The maximum Rx packet size (%u) is"
714                                     " larger than the mbuf size (%u), and"
715                                     " scatter is disabled. Larger packets will"
716                                     " be truncated.\n",
717                                     max_rx_pkt_len, mbuf_size);
718                 }
719         }
720
721         if (mbufs_per_pkt > 1) {
722                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
723                 rq_sop->data_queue_enable = 1;
724                 rq_data->in_use = 1;
725                 /*
726                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
727                  * receives packet sizes up to the "max" MTU.
728                  * If not using scatter, we can achieve the effect of dropping
729                  * larger packets by reducing the size of posted buffers.
730                  * See enic_alloc_rx_queue_mbufs().
731                  */
732                 if (max_rx_pkt_len <
733                     enic_mtu_to_max_rx_pktlen(enic->rte_dev->data->mtu)) {
734                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
735                                     " when scatter rx mode is in use.\n");
736                 }
737         } else {
738                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
739                          queue_idx);
740                 rq_sop->data_queue_enable = 0;
741                 rq_data->in_use = 0;
742         }
743
744         /* the number of descriptors has to be a multiple of 32 */
745         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ~0x1F;
746         nb_data_desc = (nb_desc - nb_sop_desc) & ~0x1F;
747
748         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
749         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
750
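            /*
             * With scatter, each packet uses one sop descriptor plus up to
             * (mbufs_per_pkt - 1) data descriptors, so split the requested
             * descriptor count between the two rings accordingly. Without
             * scatter only the sop ring is used.
             */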
751         if (mbufs_per_pkt > 1) {
752                 min_sop = 64;
753                 max_sop = ((enic->config.rq_desc_count /
754                             (mbufs_per_pkt - 1)) & ~0x1F);
755                 min_data = min_sop * (mbufs_per_pkt - 1);
756                 max_data = enic->config.rq_desc_count;
757         } else {
758                 min_sop = 64;
759                 max_sop = enic->config.rq_desc_count;
760                 min_data = 0;
761                 max_data = 0;
762         }
763
764         if (nb_desc < (min_sop + min_data)) {
765                 dev_warning(enic,
766                             "Number of rx descs too low, adjusting to minimum\n");
767                 nb_sop_desc = min_sop;
768                 nb_data_desc = min_data;
769         } else if (nb_desc > (max_sop + max_data)) {
770                 dev_warning(enic,
771                             "Number of rx_descs too high, adjusting to maximum\n");
772                 nb_sop_desc = max_sop;
773                 nb_data_desc = max_data;
774         }
775         if (mbufs_per_pkt > 1) {
776                 dev_info(enic, "For max packet size %u and mbuf size %u valid"
777                          " rx descriptor range is %u to %u\n",
778                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
779                          max_sop + max_data);
780         }
781         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
782                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
783
784         /* Allocate sop queue resources */
785         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
786                 nb_sop_desc, sizeof(struct rq_enet_desc));
787         if (rc) {
788                 dev_err(enic, "error in allocation of sop rq\n");
789                 goto err_exit;
790         }
791         nb_sop_desc = rq_sop->ring.desc_count;
792
793         if (rq_data->in_use) {
794                 /* Allocate data queue resources */
795                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
796                                    nb_data_desc,
797                                    sizeof(struct rq_enet_desc));
798                 if (rc) {
799                         dev_err(enic, "error in allocation of data rq\n");
800                         goto err_free_rq_sop;
801                 }
802                 nb_data_desc = rq_data->ring.desc_count;
803         }
804         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
805                            socket_id, nb_sop_desc + nb_data_desc,
806                            sizeof(struct cq_enet_rq_desc));
807         if (rc) {
808                 dev_err(enic, "error in allocation of cq for rq\n");
809                 goto err_free_rq_data;
810         }
811
812         /* Allocate the mbuf rings */
813         rq_sop->mbuf_ring = (struct rte_mbuf **)
814                 rte_zmalloc_socket("rq->mbuf_ring",
815                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
816                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
817         if (rq_sop->mbuf_ring == NULL)
818                 goto err_free_cq;
819
820         if (rq_data->in_use) {
821                 rq_data->mbuf_ring = (struct rte_mbuf **)
822                         rte_zmalloc_socket("rq->mbuf_ring",
823                                 sizeof(struct rte_mbuf *) * nb_data_desc,
824                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
825                 if (rq_data->mbuf_ring == NULL)
826                         goto err_free_sop_mbuf;
827         }
828
829         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
830
831         return 0;
832
833 err_free_sop_mbuf:
834         rte_free(rq_sop->mbuf_ring);
835 err_free_cq:
836         /* cleanup on error */
837         vnic_cq_free(&enic->cq[queue_idx]);
838 err_free_rq_data:
839         if (rq_data->in_use)
840                 vnic_rq_free(rq_data);
841 err_free_rq_sop:
842         vnic_rq_free(rq_sop);
843 err_exit:
844         return -ENOMEM;
845 }
846
847 void enic_free_wq(void *txq)
848 {
849         struct vnic_wq *wq;
850         struct enic *enic;
851
852         if (txq == NULL)
853                 return;
854
855         wq = (struct vnic_wq *)txq;
856         enic = vnic_dev_priv(wq->vdev);
857         rte_memzone_free(wq->cqmsg_rz);
858         vnic_wq_free(wq);
859         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
860 }
861
862 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
863         unsigned int socket_id, uint16_t nb_desc)
864 {
865         int err;
866         struct vnic_wq *wq = &enic->wq[queue_idx];
867         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
868         char name[NAME_MAX];
869         static int instance;
870
871         wq->socket_id = socket_id;
872         if (nb_desc) {
873                 if (nb_desc > enic->config.wq_desc_count) {
874                         dev_warning(enic,
875                                 "WQ %d - number of tx desc in cmd line (%d) "\
876                                 "is greater than that in the UCSM/CIMC adapter "\
877                                 "policy.  Applying the value in the adapter "\
878                                 "policy (%d)\n",
879                                 queue_idx, nb_desc, enic->config.wq_desc_count);
880                 } else if (nb_desc != enic->config.wq_desc_count) {
881                         enic->config.wq_desc_count = nb_desc;
882                         dev_info(enic,
883                                 "TX Queues - effective number of descs:%d\n",
884                                 nb_desc);
885                 }
886         }
887
888         /* Allocate queue resources */
889         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
890                 enic->config.wq_desc_count,
891                 sizeof(struct wq_enet_desc));
892         if (err) {
893                 dev_err(enic, "error in allocation of wq\n");
894                 return err;
895         }
896
897         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
898                 socket_id, enic->config.wq_desc_count,
899                 sizeof(struct cq_enet_wq_desc));
900         if (err) {
901                 vnic_wq_free(wq);
902                 dev_err(enic, "error in allocation of cq for wq\n");
903         }
904
905         /* set up CQ message */
906         snprintf((char *)name, sizeof(name),
907                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
908                 instance++);
909
910         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
911                         sizeof(uint32_t), SOCKET_ID_ANY,
912                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
913         if (!wq->cqmsg_rz)
914                 return -ENOMEM;
915
916         return err;
917 }
918
919 int enic_disable(struct enic *enic)
920 {
921         unsigned int i;
922         int err;
923
924         for (i = 0; i < enic->intr_count; i++) {
925                 vnic_intr_mask(&enic->intr[i]);
926                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
927         }
928         enic_rxq_intr_deinit(enic);
929         rte_intr_disable(&enic->pdev->intr_handle);
930         rte_intr_callback_unregister(&enic->pdev->intr_handle,
931                                      enic_intr_handler,
932                                      (void *)enic->rte_dev);
933
934         vnic_dev_disable(enic->vdev);
935
936         enic_clsf_destroy(enic);
937
938         if (!enic_is_sriov_vf(enic))
939                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
940
941         for (i = 0; i < enic->wq_count; i++) {
942                 err = vnic_wq_disable(&enic->wq[i]);
943                 if (err)
944                         return err;
945         }
946         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
947                 if (enic->rq[i].in_use) {
948                         err = vnic_rq_disable(&enic->rq[i]);
949                         if (err)
950                                 return err;
951                 }
952         }
953
954         /* If we were using interrupts, set the interrupt vector to -1
955          * to disable interrupts.  We are not disabling link notifications,
956          * though, as we want the polling of link status to continue working.
957          */
958         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
959                 vnic_dev_notify_set(enic->vdev, -1);
960
961         vnic_dev_set_reset_flag(enic->vdev, 1);
962
963         for (i = 0; i < enic->wq_count; i++)
964                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
965
966         for (i = 0; i < enic_vnic_rq_count(enic); i++)
967                 if (enic->rq[i].in_use)
968                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
969         for (i = 0; i < enic->cq_count; i++)
970                 vnic_cq_clean(&enic->cq[i]);
971         for (i = 0; i < enic->intr_count; i++)
972                 vnic_intr_clean(&enic->intr[i]);
973
974         return 0;
975 }
976
977 static int enic_dev_wait(struct vnic_dev *vdev,
978         int (*start)(struct vnic_dev *, int),
979         int (*finished)(struct vnic_dev *, int *),
980         int arg)
981 {
982         int done;
983         int err;
984         int i;
985
986         err = start(vdev, arg);
987         if (err)
988                 return err;
989
990         /* Wait for func to complete...2 seconds max */
991         for (i = 0; i < 2000; i++) {
992                 err = finished(vdev, &done);
993                 if (err)
994                         return err;
995                 if (done)
996                         return 0;
997                 usleep(1000);
998         }
999         return -ETIMEDOUT;
1000 }
1001
1002 static int enic_dev_open(struct enic *enic)
1003 {
1004         int err;
1005         int flags = CMD_OPENF_IG_DESCCACHE;
1006
1007         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1008                 vnic_dev_open_done, flags);
1009         if (err)
1010                 dev_err(enic_get_dev(enic),
1011                         "vNIC device open failed, err %d\n", err);
1012
1013         return err;
1014 }
1015
1016 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1017 {
1018         dma_addr_t rss_key_buf_pa;
1019         union vnic_rss_key *rss_key_buf_va = NULL;
1020         int err, i;
1021         u8 name[NAME_MAX];
1022
1023         RTE_ASSERT(user_key != NULL);
1024         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1025         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1026                 &rss_key_buf_pa, name);
1027         if (!rss_key_buf_va)
1028                 return -ENOMEM;
1029
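             /* The VIC stores the hash key as 10-byte subkeys, hence the
              * key[i / 10].b[i % 10] addressing.
              */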
1030         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1031                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1032
1033         err = enic_set_rss_key(enic,
1034                 rss_key_buf_pa,
1035                 sizeof(union vnic_rss_key));
1036
1037         /* Save for later queries */
1038         if (!err) {
1039                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1040                            sizeof(union vnic_rss_key));
1041         }
1042         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1043                 rss_key_buf_va, rss_key_buf_pa);
1044
1045         return err;
1046 }
1047
1048 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1049 {
1050         dma_addr_t rss_cpu_buf_pa;
1051         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1052         int err;
1053         u8 name[NAME_MAX];
1054
1055         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1056         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1057                 &rss_cpu_buf_pa, name);
1058         if (!rss_cpu_buf_va)
1059                 return -ENOMEM;
1060
1061         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1062
1063         err = enic_set_rss_cpu(enic,
1064                 rss_cpu_buf_pa,
1065                 sizeof(union vnic_rss_cpu));
1066
1067         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1068                 rss_cpu_buf_va, rss_cpu_buf_pa);
1069
1070         /* Save for later queries */
1071         if (!err)
1072                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1073         return err;
1074 }
1075
1076 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1077         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1078 {
1079         const u8 tso_ipid_split_en = 0;
1080         int err;
1081
1082         err = enic_set_nic_cfg(enic,
1083                 rss_default_cpu, rss_hash_type,
1084                 rss_hash_bits, rss_base_cpu,
1085                 rss_enable, tso_ipid_split_en,
1086                 enic->ig_vlan_strip_en);
1087
1088         return err;
1089 }
1090
1091 /* Initialize RSS with defaults, called from dev_configure */
1092 int enic_init_rss_nic_cfg(struct enic *enic)
1093 {
1094         static uint8_t default_rss_key[] = {
1095                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1096                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1097                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1098                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1099         };
1100         struct rte_eth_rss_conf rss_conf;
1101         union vnic_rss_cpu rss_cpu;
1102         int ret, i;
1103
1104         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1105         /*
1106          * If setting key for the first time, and the user gives us none, then
1107          * push the default key to NIC.
1108          */
1109         if (rss_conf.rss_key == NULL) {
1110                 rss_conf.rss_key = default_rss_key;
1111                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1112         }
1113         ret = enic_set_rss_conf(enic, &rss_conf);
1114         if (ret) {
1115                 dev_err(enic, "Failed to configure RSS\n");
1116                 return ret;
1117         }
1118         if (enic->rss_enable) {
1119                 /* If enabling RSS, use the default reta */
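                     /* Map each entry to a sop RQ index, round-robin over rq_count */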
1120                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1121                         rss_cpu.cpu[i / 4].b[i % 4] =
1122                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1123                 }
1124                 ret = enic_set_rss_reta(enic, &rss_cpu);
1125                 if (ret)
1126                         dev_err(enic, "Failed to set RSS indirection table\n");
1127         }
1128         return ret;
1129 }
1130
1131 int enic_setup_finish(struct enic *enic)
1132 {
1133         enic_init_soft_stats(enic);
1134
1135         /* Default conf */
1136         vnic_dev_packet_filter(enic->vdev,
1137                 1 /* directed  */,
1138                 1 /* multicast */,
1139                 1 /* broadcast */,
1140                 0 /* promisc   */,
1141                 1 /* allmulti  */);
1142
1143         enic->promisc = 0;
1144         enic->allmulti = 1;
1145
1146         return 0;
1147 }
1148
1149 static int enic_rss_conf_valid(struct enic *enic,
1150                                struct rte_eth_rss_conf *rss_conf)
1151 {
1152         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1153         if (enic->flow_type_rss_offloads == 0)
1154                 return 0;
1155         if (rss_conf->rss_key != NULL &&
1156             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1157                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1158                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1159                 return -EINVAL;
1160         }
1161         if (rss_conf->rss_hf != 0 &&
1162             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1163                 dev_err(enic, "Given rss_hf contains none of the supported"
1164                         " types\n");
1165                 return -EINVAL;
1166         }
1167         return 0;
1168 }
1169
1170 /* Set hash type and key according to rss_conf */
1171 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1172 {
1173         struct rte_eth_dev *eth_dev;
1174         uint64_t rss_hf;
1175         u8 rss_hash_type;
1176         u8 rss_enable;
1177         int ret;
1178
1179         RTE_ASSERT(rss_conf != NULL);
1180         ret = enic_rss_conf_valid(enic, rss_conf);
1181         if (ret) {
1182                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1183                 return ret;
1184         }
1185
1186         eth_dev = enic->rte_dev;
1187         rss_hash_type = 0;
1188         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1189         if (enic->rq_count > 1 &&
1190             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1191             rss_hf != 0) {
1192                 rss_enable = 1;
1193                 if (rss_hf & ETH_RSS_IPV4)
1194                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1195                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1196                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1197                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1198                         /*
1199                          * 'TCP' is not a typo. HW does not have a separate
1200                          * enable bit for UDP RSS. The TCP bit enables both TCP
1201                          * and UDP RSS.
1202                          */
1203                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1204                 }
1205                 if (rss_hf & ETH_RSS_IPV6)
1206                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1207                 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1208                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1209                 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP) {
1210                         /* Again, 'TCP' is not a typo. */
1211                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1212                 }
1213                 if (rss_hf & ETH_RSS_IPV6_EX)
1214                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6_EX;
1215                 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1216                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX;
1217         } else {
1218                 rss_enable = 0;
1219                 rss_hf = 0;
1220         }
1221
1222         /* Set the hash key if provided */
1223         if (rss_enable && rss_conf->rss_key) {
1224                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1225                 if (ret) {
1226                         dev_err(enic, "Failed to set RSS key\n");
1227                         return ret;
1228                 }
1229         }
1230
1231         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1232                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1233                               rss_enable);
1234         if (!ret) {
1235                 enic->rss_hf = rss_hf;
1236                 enic->rss_hash_type = rss_hash_type;
1237                 enic->rss_enable = rss_enable;
1238         }
1239         return ret;
1240 }
1241
1242 int enic_set_vlan_strip(struct enic *enic)
1243 {
1244         /*
1245          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1246          * together. So, re-do niccfg, preserving the current RSS settings.
1247          */
1248         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1249                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1250                                enic->rss_enable);
1251 }
1252
1253 void enic_add_packet_filter(struct enic *enic)
1254 {
1255         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1256         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1257                 enic->promisc, enic->allmulti);
1258 }
1259
1260 int enic_get_link_status(struct enic *enic)
1261 {
1262         return vnic_dev_link_status(enic->vdev);
1263 }
1264
1265 static void enic_dev_deinit(struct enic *enic)
1266 {
1267         struct rte_eth_dev *eth_dev = enic->rte_dev;
1268
1269         /* stop link status checking */
1270         vnic_dev_notify_unset(enic->vdev);
1271
1272         rte_free(eth_dev->data->mac_addrs);
1273         rte_free(enic->cq);
1274         rte_free(enic->intr);
1275         rte_free(enic->rq);
1276         rte_free(enic->wq);
1277 }
1278
1279
1280 int enic_set_vnic_res(struct enic *enic)
1281 {
1282         struct rte_eth_dev *eth_dev = enic->rte_dev;
1283         int rc = 0;
1284         unsigned int required_rq, required_wq, required_cq, required_intr;
1285
1286         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1287         required_rq = eth_dev->data->nb_rx_queues * 2;
1288         required_wq = eth_dev->data->nb_tx_queues;
1289         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1290         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1291         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1292                 required_intr += eth_dev->data->nb_rx_queues;
1293         }
1294
1295         if (enic->conf_rq_count < required_rq) {
1296                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1297                         eth_dev->data->nb_rx_queues,
1298                         required_rq, enic->conf_rq_count);
1299                 rc = -EINVAL;
1300         }
1301         if (enic->conf_wq_count < required_wq) {
1302                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1303                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1304                 rc = -EINVAL;
1305         }
1306
1307         if (enic->conf_cq_count < required_cq) {
1308                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1309                         required_cq, enic->conf_cq_count);
1310                 rc = -EINVAL;
1311         }
1312         if (enic->conf_intr_count < required_intr) {
1313                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1314                         " interrupts. Required:%u, Configured:%u\n",
1315                         required_intr, enic->conf_intr_count);
1316                 rc = -EINVAL;
1317         }
1318
1319         if (rc == 0) {
1320                 enic->rq_count = eth_dev->data->nb_rx_queues;
1321                 enic->wq_count = eth_dev->data->nb_tx_queues;
1322                 enic->cq_count = enic->rq_count + enic->wq_count;
1323                 enic->intr_count = required_intr;
1324         }
1325
1326         return rc;
1327 }
1328
1329 /* Re-initialize an RQ and its completion queue, then refill the Rx buffers */
1330 static int
1331 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1332 {
1333         struct vnic_rq *sop_rq, *data_rq;
1334         unsigned int cq_idx;
1335         int rc = 0;
1336
1337         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1338         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1339         cq_idx = rq_idx;
1340
1341         vnic_cq_clean(&enic->cq[cq_idx]);
1342         vnic_cq_init(&enic->cq[cq_idx],
1343                      0 /* flow_control_enable */,
1344                      1 /* color_enable */,
1345                      0 /* cq_head */,
1346                      0 /* cq_tail */,
1347                      1 /* cq_tail_color */,
1348                      0 /* interrupt_enable */,
1349                      1 /* cq_entry_enable */,
1350                      0 /* cq_message_enable */,
1351                      0 /* interrupt offset */,
1352                      0 /* cq_message_addr */);
1353
1354
1355         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1356                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1357                            sop_rq->ring.desc_count - 1, 1, 0);
1358         if (data_rq->in_use) {
1359                 vnic_rq_init_start(data_rq,
1360                                    enic_cq_rq(enic,
1361                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1362                                    data_rq->ring.desc_count - 1, 1, 0);
1363         }
1364
1365         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1366         if (rc)
1367                 return rc;
1368
1369         if (data_rq->in_use) {
1370                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1371                 if (rc) {
1372                         enic_rxmbuf_queue_release(enic, sop_rq);
1373                         return rc;
1374                 }
1375         }
1376
1377         return 0;
1378 }
1379
1380 /* The Cisco NIC can send and receive packets up to a max packet size
1381  * determined by the NIC type and firmware. There is also an MTU
1382  * configured into the NIC via the CIMC/UCSM management interface
1383  * which can be overridden by this function (up to the max packet size).
1384  * Depending on the network setup, doing so may cause packet drops
1385  * and unexpected behavior.
1386  */
1387 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1388 {
1389         unsigned int rq_idx;
1390         struct vnic_rq *rq;
1391         int rc = 0;
1392         uint16_t old_mtu;       /* previous setting */
1393         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1394         struct rte_eth_dev *eth_dev = enic->rte_dev;
1395
1396         old_mtu = eth_dev->data->mtu;
1397         config_mtu = enic->config.mtu;
1398
1399         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1400                 return -E_RTE_SECONDARY;
1401
1402         if (new_mtu > enic->max_mtu) {
1403                 dev_err(enic,
1404                         "MTU not updated: requested (%u) greater than max (%u)\n",
1405                         new_mtu, enic->max_mtu);
1406                 return -EINVAL;
1407         }
1408         if (new_mtu < ENIC_MIN_MTU) {
1409                 dev_info(enic,
1410                         "MTU not updated: requested (%u) less than min (%u)\n",
1411                         new_mtu, ENIC_MIN_MTU);
1412                 return -EINVAL;
1413         }
1414         if (new_mtu > config_mtu)
1415                 dev_warning(enic,
1416                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1417                         new_mtu, config_mtu);
1418
1419         /* The easy case is when scatter is disabled. However, if the MTU
1420          * becomes greater than the mbuf data size, packet drops will ensue.
1421          */
1422         if (!(enic->rte_dev->data->dev_conf.rxmode.offloads &
1423               DEV_RX_OFFLOAD_SCATTER)) {
1424                 eth_dev->data->mtu = new_mtu;
1425                 goto set_mtu_done;
1426         }
1427
1428         /* Rx scatter is enabled, so reconfigure the RQs on the fly. The point is
1429          * to change Rx scatter mode if necessary for better performance, i.e. if
1430          * the MTU was greater than the mbuf size and now it's less, scatter Rx
1431          * doesn't have to be used and vice versa.
1432          */
1433         rte_spinlock_lock(&enic->mtu_lock);
1434
1435         /* Stop traffic on all RQs */
1436         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1437                 rq = &enic->rq[rq_idx];
1438                 if (rq->is_sop && rq->in_use) {
1439                         rc = enic_stop_rq(enic,
1440                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1441                         if (rc) {
1442                                 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1443                                 goto set_mtu_done;
1444                         }
1445                 }
1446         }
1447
1448         /* replace Rx function with a no-op to avoid getting stale pkts */
1449         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1450         rte_mb();
1451
1452         /* Allow time for threads to exit the real Rx function. */
1453         usleep(100000);
1454
1455         /* now it is safe to reconfigure the RQs */
1456
1457         /* update the mtu */
1458         eth_dev->data->mtu = new_mtu;
1459
1460         /* free and reallocate RQs with the new MTU */
1461         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1462                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1463                 if (!rq->in_use)
1464                         continue;
1465
1466                 enic_free_rq(rq);
1467                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1468                                    rq->tot_nb_desc, rq->rx_free_thresh);
1469                 if (rc) {
1470                         dev_err(enic,
1471                                 "Fatal MTU alloc error - no traffic will pass\n");
1472                         goto set_mtu_done;
1473                 }
1474
1475                 rc = enic_reinit_rq(enic, rq_idx);
1476                 if (rc) {
1477                         dev_err(enic,
1478                                 "Fatal MTU RQ reinit error - no traffic will pass\n");
1479                         goto set_mtu_done;
1480                 }
1481         }
1482
1483         /* put back the real receive function */
1484         rte_mb();
1485         eth_dev->rx_pkt_burst = enic_recv_pkts;
1486         rte_mb();
1487
1488         /* restart Rx traffic */
1489         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1490                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1491                 if (rq->is_sop && rq->in_use)
1492                         enic_start_rq(enic, rq_idx);
1493         }
1494
1495 set_mtu_done:
1496         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1497         rte_spinlock_unlock(&enic->mtu_lock);
1498         return rc;
1499 }
1500
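     /* Per-device initialization: read the vNIC configuration, allocate the
      * CQ/interrupt/RQ/WQ arrays, and set up MAC addresses, flow state, and
      * overlay offload.
      */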
1501 static int enic_dev_init(struct enic *enic)
1502 {
1503         int err;
1504         struct rte_eth_dev *eth_dev = enic->rte_dev;
1505
1506         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1507
1508         /* Get vNIC configuration */
1510         err = enic_get_vnic_config(enic);
1511         if (err) {
1512                 dev_err(enic, "Get vNIC configuration failed, aborting\n");
1513                 return err;
1514         }
1515
1516         /* Get available resource counts */
1517         enic_get_res_counts(enic);
1518         if (enic->conf_rq_count == 1) {
1519                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1520                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1521                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1522                 return -EINVAL;
1523         }
1524         /* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1525         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1526                                enic->conf_cq_count, 8);
1527         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1528                                  enic->conf_intr_count, 8);
1529         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1530                                enic->conf_rq_count, 8);
1531         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1532                                enic->conf_wq_count, 8);
1533         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1534                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1535                 return -1;
1536         }
1537         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1538                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1539                 return -1;
1540         }
1541         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1542                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1543                 return -1;
1544         }
1545         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1546                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1547                 return -1;
1548         }
1549
1550         /* Get the supported filters */
1551         enic_fdir_info(enic);
1552
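             /* Set up MAC address storage and install the primary address */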
1553         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1554                                                 * ENIC_MAX_MAC_ADDR, 0);
1555         if (!eth_dev->data->mac_addrs) {
1556                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1557                 return -1;
1558         }
1559         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1560                         eth_dev->data->mac_addrs);
1561
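             /* Clear the vNIC device reset flag */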
1562         vnic_dev_set_reset_flag(enic->vdev, 0);
1563
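             /* Initialize the rte_flow list and its lock */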
1564         LIST_INIT(&enic->flows);
1565         rte_spinlock_init(&enic->flows_lock);
1566
1567         /* set up link status checking */
1568         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1569
1570         enic->overlay_offload = false;
1571         if (!enic->disable_overlay && enic->vxlan &&
1572             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1573             vnic_dev_overlay_offload_ctrl(enic->vdev,
1574                                           OVERLAY_FEATURE_VXLAN,
1575                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1576                 enic->tx_offload_capa |=
1577                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1578                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1579                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1580                 /*
1581                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1582                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1583                  */
1584                 enic->tx_offload_mask |=
1585                         PKT_TX_OUTER_IP_CKSUM |
1586                         PKT_TX_TUNNEL_MASK;
1587                 enic->overlay_offload = true;
1588                 dev_info(enic, "Overlay offload is enabled\n");
1589         }
1590
1591         return 0;
1593 }
1594
1595 int enic_probe(struct enic *enic)
1596 {
1597         struct rte_pci_device *pdev = enic->pdev;
1598         int err = -1;
1599
1600         dev_debug(enic, "Initializing ENIC PMD\n");
1601
1602         /* If this is a secondary process, the hardware is already initialized */
1603         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1604                 return 0;
1605
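             /* Only BAR 0 is used; it holds the vNIC resources */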
1606         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1607         enic->bar0.len = pdev->mem_resource[0].len;
1608
1609         /* Register vNIC device */
1610         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1611         if (!enic->vdev) {
1612                 dev_err(enic, "vNIC registration failed, aborting\n");
1613                 goto err_out;
1614         }
1615
1616         LIST_INIT(&enic->memzone_list);
1617         rte_spinlock_init(&enic->memzone_list_lock);
1618
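             /* Register the allocator callbacks used for DMA-consistent memory */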
1619         vnic_register_cbacks(enic->vdev,
1620                 enic_alloc_consistent,
1621                 enic_free_consistent);
1622
1623         /*
1624          * Allocate the consistent memory for stats upfront so both primary and
1625          * secondary processes can dump stats.
1626          */
1627         err = vnic_dev_alloc_stats_mem(enic->vdev);
1628         if (err) {
1629                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1630                 goto err_out_unregister;
1631         }
1632         /* Issue device open to get device in known state */
1633         err = enic_dev_open(enic);
1634         if (err) {
1635                 dev_err(enic, "vNIC dev open failed, aborting\n");
1636                 goto err_out_unregister;
1637         }
1638
1639         /* Set ingress vlan rewrite mode before vnic initialization */
1640         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1641                 IG_VLAN_REWRITE_MODE_PASS_THRU);
1642         if (err) {
1643                 dev_err(enic,
1644                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1645                 goto err_out_dev_close;
1646         }
1647
1648         /* Issue device init to initialize the vnic-to-switch link.
1649          * We'll start with carrier off and wait for link UP
1650          * notification later to turn on carrier.  We don't need
1651          * to wait here for the vnic-to-switch link initialization
1652          * to complete; link UP notification is the indication that
1653          * the process is complete.
1654          */
1655
1656         err = vnic_dev_init(enic->vdev, 0);
1657         if (err) {
1658                 dev_err(enic, "vNIC dev init failed, aborting\n");
1659                 goto err_out_dev_close;
1660         }
1661
1662         err = enic_dev_init(enic);
1663         if (err) {
1664                 dev_err(enic, "Device initialization failed, aborting\n");
1665                 goto err_out_dev_close;
1666         }
1667
1668         return 0;
1669
1670 err_out_dev_close:
1671         vnic_dev_close(enic->vdev);
1672 err_out_unregister:
1673         vnic_dev_unregister(enic->vdev);
1674 err_out:
1675         return err;
1676 }
1677
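     /* Release the resources set up by enic_probe() */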
1678 void enic_remove(struct enic *enic)
1679 {
1680         enic_dev_deinit(enic);
1681         vnic_dev_close(enic->vdev);
1682         vnic_dev_unregister(enic->vdev);
1683 }