1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2008-2017 Cisco Systems, Inc. All rights reserved.
3 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
13 #include <rte_bus_pci.h>
14 #include <rte_memzone.h>
15 #include <rte_malloc.h>
17 #include <rte_string_fns.h>
18 #include <ethdev_driver.h>
19 #include <rte_geneve.h>
21 #include "enic_compat.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
31 #include "vnic_intr.h"
34 static inline int enic_is_sriov_vf(struct enic *enic)
36 return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
39 static int is_zero_addr(uint8_t *addr)
41 return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
44 static int is_mcast_addr(uint8_t *addr)
49 static int is_eth_addr_valid(uint8_t *addr)
51 return !is_mcast_addr(addr) && !is_zero_addr(addr);
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
59 if (!rq || !rq->mbuf_ring) {
60 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
64 for (i = 0; i < rq->ring.desc_count; i++) {
65 if (rq->mbuf_ring[i]) {
66 rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67 rq->mbuf_ring[i] = NULL;
72 void enic_free_wq_buf(struct rte_mbuf **buf)
74 struct rte_mbuf *mbuf = *buf;
76 rte_pktmbuf_free_seg(mbuf);
80 static void enic_log_q_error(struct enic *enic)
83 uint32_t error_status;
85 for (i = 0; i < enic->wq_count; i++) {
86 error_status = vnic_wq_error_status(&enic->wq[i]);
88 dev_err(enic, "WQ[%d] error_status %d\n", i,
92 for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93 if (!enic->rq[i].in_use)
95 error_status = vnic_rq_error_status(&enic->rq[i]);
97 dev_err(enic, "RQ[%d] error_status %d\n", i,
102 static void enic_clear_soft_stats(struct enic *enic)
104 struct enic_soft_stats *soft_stats = &enic->soft_stats;
105 rte_atomic64_clear(&soft_stats->rx_nombuf);
106 rte_atomic64_clear(&soft_stats->rx_packet_errors);
107 rte_atomic64_clear(&soft_stats->tx_oversized);
110 static void enic_init_soft_stats(struct enic *enic)
112 struct enic_soft_stats *soft_stats = &enic->soft_stats;
113 rte_atomic64_init(&soft_stats->rx_nombuf);
114 rte_atomic64_init(&soft_stats->rx_packet_errors);
115 rte_atomic64_init(&soft_stats->tx_oversized);
116 enic_clear_soft_stats(enic);
119 int enic_dev_stats_clear(struct enic *enic)
123 ret = vnic_dev_stats_clear(enic->vdev);
125 dev_err(enic, "Error in clearing stats\n");
128 enic_clear_soft_stats(enic);
133 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
135 struct vnic_stats *stats;
136 struct enic_soft_stats *soft_stats = &enic->soft_stats;
137 int64_t rx_truncated;
138 uint64_t rx_packet_errors;
139 int ret = vnic_dev_stats_dump(enic->vdev, &stats);
142 dev_err(enic, "Error in getting stats\n");
146 /* The number of truncated packets can only be calculated by
147 * subtracting a hardware counter from error packets received by
148 * the driver. Note: this causes transient inaccuracies in the
149 * ipackets count. Also, the lengths of truncated packets are
150 * counted in ibytes even though truncated packets are dropped,
151 * which can make ibytes slightly higher than it should be.
153 rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
154 rx_truncated = rx_packet_errors - stats->rx.rx_errors;
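/*
 * Illustrative example: if the driver has counted 10 Rx packet errors but
 * the hardware rx_errors counter reads 7, the remaining 3 packets are
 * taken to be truncated frames, per the comment above.
 */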
156 r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
157 r_stats->opackets = stats->tx.tx_frames_ok;
159 r_stats->ibytes = stats->rx.rx_bytes_ok;
160 r_stats->obytes = stats->tx.tx_bytes_ok;
162 r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
163 r_stats->oerrors = stats->tx.tx_errors
164 + rte_atomic64_read(&soft_stats->tx_oversized);
166 r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
168 r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
172 int enic_del_mac_address(struct enic *enic, int mac_index)
174 struct rte_eth_dev *eth_dev = enic->rte_dev;
175 uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
177 return vnic_dev_del_addr(enic->vdev, mac_addr);
180 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
184 if (!is_eth_addr_valid(mac_addr)) {
185 dev_err(enic, "invalid mac address\n");
189 err = vnic_dev_add_addr(enic->vdev, mac_addr);
191 dev_err(enic, "add mac addr failed\n");
195 void enic_free_rq_buf(struct rte_mbuf **mbuf)
200 rte_pktmbuf_free(*mbuf);
204 void enic_init_vnic_resources(struct enic *enic)
206 unsigned int error_interrupt_enable = 1;
207 unsigned int error_interrupt_offset = 0;
208 unsigned int rxq_interrupt_enable = 0;
209 unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
210 unsigned int index = 0;
212 struct vnic_rq *data_rq;
214 if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
215 rxq_interrupt_enable = 1;
217 for (index = 0; index < enic->rq_count; index++) {
218 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
220 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
222 error_interrupt_enable,
223 error_interrupt_offset);
225 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
227 vnic_rq_init(data_rq,
229 error_interrupt_enable,
230 error_interrupt_offset);
231 vnic_cq_init(&enic->cq[cq_idx],
232 0 /* flow_control_enable */,
233 1 /* color_enable */,
236 1 /* cq_tail_color */,
237 rxq_interrupt_enable,
238 1 /* cq_entry_enable */,
239 0 /* cq_message_enable */,
240 rxq_interrupt_offset,
241 0 /* cq_message_addr */);
242 if (rxq_interrupt_enable)
243 rxq_interrupt_offset++;
246 for (index = 0; index < enic->wq_count; index++) {
247 vnic_wq_init(&enic->wq[index],
248 enic_cq_wq(enic, index),
249 error_interrupt_enable,
250 error_interrupt_offset);
251 /* Compute unsupported ol flags for enic_prep_pkts() */
252 enic->wq[index].tx_offload_notsup_mask =
253 PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
255 cq_idx = enic_cq_wq(enic, index);
256 vnic_cq_init(&enic->cq[cq_idx],
257 0 /* flow_control_enable */,
258 1 /* color_enable */,
261 1 /* cq_tail_color */,
262 0 /* interrupt_enable */,
263 0 /* cq_entry_enable */,
264 1 /* cq_message_enable */,
265 0 /* interrupt offset */,
266 (uint64_t)enic->wq[index].cqmsg_rz->iova);
269 for (index = 0; index < enic->intr_count; index++) {
270 vnic_intr_init(&enic->intr[index],
271 enic->config.intr_timer_usec,
272 enic->config.intr_timer_type,
273 /*mask_on_assertion*/1);
279 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
282 struct rq_enet_desc *rqd = rq->ring.descs;
285 uint32_t max_rx_pkt_len;
291 dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
292 rq->ring.desc_count);
295 * If *not* using scatter and the mbuf size is greater than the
296 * requested max packet size (max_rx_pkt_len), then reduce the
297 * posted buffer size to max_rx_pkt_len. HW still receives packets
298 * larger than max_rx_pkt_len, but they will be truncated, and we
299 * drop them in the rx handler. Not ideal, but better than returning
300 * large packets when the user is not expecting them.
302 max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
303 rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
304 if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
305 rq_buf_len = max_rx_pkt_len;
306 for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
307 mb = rte_mbuf_raw_alloc(rq->mp);
309 dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
310 (unsigned)rq->index);
314 mb->data_off = RTE_PKTMBUF_HEADROOM;
315 dma_addr = (dma_addr_t)(mb->buf_iova
316 + RTE_PKTMBUF_HEADROOM);
317 rq_enet_desc_enc(rqd, dma_addr,
318 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
319 : RQ_ENET_TYPE_NOT_SOP),
321 rq->mbuf_ring[i] = mb;
324 * Do not post the buffers to the NIC until we enable the RQ via
327 rq->need_initial_post = true;
328 /* Initialize fetch index while RQ is disabled */
329 iowrite32(0, &rq->ctrl->fetch_index);
334 * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
335 * allocated the buffers and filled the RQ descriptor ring. Just need to push
336 * the post index to the NIC.
339 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
341 if (!rq->in_use || !rq->need_initial_post)
344 /* make sure all prior writes are complete before doing the PIO write */
347 /* Post all but the last buffer to VIC. */
348 rq->posted_index = rq->ring.desc_count - 1;
352 dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
353 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
354 iowrite32(rq->posted_index, &rq->ctrl->posted_index);
356 rq->need_initial_post = false;
360 enic_alloc_consistent(void *priv, size_t size,
361 dma_addr_t *dma_handle, uint8_t *name)
364 const struct rte_memzone *rz;
366 struct enic *enic = (struct enic *)priv;
367 struct enic_memzone_entry *mze;
369 rz = rte_memzone_reserve_aligned((const char *)name, size,
370 SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
372 pr_err("%s : Failed to allocate memory requested for %s\n",
378 *dma_handle = (dma_addr_t)rz->iova;
380 mze = rte_malloc("enic memzone entry",
381 sizeof(struct enic_memzone_entry), 0);
384 pr_err("%s : Failed to allocate memory for memzone list\n",
386 rte_memzone_free(rz);
392 rte_spinlock_lock(&enic->memzone_list_lock);
393 LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
394 rte_spinlock_unlock(&enic->memzone_list_lock);
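/*
 * Note: the list entry added above is what lets enic_free_consistent()
 * later map a (vaddr, dma_handle) pair back to its memzone.
 */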
400 enic_free_consistent(void *priv,
401 __rte_unused size_t size,
403 dma_addr_t dma_handle)
405 struct enic_memzone_entry *mze;
406 struct enic *enic = (struct enic *)priv;
408 rte_spinlock_lock(&enic->memzone_list_lock);
409 LIST_FOREACH(mze, &enic->memzone_list, entries) {
410 if (mze->rz->addr == vaddr &&
411 mze->rz->iova == dma_handle)
415 rte_spinlock_unlock(&enic->memzone_list_lock);
417 "Tried to free memory, but couldn't find it in the memzone list\n");
420 LIST_REMOVE(mze, entries);
421 rte_spinlock_unlock(&enic->memzone_list_lock);
422 rte_memzone_free(mze->rz);
426 int enic_link_update(struct rte_eth_dev *eth_dev)
428 struct enic *enic = pmd_priv(eth_dev);
429 struct rte_eth_link link;
431 memset(&link, 0, sizeof(link));
432 link.link_status = enic_get_link_status(enic);
433 link.link_duplex = ETH_LINK_FULL_DUPLEX;
434 link.link_speed = vnic_dev_port_speed(enic->vdev);
436 return rte_eth_linkstatus_set(eth_dev, &link);
440 enic_intr_handler(void *arg)
442 struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
443 struct enic *enic = pmd_priv(dev);
445 vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
447 enic_link_update(dev);
448 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
449 enic_log_q_error(enic);
450 /* Re-enable irq in case of INTx */
451 rte_intr_ack(&enic->pdev->intr_handle);
454 static int enic_rxq_intr_init(struct enic *enic)
456 struct rte_intr_handle *intr_handle;
457 uint32_t rxq_intr_count, i;
460 intr_handle = enic->rte_dev->intr_handle;
461 if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
464 * Rx queue interrupts only work when we have MSI-X interrupts,
465 * one per queue. Sharing one interrupt is technically
466 * possible with VIC, but it is not worth the complications it brings.
468 if (!rte_intr_cap_multiple(intr_handle)) {
469 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
470 " (vfio-pci driver)\n");
473 rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
474 err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
476 dev_err(enic, "Failed to enable event fds for Rx queue"
480 intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
481 rxq_intr_count * sizeof(int), 0);
482 if (intr_handle->intr_vec == NULL) {
483 dev_err(enic, "Failed to allocate intr_vec\n");
486 for (i = 0; i < rxq_intr_count; i++)
487 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
491 static void enic_rxq_intr_deinit(struct enic *enic)
493 struct rte_intr_handle *intr_handle;
495 intr_handle = enic->rte_dev->intr_handle;
496 rte_intr_efd_disable(intr_handle);
497 if (intr_handle->intr_vec != NULL) {
498 rte_free(intr_handle->intr_vec);
499 intr_handle->intr_vec = NULL;
503 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
505 struct wq_enet_desc *desc;
510 * Fill WQ descriptor fields that never change. Every descriptor is
511 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
512 * descriptors (i.e. request one completion update every 32 packets).
514 wq = &enic->wq[queue_idx];
515 desc = (struct wq_enet_desc *)wq->ring.descs;
516 for (i = 0; i < wq->ring.desc_count; i++, desc++) {
517 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
518 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
519 desc->header_length_flags |=
520 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
525 * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
526 * used when that file is not compiled.
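 * (The weak/strong pairing presumably relies on __rte_weak: when the AVX2
 * object is built, its strong definition overrides this stub at link time.)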
529 enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
534 void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
536 struct enic *enic = pmd_priv(eth_dev);
539 ENICPMD_LOG(DEBUG, " use the normal Rx handler for 64B CQ entry");
540 eth_dev->rx_pkt_burst = &enic_recv_pkts_64;
545 * 1. The vectorized handler if possible and requested.
546 * 2. The non-scatter, simplified handler if scatter Rx is not used.
547 * 3. The default handler as a fallback.
549 if (enic_use_vector_rx_handler(eth_dev))
551 if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
552 ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
553 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
555 ENICPMD_LOG(DEBUG, " use the normal Rx handler");
556 eth_dev->rx_pkt_burst = &enic_recv_pkts;
560 /* Secondary process uses this to set the Tx handler */
561 void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
563 struct enic *enic = pmd_priv(eth_dev);
565 if (enic->use_simple_tx_handler) {
566 ENICPMD_LOG(DEBUG, " use the simple tx handler");
567 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
569 ENICPMD_LOG(DEBUG, " use the default tx handler");
570 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
574 int enic_enable(struct enic *enic)
578 struct rte_eth_dev *eth_dev = enic->rte_dev;
579 uint64_t simple_tx_offloads;
582 if (enic->enable_avx2_rx) {
583 struct rte_mbuf mb_def = { .buf_addr = 0 };
586 * mbuf_initializer contains const-after-init fields of
587 * receive mbufs (i.e. 64 bits of fields from rearm_data).
588 * It is currently used by the vectorized handler.
591 mb_def.data_off = RTE_PKTMBUF_HEADROOM;
592 mb_def.port = enic->port_id;
593 rte_mbuf_refcnt_set(&mb_def, 1);
594 rte_compiler_barrier();
595 p = (uintptr_t)&mb_def.rearm_data;
596 enic->mbuf_initializer = *(uint64_t *)p;
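/*
 * The vectorized Rx path can then rearm a recycled mbuf with a single
 * 64-bit store of mbuf_initializer, a common pattern in DPDK vector PMDs.
 */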
599 eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
600 eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
602 /* vnic notification of link status has already been turned on in
603 * enic_dev_init() which is called during probe time. Here we are
604 * just turning on interrupt vector 0 if needed.
606 if (eth_dev->data->dev_conf.intr_conf.lsc)
607 vnic_dev_notify_set(enic->vdev, 0);
609 err = enic_rxq_intr_init(enic);
613 /* Initialize flowman if not already initialized during probe */
614 if (enic->fm == NULL && enic_fm_init(enic))
615 dev_warning(enic, "Init of flowman failed.\n");
617 for (index = 0; index < enic->rq_count; index++) {
618 err = enic_alloc_rx_queue_mbufs(enic,
619 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
621 dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
624 err = enic_alloc_rx_queue_mbufs(enic,
625 &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
627 /* release the allocated mbufs for the sop rq */
628 enic_rxmbuf_queue_release(enic,
629 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
631 dev_err(enic, "Failed to alloc data RX queue mbufs\n");
637 * Use the simple TX handler if possible. Only checksum offloads
638 * and vlan insertion are supported.
640 simple_tx_offloads = enic->tx_offload_capa &
641 (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
642 DEV_TX_OFFLOAD_VLAN_INSERT |
643 DEV_TX_OFFLOAD_IPV4_CKSUM |
644 DEV_TX_OFFLOAD_UDP_CKSUM |
645 DEV_TX_OFFLOAD_TCP_CKSUM);
646 if ((eth_dev->data->dev_conf.txmode.offloads &
647 ~simple_tx_offloads) == 0) {
648 ENICPMD_LOG(DEBUG, " use the simple tx handler");
649 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
650 for (index = 0; index < enic->wq_count; index++)
651 enic_prep_wq_for_simple_tx(enic, index);
652 enic->use_simple_tx_handler = 1;
654 ENICPMD_LOG(DEBUG, " use the default tx handler");
655 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
658 enic_pick_rx_handler(eth_dev);
660 for (index = 0; index < enic->wq_count; index++)
661 enic_start_wq(enic, index);
662 for (index = 0; index < enic->rq_count; index++)
663 enic_start_rq(enic, index);
665 vnic_dev_add_addr(enic->vdev, enic->mac_addr);
667 vnic_dev_enable_wait(enic->vdev);
669 /* Register and enable error interrupt */
670 rte_intr_callback_register(&(enic->pdev->intr_handle),
671 enic_intr_handler, (void *)enic->rte_dev);
673 rte_intr_enable(&(enic->pdev->intr_handle));
674 /* Unmask LSC interrupt */
675 vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
680 int enic_alloc_intr_resources(struct enic *enic)
685 dev_info(enic, "vNIC resources used: "\
686 "wq %d rq %d cq %d intr %d\n",
687 enic->wq_count, enic_vnic_rq_count(enic),
688 enic->cq_count, enic->intr_count);
690 for (i = 0; i < enic->intr_count; i++) {
691 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
693 enic_free_vnic_resources(enic);
700 void enic_free_rq(void *rxq)
702 struct vnic_rq *rq_sop, *rq_data;
708 rq_sop = (struct vnic_rq *)rxq;
709 enic = vnic_dev_priv(rq_sop->vdev);
710 rq_data = &enic->rq[rq_sop->data_queue_idx];
712 if (rq_sop->free_mbufs) {
713 struct rte_mbuf **mb;
716 mb = rq_sop->free_mbufs;
717 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
718 i < ENIC_RX_BURST_MAX; i++)
719 rte_pktmbuf_free(mb[i]);
720 rte_free(rq_sop->free_mbufs);
721 rq_sop->free_mbufs = NULL;
722 rq_sop->num_free_mbufs = 0;
725 enic_rxmbuf_queue_release(enic, rq_sop);
727 enic_rxmbuf_queue_release(enic, rq_data);
729 rte_free(rq_sop->mbuf_ring);
731 rte_free(rq_data->mbuf_ring);
733 rq_sop->mbuf_ring = NULL;
734 rq_data->mbuf_ring = NULL;
736 vnic_rq_free(rq_sop);
738 vnic_rq_free(rq_data);
740 vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
746 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
748 struct rte_eth_dev_data *data = enic->dev_data;
749 vnic_wq_enable(&enic->wq[queue_idx]);
750 data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
753 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
755 struct rte_eth_dev_data *data = enic->dev_data;
758 ret = vnic_wq_disable(&enic->wq[queue_idx]);
762 data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
766 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
768 struct rte_eth_dev_data *data = enic->dev_data;
769 struct vnic_rq *rq_sop;
770 struct vnic_rq *rq_data;
771 rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
772 rq_data = &enic->rq[rq_sop->data_queue_idx];
774 if (rq_data->in_use) {
775 vnic_rq_enable(rq_data);
776 enic_initial_post_rx(enic, rq_data);
779 vnic_rq_enable(rq_sop);
780 enic_initial_post_rx(enic, rq_sop);
781 data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
784 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
786 struct rte_eth_dev_data *data = enic->dev_data;
787 int ret1 = 0, ret2 = 0;
788 struct vnic_rq *rq_sop;
789 struct vnic_rq *rq_data;
790 rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
791 rq_data = &enic->rq[rq_sop->data_queue_idx];
793 ret2 = vnic_rq_disable(rq_sop);
796 ret1 = vnic_rq_disable(rq_data);
803 data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
807 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
808 unsigned int socket_id, struct rte_mempool *mp,
809 uint16_t nb_desc, uint16_t free_thresh)
811 struct enic_vf_representor *vf;
813 uint16_t sop_queue_idx;
814 uint16_t data_queue_idx;
816 struct vnic_rq *rq_sop;
817 struct vnic_rq *rq_data;
818 unsigned int mbuf_size, mbufs_per_pkt;
819 unsigned int nb_sop_desc, nb_data_desc;
820 uint16_t min_sop, max_sop, min_data, max_data;
821 uint32_t max_rx_pkt_len;
824 * Representor uses a reserved PF queue. Translate representor
825 * queue number to PF queue number.
827 if (enic_is_vf_rep(enic)) {
828 RTE_ASSERT(queue_idx == 0);
829 vf = VF_ENIC_TO_VF_REP(enic);
830 sop_queue_idx = vf->pf_rq_sop_idx;
831 data_queue_idx = vf->pf_rq_data_idx;
833 queue_idx = sop_queue_idx;
835 sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
836 data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
838 cq_idx = enic_cq_rq(enic, sop_queue_idx);
839 rq_sop = &enic->rq[sop_queue_idx];
840 rq_data = &enic->rq[data_queue_idx];
842 rq_sop->data_queue_idx = data_queue_idx;
844 rq_data->data_queue_idx = 0;
845 rq_sop->socket_id = socket_id;
847 rq_data->socket_id = socket_id;
850 rq_sop->rx_free_thresh = free_thresh;
851 rq_data->rx_free_thresh = free_thresh;
852 dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
855 mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
856 RTE_PKTMBUF_HEADROOM);
857 /* max_rx_pkt_len includes the ethernet header and CRC. */
858 max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
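/*
 * For example, a 1500-byte MTU corresponds to a max_rx_pkt_len of 1518
 * (1500 + 14-byte Ethernet header + 4-byte CRC); compare
 * enic_mtu_to_max_rx_pktlen(), used below.
 */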
860 if (enic->rte_dev->data->dev_conf.rxmode.offloads &
861 DEV_RX_OFFLOAD_SCATTER) {
862 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
863 /* ceil((max pkt len)/mbuf_size) */
864 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
866 dev_info(enic, "Scatter rx mode disabled\n");
868 if (max_rx_pkt_len > mbuf_size) {
869 dev_warning(enic, "The maximum Rx packet size (%u) is"
870 " larger than the mbuf size (%u), and"
871 " scatter is disabled. Larger packets will"
873 max_rx_pkt_len, mbuf_size);
877 if (mbufs_per_pkt > 1) {
878 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
879 rq_sop->data_queue_enable = 1;
882 * HW does not directly support rxmode.max_rx_pkt_len. HW always
883 * receives packet sizes up to the "max" MTU.
884 * If not using scatter, we can achieve the effect of dropping
885 * larger packets by reducing the size of posted buffers.
886 * See enic_alloc_rx_queue_mbufs().
889 enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
890 dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
891 " when scatter rx mode is in use.\n");
894 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
896 rq_sop->data_queue_enable = 0;
900 /* number of descriptors has to be a multiple of 32 */
901 nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
902 nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
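/*
 * Worked example (illustrative, assuming ENIC_ALIGN_DESCS_MASK rounds down
 * to a multiple of 32 as the comment above says): nb_desc = 512 with
 * mbufs_per_pkt = 3 gives nb_sop_desc = (512 / 3) & mask = 160 and
 * nb_data_desc = (512 - 160) & mask = 352.
 */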
904 rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
905 rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
907 if (mbufs_per_pkt > 1) {
908 min_sop = ENIC_RX_BURST_MAX;
909 max_sop = ((enic->config.rq_desc_count /
910 (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
911 min_data = min_sop * (mbufs_per_pkt - 1);
912 max_data = enic->config.rq_desc_count;
914 min_sop = ENIC_RX_BURST_MAX;
915 max_sop = enic->config.rq_desc_count;
920 if (nb_desc < (min_sop + min_data)) {
922 "Number of rx descs too low, adjusting to minimum\n");
923 nb_sop_desc = min_sop;
924 nb_data_desc = min_data;
925 } else if (nb_desc > (max_sop + max_data)) {
927 "Number of rx_descs too high, adjusting to maximum\n");
928 nb_sop_desc = max_sop;
929 nb_data_desc = max_data;
931 if (mbufs_per_pkt > 1) {
932 dev_info(enic, "For max packet size %u and mbuf size %u valid"
933 " rx descriptor range is %u to %u\n",
934 max_rx_pkt_len, mbuf_size, min_sop + min_data,
937 dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
938 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
940 /* Allocate sop queue resources */
941 rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
942 nb_sop_desc, sizeof(struct rq_enet_desc));
944 dev_err(enic, "error in allocation of sop rq\n");
947 nb_sop_desc = rq_sop->ring.desc_count;
949 if (rq_data->in_use) {
950 /* Allocate data queue resources */
951 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
953 sizeof(struct rq_enet_desc));
955 dev_err(enic, "error in allocation of data rq\n");
956 goto err_free_rq_sop;
958 nb_data_desc = rq_data->ring.desc_count;
960 /* Enable 64B CQ entry if requested */
961 if (enic->cq64 && vnic_dev_set_cq_entry_size(enic->vdev,
962 sop_queue_idx, VNIC_RQ_CQ_ENTRY_SIZE_64)) {
963 dev_err(enic, "failed to enable 64B CQ entry on sop rq\n");
964 goto err_free_rq_data;
966 if (rq_data->in_use && enic->cq64 &&
967 vnic_dev_set_cq_entry_size(enic->vdev, data_queue_idx,
968 VNIC_RQ_CQ_ENTRY_SIZE_64)) {
969 dev_err(enic, "failed to enable 64B CQ entry on data rq\n");
970 goto err_free_rq_data;
973 rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
974 socket_id, nb_sop_desc + nb_data_desc,
975 enic->cq64 ? sizeof(struct cq_enet_rq_desc_64) :
976 sizeof(struct cq_enet_rq_desc));
978 dev_err(enic, "error in allocation of cq for rq\n");
979 goto err_free_rq_data;
982 /* Allocate the mbuf rings */
983 rq_sop->mbuf_ring = (struct rte_mbuf **)
984 rte_zmalloc_socket("rq->mbuf_ring",
985 sizeof(struct rte_mbuf *) * nb_sop_desc,
986 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
987 if (rq_sop->mbuf_ring == NULL)
990 if (rq_data->in_use) {
991 rq_data->mbuf_ring = (struct rte_mbuf **)
992 rte_zmalloc_socket("rq->mbuf_ring",
993 sizeof(struct rte_mbuf *) * nb_data_desc,
994 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
995 if (rq_data->mbuf_ring == NULL)
996 goto err_free_sop_mbuf;
999 rq_sop->free_mbufs = (struct rte_mbuf **)
1000 rte_zmalloc_socket("rq->free_mbufs",
1001 sizeof(struct rte_mbuf *) *
1003 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
1004 if (rq_sop->free_mbufs == NULL)
1005 goto err_free_data_mbuf;
1006 rq_sop->num_free_mbufs = 0;
1008 rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
1013 rte_free(rq_data->mbuf_ring);
1015 rte_free(rq_sop->mbuf_ring);
1017 /* cleanup on error */
1018 vnic_cq_free(&enic->cq[cq_idx]);
1020 if (rq_data->in_use)
1021 vnic_rq_free(rq_data);
1023 vnic_rq_free(rq_sop);
1028 void enic_free_wq(void *txq)
1036 wq = (struct vnic_wq *)txq;
1037 enic = vnic_dev_priv(wq->vdev);
1038 rte_memzone_free(wq->cqmsg_rz);
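/*
 * Rx CQs occupy indexes [0, rq_count) and Tx CQs follow them, so this WQ's
 * completion queue sits at rq_count + wq->index (the same layout
 * enic_cq_wq() is expected to encode).
 */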
1040 vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
1043 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
1044 unsigned int socket_id, uint16_t nb_desc)
1046 struct enic_vf_representor *vf;
1049 unsigned int cq_index;
1050 char name[RTE_MEMZONE_NAMESIZE];
1051 static int instance;
1054 * Representor uses a reserved PF queue. Translate representor
1055 * queue number to PF queue number.
1057 if (enic_is_vf_rep(enic)) {
1058 RTE_ASSERT(queue_idx == 0);
1059 vf = VF_ENIC_TO_VF_REP(enic);
1060 queue_idx = vf->pf_wq_idx;
1061 cq_index = vf->pf_wq_cq_idx;
1064 cq_index = enic_cq_wq(enic, queue_idx);
1066 wq = &enic->wq[queue_idx];
1067 wq->socket_id = socket_id;
1069 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
1070 * print an info message for diagnostics.
1072 dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
1074 /* Allocate queue resources */
1075 err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
1077 sizeof(struct wq_enet_desc));
1079 dev_err(enic, "error in allocation of wq\n");
1083 err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
1085 sizeof(struct cq_enet_wq_desc));
1088 dev_err(enic, "error in allocation of cq for wq\n");
1091 /* set up CQ message */
1092 snprintf((char *)name, sizeof(name),
1093 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
1096 wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
1097 sizeof(uint32_t), SOCKET_ID_ANY,
1098 RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
1105 int enic_disable(struct enic *enic)
1110 for (i = 0; i < enic->intr_count; i++) {
1111 vnic_intr_mask(&enic->intr[i]);
1112 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
1114 enic_rxq_intr_deinit(enic);
1115 rte_intr_disable(&enic->pdev->intr_handle);
1116 rte_intr_callback_unregister(&enic->pdev->intr_handle,
1118 (void *)enic->rte_dev);
1120 vnic_dev_disable(enic->vdev);
1122 enic_fm_destroy(enic);
1124 if (!enic_is_sriov_vf(enic))
1125 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1127 for (i = 0; i < enic->wq_count; i++) {
1128 err = vnic_wq_disable(&enic->wq[i]);
1132 for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1133 if (enic->rq[i].in_use) {
1134 err = vnic_rq_disable(&enic->rq[i]);
1140 /* If we were using interrupts, set the interrupt vector to -1
1141 * to disable interrupts. We are not disabling link notifications,
1142 * though, as we want the polling of link status to continue working.
1144 if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1145 vnic_dev_notify_set(enic->vdev, -1);
1147 vnic_dev_set_reset_flag(enic->vdev, 1);
1149 for (i = 0; i < enic->wq_count; i++)
1150 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1152 for (i = 0; i < enic_vnic_rq_count(enic); i++)
1153 if (enic->rq[i].in_use)
1154 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1155 for (i = 0; i < enic->cq_count; i++)
1156 vnic_cq_clean(&enic->cq[i]);
1157 for (i = 0; i < enic->intr_count; i++)
1158 vnic_intr_clean(&enic->intr[i]);
1163 static int enic_dev_wait(struct vnic_dev *vdev,
1164 int (*start)(struct vnic_dev *, int),
1165 int (*finished)(struct vnic_dev *, int *),
1172 err = start(vdev, arg);
1176 /* Wait for func to complete...2 seconds max */
1177 for (i = 0; i < 2000; i++) {
1178 err = finished(vdev, &done);
1188 static int enic_dev_open(struct enic *enic)
1191 int flags = CMD_OPENF_IG_DESCCACHE;
1193 err = enic_dev_wait(enic->vdev, vnic_dev_open,
1194 vnic_dev_open_done, flags);
1196 dev_err(enic_get_dev(enic),
1197 "vNIC device open failed, err %d\n", err);
1202 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1204 dma_addr_t rss_key_buf_pa;
1205 union vnic_rss_key *rss_key_buf_va = NULL;
1207 uint8_t name[RTE_MEMZONE_NAMESIZE];
1209 RTE_ASSERT(user_key != NULL);
1210 snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
1211 rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1212 &rss_key_buf_pa, name);
1213 if (!rss_key_buf_va)
1216 for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1217 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
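/*
 * The key[i / 10].b[i % 10] indexing above scatters the 40-byte key into
 * the 10-byte groups that union vnic_rss_key uses.
 */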
1219 err = enic_set_rss_key(enic,
1221 sizeof(union vnic_rss_key));
1223 /* Save for later queries */
1225 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1226 sizeof(union vnic_rss_key));
1228 enic_free_consistent(enic, sizeof(union vnic_rss_key),
1229 rss_key_buf_va, rss_key_buf_pa);
1234 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1236 dma_addr_t rss_cpu_buf_pa;
1237 union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1239 uint8_t name[RTE_MEMZONE_NAMESIZE];
1241 snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
1242 rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1243 &rss_cpu_buf_pa, name);
1244 if (!rss_cpu_buf_va)
1247 rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1249 err = enic_set_rss_cpu(enic,
1251 sizeof(union vnic_rss_cpu));
1253 enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1254 rss_cpu_buf_va, rss_cpu_buf_pa);
1256 /* Save for later queries */
1258 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1262 static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
1263 uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
1266 const uint8_t tso_ipid_split_en = 0;
1269 err = enic_set_nic_cfg(enic,
1270 rss_default_cpu, rss_hash_type,
1271 rss_hash_bits, rss_base_cpu,
1272 rss_enable, tso_ipid_split_en,
1273 enic->ig_vlan_strip_en);
1278 /* Initialize RSS with defaults, called from dev_configure */
1279 int enic_init_rss_nic_cfg(struct enic *enic)
1281 static uint8_t default_rss_key[] = {
1282 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1283 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1284 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1285 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1287 struct rte_eth_rss_conf rss_conf;
1288 union vnic_rss_cpu rss_cpu;
1291 rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1293 * If setting key for the first time, and the user gives us none, then
1294 * push the default key to NIC.
1296 if (rss_conf.rss_key == NULL) {
1297 rss_conf.rss_key = default_rss_key;
1298 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1300 ret = enic_set_rss_conf(enic, &rss_conf);
1302 dev_err(enic, "Failed to configure RSS\n");
1305 if (enic->rss_enable) {
1306 /* If enabling RSS, use the default reta */
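/* (i.e. spread RETA entries round-robin across the SOP RQs of the Rx queues) */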
1307 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1308 rss_cpu.cpu[i / 4].b[i % 4] =
1309 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1311 ret = enic_set_rss_reta(enic, &rss_cpu);
1313 dev_err(enic, "Failed to set RSS indirection table\n");
1318 int enic_setup_finish(struct enic *enic)
1320 enic_init_soft_stats(enic);
1322 /* switchdev: enable promisc mode on PF */
1323 if (enic->switchdev_mode) {
1324 vnic_dev_packet_filter(enic->vdev,
1335 vnic_dev_packet_filter(enic->vdev,
1348 static int enic_rss_conf_valid(struct enic *enic,
1349 struct rte_eth_rss_conf *rss_conf)
1351 /* RSS is disabled per VIC settings. Ignore rss_conf. */
1352 if (enic->flow_type_rss_offloads == 0)
1354 if (rss_conf->rss_key != NULL &&
1355 rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1356 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1357 rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1360 if (rss_conf->rss_hf != 0 &&
1361 (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1362 dev_err(enic, "Given rss_hf contains none of the supported"
1369 /* Set hash type and key according to rss_conf */
1370 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1372 struct rte_eth_dev *eth_dev;
1374 uint8_t rss_hash_type;
1378 RTE_ASSERT(rss_conf != NULL);
1379 ret = enic_rss_conf_valid(enic, rss_conf);
1381 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1385 eth_dev = enic->rte_dev;
1387 rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1388 if (enic->rq_count > 1 &&
1389 (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1392 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1393 ETH_RSS_NONFRAG_IPV4_OTHER))
1394 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1395 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1396 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1397 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1398 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1399 if (enic->udp_rss_weak) {
1401 * 'TCP' is not a typo. The "weak" version of
1402 * UDP RSS requires both the TCP and UDP bits
1403 * be set. It does enable TCP RSS as well.
1405 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1408 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1409 ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1410 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1411 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1412 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1413 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1414 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1415 if (enic->udp_rss_weak)
1416 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1423 /* Set the hash key if provided */
1424 if (rss_enable && rss_conf->rss_key) {
1425 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1427 dev_err(enic, "Failed to set RSS key\n");
1432 ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1433 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1436 enic->rss_hf = rss_hf;
1437 enic->rss_hash_type = rss_hash_type;
1438 enic->rss_enable = rss_enable;
1440 dev_err(enic, "Failed to update RSS configurations."
1441 " hash=0x%x\n", rss_hash_type);
1446 int enic_set_vlan_strip(struct enic *enic)
1449 * Unfortunately, VLAN strip on/off and RSS on/off are configured
1450 * together. So, re-do niccfg, preserving the current RSS settings.
1452 return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1453 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1457 int enic_add_packet_filter(struct enic *enic)
1459 /* switchdev ignores packet filters */
1460 if (enic->switchdev_mode) {
1461 ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
1464 /* Args -> directed, multicast, broadcast, promisc, allmulti */
1465 return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1466 enic->promisc, enic->allmulti);
1469 int enic_get_link_status(struct enic *enic)
1471 return vnic_dev_link_status(enic->vdev);
1474 static void enic_dev_deinit(struct enic *enic)
1476 /* stop link status checking */
1477 vnic_dev_notify_unset(enic->vdev);
1479 /* mac_addrs is freed by rte_eth_dev_release_port() */
1481 rte_free(enic->intr);
1487 int enic_set_vnic_res(struct enic *enic)
1489 struct rte_eth_dev *eth_dev = enic->rte_dev;
1491 unsigned int required_rq, required_wq, required_cq, required_intr;
1493 /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1494 required_rq = eth_dev->data->nb_rx_queues * 2;
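/* e.g. 4 eth_dev Rx queues need 8 vNIC RQs: one SOP RQ plus one data RQ each */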
1495 required_wq = eth_dev->data->nb_tx_queues;
1496 required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1497 required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1498 if (eth_dev->data->dev_conf.intr_conf.rxq) {
1499 required_intr += eth_dev->data->nb_rx_queues;
1501 ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
1502 required_rq, required_wq, required_cq);
1503 if (enic->vf_required_rq) {
1504 /* Queues needed for VF representors */
1505 required_rq += enic->vf_required_rq;
1506 required_wq += enic->vf_required_wq;
1507 required_cq += enic->vf_required_cq;
1508 ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
1509 enic->vf_required_rq, enic->vf_required_wq,
1510 enic->vf_required_cq);
1513 if (enic->conf_rq_count < required_rq) {
1514 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1515 eth_dev->data->nb_rx_queues,
1516 required_rq, enic->conf_rq_count);
1519 if (enic->conf_wq_count < required_wq) {
1520 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1521 eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1525 if (enic->conf_cq_count < required_cq) {
1526 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1527 required_cq, enic->conf_cq_count);
1530 if (enic->conf_intr_count < required_intr) {
1531 dev_err(dev, "Not enough Interrupts to support Rx queue"
1532 " interrupts. Required:%u, Configured:%u\n",
1533 required_intr, enic->conf_intr_count);
1538 enic->rq_count = eth_dev->data->nb_rx_queues;
1539 enic->wq_count = eth_dev->data->nb_tx_queues;
1540 enic->cq_count = enic->rq_count + enic->wq_count;
1541 enic->intr_count = required_intr;
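/*
 * Interrupt vector layout, as used elsewhere in this file: vector
 * ENICPMD_LSC_INTR_OFFSET handles link-state and error notifications via
 * enic_intr_handler(), and Rx queue vectors start at
 * ENICPMD_RXQ_INTR_OFFSET (see enic_rxq_intr_init()).
 */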
1547 /* Reinitialize the completion queue and descriptor rings for an RQ */
1549 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1551 struct vnic_rq *sop_rq, *data_rq;
1552 unsigned int cq_idx;
1555 sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1556 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
1557 cq_idx = enic_cq_rq(enic, rq_idx);
1559 vnic_cq_clean(&enic->cq[cq_idx]);
1560 vnic_cq_init(&enic->cq[cq_idx],
1561 0 /* flow_control_enable */,
1562 1 /* color_enable */,
1565 1 /* cq_tail_color */,
1566 0 /* interrupt_enable */,
1567 1 /* cq_entry_enable */,
1568 0 /* cq_message_enable */,
1569 0 /* interrupt offset */,
1570 0 /* cq_message_addr */);
1573 vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1574 enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1575 sop_rq->ring.desc_count - 1, 1, 0);
1576 if (data_rq->in_use) {
1577 vnic_rq_init_start(data_rq,
1579 enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
1580 0, data_rq->ring.desc_count - 1, 1, 0);
1583 rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1587 if (data_rq->in_use) {
1588 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1590 enic_rxmbuf_queue_release(enic, sop_rq);
1598 /* The Cisco NIC can send and receive packets up to a max packet size
1599 * determined by the NIC type and firmware. There is also an MTU
1600 * configured into the NIC via the CIMC/UCSM management interface
1601 * which can be overridden by this function (up to the max packet size).
1602 * Depending on the network setup, doing so may cause packet drops
1603 * and unexpected behavior.
1605 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1607 unsigned int rq_idx;
1610 uint16_t old_mtu; /* previous setting */
1611 uint16_t config_mtu; /* Value configured into NIC via CIMC/UCSM */
1612 struct rte_eth_dev *eth_dev = enic->rte_dev;
1614 old_mtu = eth_dev->data->mtu;
1615 config_mtu = enic->config.mtu;
1617 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1618 return -E_RTE_SECONDARY;
1620 if (new_mtu > enic->max_mtu) {
1622 "MTU not updated: requested (%u) greater than max (%u)\n",
1623 new_mtu, enic->max_mtu);
1626 if (new_mtu < ENIC_MIN_MTU) {
1628 "MTU not updated: requested (%u) less than min (%u)\n",
1629 new_mtu, ENIC_MIN_MTU);
1632 if (new_mtu > config_mtu)
1634 "MTU (%u) is greater than value configured in NIC (%u)\n",
1635 new_mtu, config_mtu);
1637 /* Update the MTU and maximum packet length */
1638 eth_dev->data->mtu = new_mtu;
1639 eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1640 enic_mtu_to_max_rx_pktlen(new_mtu);
1643 * If the device has not started (enic_enable), nothing to do.
1644 * Later, enic_enable() will set up RQs reflecting the new maximum
1647 if (!eth_dev->data->dev_started)
1651 * The device has started, re-do RQs on the fly. In the process, we
1652 * pick up the new maximum packet length.
1654 * Some applications rely on the ability to change MTU without stopping
1655 * the device. So keep this behavior for now.
1657 rte_spinlock_lock(&enic->mtu_lock);
1659 /* Stop traffic on all RQs */
1660 for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1661 rq = &enic->rq[rq_idx];
1662 if (rq->is_sop && rq->in_use) {
1663 rc = enic_stop_rq(enic,
1664 enic_sop_rq_idx_to_rte_idx(rq_idx));
1666 dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
1672 /* replace Rx function with a no-op to avoid getting stale pkts */
1673 eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1676 /* Allow time for threads to exit the real Rx function. */
1679 /* now it is safe to reconfigure the RQs */
1682 /* free and reallocate RQs with the new MTU */
1683 for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1684 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1689 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1690 rq->tot_nb_desc, rq->rx_free_thresh);
1693 "Fatal MTU alloc error- No traffic will pass\n");
1697 rc = enic_reinit_rq(enic, rq_idx);
1700 "Fatal MTU RQ reinit- No traffic will pass\n");
1705 /* put back the real receive function */
1707 enic_pick_rx_handler(eth_dev);
1710 /* restart Rx traffic */
1711 for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1712 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1713 if (rq->is_sop && rq->in_use)
1714 enic_start_rq(enic, rq_idx);
1718 dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1719 rte_spinlock_unlock(&enic->mtu_lock);
1724 enic_disable_overlay_offload(struct enic *enic)
1727 * Disabling fails if the feature is provisioned but
1728 * not enabled. So ignore result and do not log error.
1731 vnic_dev_overlay_offload_ctrl(enic->vdev,
1732 OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_DISABLE);
1735 vnic_dev_overlay_offload_ctrl(enic->vdev,
1736 OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_DISABLE);
1741 enic_enable_overlay_offload(struct enic *enic)
1743 if (enic->vxlan && vnic_dev_overlay_offload_ctrl(enic->vdev,
1744 OVERLAY_FEATURE_VXLAN, OVERLAY_OFFLOAD_ENABLE) != 0) {
1745 dev_err(NULL, "failed to enable VXLAN offload\n");
1748 if (enic->geneve && vnic_dev_overlay_offload_ctrl(enic->vdev,
1749 OVERLAY_FEATURE_GENEVE, OVERLAY_OFFLOAD_ENABLE) != 0) {
1750 dev_err(NULL, "failed to enable Geneve offload\n");
1753 enic->tx_offload_capa |=
1754 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1755 (enic->geneve ? DEV_TX_OFFLOAD_GENEVE_TNL_TSO : 0) |
1756 (enic->vxlan ? DEV_TX_OFFLOAD_VXLAN_TNL_TSO : 0);
1757 enic->tx_offload_mask |=
1760 PKT_TX_OUTER_IP_CKSUM |
1762 enic->overlay_offload = true;
1764 if (enic->vxlan && enic->geneve)
1765 dev_info(NULL, "Overlay offload is enabled (VxLAN, Geneve)\n");
1766 else if (enic->vxlan)
1767 dev_info(NULL, "Overlay offload is enabled (VxLAN)\n");
1769 dev_info(NULL, "Overlay offload is enabled (Geneve)\n");
1775 enic_reset_overlay_port(struct enic *enic)
1778 enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
1780 * Reset the vxlan port to the default, as the NIC firmware
1781 * does not reset it automatically and keeps the old setting.
1783 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1784 OVERLAY_CFG_VXLAN_PORT_UPDATE,
1785 RTE_VXLAN_DEFAULT_PORT)) {
1786 dev_err(enic, "failed to update vxlan port\n");
1791 enic->geneve_port = RTE_GENEVE_DEFAULT_PORT;
1792 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1793 OVERLAY_CFG_GENEVE_PORT_UPDATE,
1794 RTE_GENEVE_DEFAULT_PORT)) {
1795 dev_err(enic, "failed to update geneve port\n");
1802 static int enic_dev_init(struct enic *enic)
1805 struct rte_eth_dev *eth_dev = enic->rte_dev;
1807 vnic_dev_intr_coal_timer_info_default(enic->vdev);
1809 /* Get vNIC configuration
1811 err = enic_get_vnic_config(enic);
1813 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1817 /* Get available resource counts */
1818 enic_get_res_counts(enic);
1819 if (enic->conf_rq_count == 1) {
1820 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1821 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1822 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1825 /* Queue counts may be zero. rte_zmalloc returns NULL in that case. */
1826 enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1827 enic->conf_cq_count, 8);
1828 enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1829 enic->conf_intr_count, 8);
1830 enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1831 enic->conf_rq_count, 8);
1832 enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1833 enic->conf_wq_count, 8);
1834 if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1835 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1838 if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1839 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1842 if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1843 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1846 if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1847 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1851 eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
1852 sizeof(struct rte_ether_addr) *
1853 ENIC_UNICAST_PERFECT_FILTERS, 0);
1854 if (!eth_dev->data->mac_addrs) {
1855 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1858 rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
1859 eth_dev->data->mac_addrs);
1861 vnic_dev_set_reset_flag(enic->vdev, 0);
1863 LIST_INIT(&enic->flows);
1865 /* set up link status checking */
1866 vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1868 enic->overlay_offload = false;
1870 * First, explicitly disable overlay offload as the setting is
1871 * sticky, and resetting vNIC may not disable it.
1873 enic_disable_overlay_offload(enic);
1874 /* Then, enable overlay offload according to vNIC flags */
1875 if (!enic->disable_overlay && (enic->vxlan || enic->geneve)) {
1876 err = enic_enable_overlay_offload(enic);
1878 dev_info(NULL, "failed to enable overlay offload\n");
1883 * Reset the vxlan/geneve port if HW parsing is available. It
1884 * is always enabled regardless of overlay offload
1887 err = enic_reset_overlay_port(enic);
1891 if (enic_fm_init(enic))
1892 dev_warning(enic, "Init of flowman failed.\n");
1896 static void lock_devcmd(void *priv)
1898 struct enic *enic = priv;
1900 rte_spinlock_lock(&enic->devcmd_lock);
1903 static void unlock_devcmd(void *priv)
1905 struct enic *enic = priv;
1907 rte_spinlock_unlock(&enic->devcmd_lock);
1910 int enic_probe(struct enic *enic)
1912 struct rte_pci_device *pdev = enic->pdev;
1915 dev_debug(enic, "Initializing ENIC PMD\n");
1917 /* if this is a secondary process the hardware is already initialized */
1918 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1921 enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1922 enic->bar0.len = pdev->mem_resource[0].len;
1924 /* Register vNIC device */
1925 enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1927 dev_err(enic, "vNIC registration failed, aborting\n");
1931 LIST_INIT(&enic->memzone_list);
1932 rte_spinlock_init(&enic->memzone_list_lock);
1934 vnic_register_cbacks(enic->vdev,
1935 enic_alloc_consistent,
1936 enic_free_consistent);
1939 * Allocate the consistent memory for stats upfront so both primary and
1940 * secondary processes can dump stats.
1942 err = vnic_dev_alloc_stats_mem(enic->vdev);
1944 dev_err(enic, "Failed to allocate cmd memory, aborting\n");
1945 goto err_out_unregister;
1947 /* Issue device open to get device in known state */
1948 err = enic_dev_open(enic);
1950 dev_err(enic, "vNIC dev open failed, aborting\n");
1951 goto err_out_unregister;
1954 /* Set ingress vlan rewrite mode before vnic initialization */
1955 dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1956 enic->ig_vlan_rewrite_mode);
1957 err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1958 enic->ig_vlan_rewrite_mode);
1961 "Failed to set ingress vlan rewrite mode, aborting.\n");
1962 goto err_out_dev_close;
1965 /* Issue device init to initialize the vnic-to-switch link.
1966 * We'll start with carrier off and wait for link UP
1967 * notification later to turn on carrier. We don't need
1968 * to wait here for the vnic-to-switch link initialization
1969 * to complete; link UP notification is the indication that
1970 * the process is complete.
1973 err = vnic_dev_init(enic->vdev, 0);
1975 dev_err(enic, "vNIC dev init failed, aborting\n");
1976 goto err_out_dev_close;
1979 err = enic_dev_init(enic);
1981 dev_err(enic, "Device initialization failed, aborting\n");
1982 goto err_out_dev_close;
1985 /* Use a PF spinlock to serialize devcmd from PF and VF representors */
1986 if (enic->switchdev_mode) {
1987 rte_spinlock_init(&enic->devcmd_lock);
1988 vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
1993 vnic_dev_close(enic->vdev);
1995 vnic_dev_unregister(enic->vdev);
2000 void enic_remove(struct enic *enic)
2002 enic_dev_deinit(enic);
2003 vnic_dev_close(enic->vdev);
2004 vnic_dev_unregister(enic->vdev);