/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
 * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
 */

#include <stdio.h>

#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>

#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_ethdev_driver.h>

#include "enic_compat.h"
#include "enic.h"
#include "wq_enet_desc.h"
#include "rq_enet_desc.h"
#include "cq_enet_desc.h"
#include "vnic_enet.h"
#include "vnic_dev.h"
#include "vnic_wq.h"
#include "vnic_rq.h"
#include "vnic_cq.h"
#include "vnic_intr.h"
#include "vnic_nic.h"

static inline int enic_is_sriov_vf(struct enic *enic)
{
	return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
}

static int is_zero_addr(uint8_t *addr)
{
	return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
}

static int is_mcast_addr(uint8_t *addr)
{
	return addr[0] & 1;
}

static int is_eth_addr_valid(uint8_t *addr)
{
	return !is_mcast_addr(addr) && !is_zero_addr(addr);
}

void
enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
{
	uint16_t i;

	if (!rq || !rq->mbuf_ring) {
		dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
		return;
	}

	for (i = 0; i < rq->ring.desc_count; i++) {
		if (rq->mbuf_ring[i]) {
			rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
			rq->mbuf_ring[i] = NULL;
		}
	}
}

void enic_free_wq_buf(struct rte_mbuf **buf)
{
	struct rte_mbuf *mbuf = *buf;

	rte_pktmbuf_free_seg(mbuf);
	*buf = NULL;
}

static void enic_log_q_error(struct enic *enic)
{
	unsigned int i;
	uint32_t error_status;

	for (i = 0; i < enic->wq_count; i++) {
		error_status = vnic_wq_error_status(&enic->wq[i]);
		if (error_status)
			dev_err(enic, "WQ[%d] error_status %d\n", i,
				error_status);
	}

	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
		if (!enic->rq[i].in_use)
			continue;
		error_status = vnic_rq_error_status(&enic->rq[i]);
		if (error_status)
			dev_err(enic, "RQ[%d] error_status %d\n", i,
				error_status);
	}
}

static void enic_clear_soft_stats(struct enic *enic)
{
	struct enic_soft_stats *soft_stats = &enic->soft_stats;
	rte_atomic64_clear(&soft_stats->rx_nombuf);
	rte_atomic64_clear(&soft_stats->rx_packet_errors);
	rte_atomic64_clear(&soft_stats->tx_oversized);
}

static void enic_init_soft_stats(struct enic *enic)
{
	struct enic_soft_stats *soft_stats = &enic->soft_stats;
	rte_atomic64_init(&soft_stats->rx_nombuf);
	rte_atomic64_init(&soft_stats->rx_packet_errors);
	rte_atomic64_init(&soft_stats->tx_oversized);
	enic_clear_soft_stats(enic);
}

int enic_dev_stats_clear(struct enic *enic)
{
	int ret;

	ret = vnic_dev_stats_clear(enic->vdev);
	if (ret != 0) {
		dev_err(enic, "Error in clearing stats\n");
		return ret;
	}
	enic_clear_soft_stats(enic);

	return 0;
}

int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
{
	struct vnic_stats *stats;
	struct enic_soft_stats *soft_stats = &enic->soft_stats;
	int64_t rx_truncated;
	uint64_t rx_packet_errors;
	int ret = vnic_dev_stats_dump(enic->vdev, &stats);

	if (ret) {
		dev_err(enic, "Error in getting stats\n");
		return ret;
	}

	/* The number of truncated packets can only be calculated by
	 * subtracting a hardware counter from error packets received by
	 * the driver. Note: this causes transient inaccuracies in the
	 * ipackets count. Also, the lengths of truncated packets are
	 * counted in ibytes even though truncated packets are dropped,
	 * which can make ibytes slightly higher than it should be.
	 */
	rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
	rx_truncated = rx_packet_errors - stats->rx.rx_errors;
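	/*
	 * Worked example (illustrative numbers only): if the driver counted
	 * 10 Rx packet errors but the hardware rx_errors counter reads 7,
	 * the remaining 3 must be truncated packets that only the driver
	 * observed; they are folded into imissed below and subtracted from
	 * ipackets.
	 */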
	r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
	r_stats->opackets = stats->tx.tx_frames_ok;

	r_stats->ibytes = stats->rx.rx_bytes_ok;
	r_stats->obytes = stats->tx.tx_bytes_ok;

	r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
	r_stats->oerrors = stats->tx.tx_errors
			   + rte_atomic64_read(&soft_stats->tx_oversized);

	r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;

	r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
	return 0;
}

int enic_del_mac_address(struct enic *enic, int mac_index)
{
	struct rte_eth_dev *eth_dev = enic->rte_dev;
	uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;

	return vnic_dev_del_addr(enic->vdev, mac_addr);
}

int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
{
	int err;

	if (!is_eth_addr_valid(mac_addr)) {
		dev_err(enic, "invalid mac address\n");
		return -EINVAL;
	}

	err = vnic_dev_add_addr(enic->vdev, mac_addr);
	if (err)
		dev_err(enic, "add mac addr failed\n");
	return err;
}

void enic_free_rq_buf(struct rte_mbuf **mbuf)
{
	if (*mbuf == NULL)
		return;

	rte_pktmbuf_free(*mbuf);
	*mbuf = NULL;
}

void enic_init_vnic_resources(struct enic *enic)
{
	unsigned int error_interrupt_enable = 1;
	unsigned int error_interrupt_offset = 0;
	unsigned int rxq_interrupt_enable = 0;
	unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
	unsigned int index = 0;
	unsigned int cq_idx;
	struct vnic_rq *data_rq;

	if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
		rxq_interrupt_enable = 1;

	for (index = 0; index < enic->rq_count; index++) {
		cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));

		vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
			cq_idx,
			error_interrupt_enable,
			error_interrupt_offset);

		data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)];
		if (data_rq->in_use)
			vnic_rq_init(data_rq,
				cq_idx,
				error_interrupt_enable,
				error_interrupt_offset);
		vnic_cq_init(&enic->cq[cq_idx],
			0 /* flow_control_enable */,
			1 /* color_enable */,
			0 /* cq_head */,
			0 /* cq_tail */,
			1 /* cq_tail_color */,
			rxq_interrupt_enable,
			1 /* cq_entry_enable */,
			0 /* cq_message_enable */,
			rxq_interrupt_offset,
			0 /* cq_message_addr */);
		if (rxq_interrupt_enable)
			rxq_interrupt_offset++;
	}

	for (index = 0; index < enic->wq_count; index++) {
		vnic_wq_init(&enic->wq[index],
			enic_cq_wq(enic, index),
			error_interrupt_enable,
			error_interrupt_offset);
		/* Compute unsupported ol flags for enic_prep_pkts() */
		enic->wq[index].tx_offload_notsup_mask =
			PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
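		/*
		 * tx_offload_mask is a subset of PKT_TX_OFFLOAD_MASK, so the
		 * XOR above effectively clears the supported bits, leaving
		 * exactly the offload flags this WQ cannot handle for
		 * enic_prep_pkts() to reject.
		 */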

		cq_idx = enic_cq_wq(enic, index);
		vnic_cq_init(&enic->cq[cq_idx],
			0 /* flow_control_enable */,
			1 /* color_enable */,
			0 /* cq_head */,
			0 /* cq_tail */,
			1 /* cq_tail_color */,
			0 /* interrupt_enable */,
			0 /* cq_entry_enable */,
			1 /* cq_message_enable */,
			0 /* interrupt offset */,
			(uint64_t)enic->wq[index].cqmsg_rz->iova);
	}

	for (index = 0; index < enic->intr_count; index++) {
		vnic_intr_init(&enic->intr[index],
			enic->config.intr_timer_usec,
			enic->config.intr_timer_type,
			/*mask_on_assertion*/1);
	}
}

int
enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
{
	struct rte_mbuf *mb;
	struct rq_enet_desc *rqd = rq->ring.descs;
	unsigned int i;
	dma_addr_t dma_addr;
	uint32_t max_rx_pkt_len;
	uint16_t rq_buf_len;

	if (!rq->in_use)
		return 0;

	dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
		  rq->ring.desc_count);

	/*
	 * If *not* using scatter and the mbuf size is greater than the
	 * requested max packet size (max_rx_pkt_len), then reduce the
	 * posted buffer size to max_rx_pkt_len. HW still receives packets
	 * larger than max_rx_pkt_len, but they will be truncated, which we
	 * drop in the rx handler. Not ideal, but better than returning
	 * large packets when the user is not expecting them.
	 */
	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
	rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
	if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
		rq_buf_len = max_rx_pkt_len;
	for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
		mb = rte_mbuf_raw_alloc(rq->mp);
		if (mb == NULL) {
			dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
				(unsigned int)rq->index);
			return -ENOMEM;
		}

		mb->data_off = RTE_PKTMBUF_HEADROOM;
		dma_addr = (dma_addr_t)(mb->buf_iova
			   + RTE_PKTMBUF_HEADROOM);
		rq_enet_desc_enc(rqd, dma_addr,
				(rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
				: RQ_ENET_TYPE_NOT_SOP),
				rq_buf_len);
		rq->mbuf_ring[i] = mb;
	}
	/*
	 * Do not post the buffers to the NIC until we enable the RQ via
	 * enic_start_rq().
	 */
	rq->need_initial_post = true;
	/* Initialize fetch index while RQ is disabled */
	iowrite32(0, &rq->ctrl->fetch_index);
	return 0;
}

/*
 * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
 * allocated the buffers and filled the RQ descriptor ring. Just need to push
 * the post index to the NIC.
 */
static void
enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
{
	if (!rq->in_use || !rq->need_initial_post)
		return;

	/* make sure all prior writes are complete before doing the PIO write */
	rte_rmb();

	/* Post all but the last buffer to VIC. */
	rq->posted_index = rq->ring.desc_count - 1;
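	/*
	 * One descriptor is held back (desc_count - 1, not desc_count),
	 * presumably so that a completely full ring never makes
	 * posted_index land on fetch_index and look empty to the VIC.
	 */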
351 dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
352 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
353 iowrite32(rq->posted_index, &rq->ctrl->posted_index);
355 rq->need_initial_post = false;
void *
enic_alloc_consistent(void *priv, size_t size,
	dma_addr_t *dma_handle, uint8_t *name)
{
	void *vaddr;
	const struct rte_memzone *rz;
	struct enic *enic = (struct enic *)priv;
	struct enic_memzone_entry *mze;

	*dma_handle = 0;
	rz = rte_memzone_reserve_aligned((const char *)name, size,
			SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
	if (!rz) {
		pr_err("%s : Failed to allocate memory requested for %s\n",
			__func__, name);
		return NULL;
	}

	vaddr = rz->addr;
	*dma_handle = (dma_addr_t)rz->iova;

	mze = rte_malloc("enic memzone entry",
			 sizeof(struct enic_memzone_entry), 0);

	if (!mze) {
		pr_err("%s : Failed to allocate memory for memzone list\n",
		       __func__);
		rte_memzone_free(rz);
		return NULL;
	}

	mze->rz = rz;

	rte_spinlock_lock(&enic->memzone_list_lock);
	LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
	rte_spinlock_unlock(&enic->memzone_list_lock);

	return vaddr;
}

void
enic_free_consistent(void *priv,
		     __rte_unused size_t size,
		     void *vaddr,
		     dma_addr_t dma_handle)
{
	struct enic_memzone_entry *mze;
	struct enic *enic = (struct enic *)priv;

	rte_spinlock_lock(&enic->memzone_list_lock);
	LIST_FOREACH(mze, &enic->memzone_list, entries) {
		if (mze->rz->addr == vaddr &&
		    mze->rz->iova == dma_handle)
			break;
	}
	if (mze == NULL) {
		rte_spinlock_unlock(&enic->memzone_list_lock);
		dev_warning(enic,
			"Tried to free memory, but couldn't find it in the memzone list\n");
		return;
	}
	LIST_REMOVE(mze, entries);
	rte_spinlock_unlock(&enic->memzone_list_lock);
	rte_memzone_free(mze->rz);
	rte_free(mze);
}

int enic_link_update(struct rte_eth_dev *eth_dev)
{
	struct enic *enic = pmd_priv(eth_dev);
	struct rte_eth_link link;

	memset(&link, 0, sizeof(link));
	link.link_status = enic_get_link_status(enic);
	link.link_duplex = ETH_LINK_FULL_DUPLEX;
	link.link_speed = vnic_dev_port_speed(enic->vdev);

	return rte_eth_linkstatus_set(eth_dev, &link);
}

static void
enic_intr_handler(void *arg)
{
	struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
	struct enic *enic = pmd_priv(dev);

	vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);

	enic_link_update(dev);
	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
	enic_log_q_error(enic);
	/* Re-enable irq in case of INTx */
	rte_intr_ack(&enic->pdev->intr_handle);
}

static int enic_rxq_intr_init(struct enic *enic)
{
	struct rte_intr_handle *intr_handle;
	uint32_t rxq_intr_count, i;
	int err;

	intr_handle = enic->rte_dev->intr_handle;
	if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
		return 0;
	/*
	 * Rx queue interrupts only work when we have MSI-X interrupts,
	 * one per queue. Sharing one interrupt is technically
	 * possible with VIC, but it is not worth the complications it brings.
	 */
	if (!rte_intr_cap_multiple(intr_handle)) {
		dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
			" (vfio-pci driver)\n");
		return -ENOTSUP;
	}
	rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
	err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
	if (err) {
		dev_err(enic, "Failed to enable event fds for Rx queue"
			" interrupts\n");
		return err;
	}
	intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
					    rxq_intr_count * sizeof(int), 0);
	if (intr_handle->intr_vec == NULL) {
		dev_err(enic, "Failed to allocate intr_vec\n");
		return -ENOMEM;
	}
	for (i = 0; i < rxq_intr_count; i++)
		intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
	return 0;
}

static void enic_rxq_intr_deinit(struct enic *enic)
{
	struct rte_intr_handle *intr_handle;

	intr_handle = enic->rte_dev->intr_handle;
	rte_intr_efd_disable(intr_handle);
	if (intr_handle->intr_vec != NULL) {
		rte_free(intr_handle->intr_vec);
		intr_handle->intr_vec = NULL;
	}
}

static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
{
	struct wq_enet_desc *desc;
	struct vnic_wq *wq;
	unsigned int i;

	/*
	 * Fill WQ descriptor fields that never change. Every descriptor is
	 * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
	 * descriptors (i.e. request one completion update every 32 packets).
	 */
	wq = &enic->wq[queue_idx];
	desc = (struct wq_enet_desc *)wq->ring.descs;
	for (i = 0; i < wq->ring.desc_count; i++, desc++) {
		desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
		if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
			desc->header_length_flags |=
				(1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
	}
}

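/*
 * Requesting a completion only every ENIC_WQ_CQ_THRESH descriptors (32,
 * per the comment above) amortizes completion processing: one CQ entry
 * covers a whole batch of transmits, at the cost of transmit mbufs being
 * reclaimed in batches rather than per packet.
 */
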
/*
 * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
 * used when that file is not compiled.
 */
__rte_weak bool
enic_use_vector_rx_handler(__rte_unused struct rte_eth_dev *eth_dev)
{
	return false;
}

void enic_pick_rx_handler(struct rte_eth_dev *eth_dev)
{
	struct enic *enic = pmd_priv(eth_dev);

	/*
	 * Preference order:
	 * 1. The vectorized handler if possible and requested.
	 * 2. The non-scatter, simplified handler if scatter Rx is not used.
	 * 3. The default handler as a fallback.
	 */
	if (enic_use_vector_rx_handler(eth_dev))
		return;
	if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
		ENICPMD_LOG(DEBUG, " use the non-scatter Rx handler");
		eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
	} else {
		ENICPMD_LOG(DEBUG, " use the normal Rx handler");
		eth_dev->rx_pkt_burst = &enic_recv_pkts;
	}
}

/* Secondary process uses this to set the Tx handler */
void enic_pick_tx_handler(struct rte_eth_dev *eth_dev)
{
	struct enic *enic = pmd_priv(eth_dev);

	if (enic->use_simple_tx_handler) {
		ENICPMD_LOG(DEBUG, " use the simple tx handler");
		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
	} else {
		ENICPMD_LOG(DEBUG, " use the default tx handler");
		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
	}
}

int enic_enable(struct enic *enic)
{
	unsigned int index;
	int err;
	struct rte_eth_dev *eth_dev = enic->rte_dev;
	uint64_t simple_tx_offloads;
	uintptr_t p;

	if (enic->enable_avx2_rx) {
		struct rte_mbuf mb_def = { .buf_addr = 0 };

		/*
		 * mbuf_initializer contains const-after-init fields of
		 * receive mbufs (i.e. 64 bits of fields from rearm_data).
		 * It is currently used by the vectorized handler.
		 */
		mb_def.nb_segs = 1;
		mb_def.data_off = RTE_PKTMBUF_HEADROOM;
		mb_def.port = enic->port_id;
		rte_mbuf_refcnt_set(&mb_def, 1);
		rte_compiler_barrier();
		p = (uintptr_t)&mb_def.rearm_data;
		enic->mbuf_initializer = *(uint64_t *)p;
	}
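	/*
	 * The vectorized Rx handler can then reinitialize each received
	 * mbuf with a single 8-byte store of mbuf_initializer: rearm_data
	 * overlays data_off, refcnt, nb_segs, and port in struct rte_mbuf,
	 * so one write replaces four per-packet field updates.
	 */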

	eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
	eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;

	/* vnic notification of link status has already been turned on in
	 * enic_dev_init() which is called during probe time. Here we are
	 * just turning on interrupt vector 0 if needed.
	 */
	if (eth_dev->data->dev_conf.intr_conf.lsc)
		vnic_dev_notify_set(enic->vdev, 0);

	err = enic_rxq_intr_init(enic);
	if (err)
		return err;

	/* Initialize flowman if not already initialized during probe */
	if (enic->fm == NULL && enic_fm_init(enic))
		dev_warning(enic, "Init of flowman failed.\n");

	for (index = 0; index < enic->rq_count; index++) {
		err = enic_alloc_rx_queue_mbufs(enic,
			&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
		if (err) {
			dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
			return err;
		}
		err = enic_alloc_rx_queue_mbufs(enic,
			&enic->rq[enic_rte_rq_idx_to_data_idx(index, enic)]);
		if (err) {
			/* release the allocated mbufs for the sop rq */
			enic_rxmbuf_queue_release(enic,
				&enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);

			dev_err(enic, "Failed to alloc data RX queue mbufs\n");
			return err;
		}
	}

	/*
	 * Use the simple TX handler if possible. Only checksum offloads
	 * and vlan insertion are supported.
	 */
	simple_tx_offloads = enic->tx_offload_capa &
		(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
		 DEV_TX_OFFLOAD_VLAN_INSERT |
		 DEV_TX_OFFLOAD_IPV4_CKSUM |
		 DEV_TX_OFFLOAD_UDP_CKSUM |
		 DEV_TX_OFFLOAD_TCP_CKSUM);
	if ((eth_dev->data->dev_conf.txmode.offloads &
	     ~simple_tx_offloads) == 0) {
		ENICPMD_LOG(DEBUG, " use the simple tx handler");
		eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
		for (index = 0; index < enic->wq_count; index++)
			enic_prep_wq_for_simple_tx(enic, index);
		enic->use_simple_tx_handler = 1;
	} else {
		ENICPMD_LOG(DEBUG, " use the default tx handler");
		eth_dev->tx_pkt_burst = &enic_xmit_pkts;
	}

	enic_pick_rx_handler(eth_dev);

	for (index = 0; index < enic->wq_count; index++)
		enic_start_wq(enic, index);
	for (index = 0; index < enic->rq_count; index++)
		enic_start_rq(enic, index);

	vnic_dev_add_addr(enic->vdev, enic->mac_addr);

	vnic_dev_enable_wait(enic->vdev);

	/* Register and enable error interrupt */
	rte_intr_callback_register(&(enic->pdev->intr_handle),
		enic_intr_handler, (void *)enic->rte_dev);

	rte_intr_enable(&(enic->pdev->intr_handle));
	/* Unmask LSC interrupt */
	vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);

	return 0;
}

int enic_alloc_intr_resources(struct enic *enic)
{
	int err;
	unsigned int i;

	dev_info(enic, "vNIC resources used:  "
		"wq %d rq %d cq %d intr %d\n",
		enic->wq_count, enic_vnic_rq_count(enic),
		enic->cq_count, enic->intr_count);

	for (i = 0; i < enic->intr_count; i++) {
		err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
		if (err) {
			enic_free_vnic_resources(enic);
			return err;
		}
	}
	return 0;
}

void enic_free_rq(void *rxq)
{
	struct vnic_rq *rq_sop, *rq_data;
	struct enic *enic;

	if (rxq == NULL)
		return;

	rq_sop = (struct vnic_rq *)rxq;
	enic = vnic_dev_priv(rq_sop->vdev);
	rq_data = &enic->rq[rq_sop->data_queue_idx];

	if (rq_sop->free_mbufs) {
		struct rte_mbuf **mb;
		int i;

		mb = rq_sop->free_mbufs;
		for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
		     i < ENIC_RX_BURST_MAX; i++)
			rte_pktmbuf_free(mb[i]);
		rte_free(rq_sop->free_mbufs);
		rq_sop->free_mbufs = NULL;
		rq_sop->num_free_mbufs = 0;
	}

	enic_rxmbuf_queue_release(enic, rq_sop);
	if (rq_data->in_use)
		enic_rxmbuf_queue_release(enic, rq_data);

	rte_free(rq_sop->mbuf_ring);
	if (rq_data->in_use)
		rte_free(rq_data->mbuf_ring);

	rq_sop->mbuf_ring = NULL;
	rq_data->mbuf_ring = NULL;

	vnic_rq_free(rq_sop);
	if (rq_data->in_use)
		vnic_rq_free(rq_data);

	vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);

	rq_sop->in_use = 0;
	rq_data->in_use = 0;
}

void enic_start_wq(struct enic *enic, uint16_t queue_idx)
{
	struct rte_eth_dev_data *data = enic->dev_data;
	vnic_wq_enable(&enic->wq[queue_idx]);
	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
}

int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
{
	struct rte_eth_dev_data *data = enic->dev_data;
	int ret;

	ret = vnic_wq_disable(&enic->wq[queue_idx]);
	if (ret)
		return ret;

	data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

void enic_start_rq(struct enic *enic, uint16_t queue_idx)
{
	struct rte_eth_dev_data *data = enic->dev_data;
	struct vnic_rq *rq_sop;
	struct vnic_rq *rq_data;
	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
	rq_data = &enic->rq[rq_sop->data_queue_idx];

	if (rq_data->in_use) {
		vnic_rq_enable(rq_data);
		enic_initial_post_rx(enic, rq_data);
	}
	rte_mb();
	vnic_rq_enable(rq_sop);
	enic_initial_post_rx(enic, rq_sop);
	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
}

int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
{
	struct rte_eth_dev_data *data = enic->dev_data;
	int ret1 = 0, ret2 = 0;
	struct vnic_rq *rq_sop;
	struct vnic_rq *rq_data;
	rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
	rq_data = &enic->rq[rq_sop->data_queue_idx];

	ret2 = vnic_rq_disable(rq_sop);
	rte_mb();
	if (rq_data->in_use)
		ret1 = vnic_rq_disable(rq_data);

	if (ret2)
		return ret2;
	else if (ret1)
		return ret1;

	data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}

int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
	unsigned int socket_id, struct rte_mempool *mp,
	uint16_t nb_desc, uint16_t free_thresh)
{
	struct enic_vf_representor *vf;
	int rc;
	uint16_t sop_queue_idx;
	uint16_t data_queue_idx;
	uint16_t cq_idx;
	struct vnic_rq *rq_sop;
	struct vnic_rq *rq_data;
	unsigned int mbuf_size, mbufs_per_pkt;
	unsigned int nb_sop_desc, nb_data_desc;
	uint16_t min_sop, max_sop, min_data, max_data;
	uint32_t max_rx_pkt_len;

	/*
	 * Representor uses a reserved PF queue. Translate representor
	 * queue number to PF queue number.
	 */
	if (enic_is_vf_rep(enic)) {
		RTE_ASSERT(queue_idx == 0);
		vf = VF_ENIC_TO_VF_REP(enic);
		sop_queue_idx = vf->pf_rq_sop_idx;
		data_queue_idx = vf->pf_rq_data_idx;
		enic = vf->pf;
		queue_idx = sop_queue_idx;
	} else {
		sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
		data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx, enic);
	}
	cq_idx = enic_cq_rq(enic, sop_queue_idx);
	rq_sop = &enic->rq[sop_queue_idx];
	rq_data = &enic->rq[data_queue_idx];
	rq_sop->is_sop = 1;
	rq_sop->data_queue_idx = data_queue_idx;
	rq_data->is_sop = 0;
	rq_data->data_queue_idx = 0;
	rq_sop->socket_id = socket_id;
	rq_sop->mp = mp;
	rq_data->socket_id = socket_id;
	rq_data->mp = mp;
	rq_sop->in_use = 1;
	rq_sop->rx_free_thresh = free_thresh;
	rq_data->rx_free_thresh = free_thresh;
	dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
		  free_thresh);

	mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
			       RTE_PKTMBUF_HEADROOM);
	/* max_rx_pkt_len includes the ethernet header and CRC. */
	max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;

	if (enic->rte_dev->data->dev_conf.rxmode.offloads &
	    DEV_RX_OFFLOAD_SCATTER) {
		dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
		/* ceil((max pkt len)/mbuf_size) */
		mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
	} else {
		dev_info(enic, "Scatter rx mode disabled\n");
		mbufs_per_pkt = 1;
		if (max_rx_pkt_len > mbuf_size) {
			dev_warning(enic, "The maximum Rx packet size (%u) is"
				    " larger than the mbuf size (%u), and"
				    " scatter is disabled. Larger packets will"
				    " be truncated.\n",
				    max_rx_pkt_len, mbuf_size);
		}
	}

	if (mbufs_per_pkt > 1) {
		dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
		rq_sop->data_queue_enable = 1;
		rq_data->in_use = 1;
		/*
		 * HW does not directly support rxmode.max_rx_pkt_len. HW
		 * always receives packet sizes up to the "max" MTU. If not
		 * using scatter, we can achieve the effect of dropping
		 * larger packets by reducing the size of posted buffers.
		 * See enic_alloc_rx_queue_mbufs().
		 */
		if (max_rx_pkt_len <
		    enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
			dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
				    " when scatter rx mode is in use.\n");
		}
	} else {
		dev_info(enic, "Rq %u Scatter rx mode not being used\n",
			 queue_idx);
		rq_sop->data_queue_enable = 0;
		rq_data->in_use = 0;
	}

	/* The number of descriptors has to be a multiple of 32. */
	nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
	nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
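	/*
	 * Worked example (illustrative only): with nb_desc = 512 and
	 * mbufs_per_pkt = 4, the sop ring gets 512 / 4 = 128 descriptors
	 * and the data ring the remaining 384; both are already multiples
	 * of 32, so the masking changes nothing in this case.
	 */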

	rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
	rq_data->max_mbufs_per_pkt = mbufs_per_pkt;

	if (mbufs_per_pkt > 1) {
		min_sop = ENIC_RX_BURST_MAX;
		max_sop = ((enic->config.rq_desc_count /
			    (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
		min_data = min_sop * (mbufs_per_pkt - 1);
		max_data = enic->config.rq_desc_count;
	} else {
		min_sop = ENIC_RX_BURST_MAX;
		max_sop = enic->config.rq_desc_count;
		min_data = 0;
		max_data = 0;
	}

	if (nb_desc < (min_sop + min_data)) {
		dev_warning(enic,
			    "Number of rx descs too low, adjusting to minimum\n");
		nb_sop_desc = min_sop;
		nb_data_desc = min_data;
	} else if (nb_desc > (max_sop + max_data)) {
		dev_warning(enic,
			    "Number of rx_descs too high, adjusting to maximum\n");
		nb_sop_desc = max_sop;
		nb_data_desc = max_data;
	}
	if (mbufs_per_pkt > 1) {
		dev_info(enic, "For max packet size %u and mbuf size %u valid"
			 " rx descriptor range is %u to %u\n",
			 max_rx_pkt_len, mbuf_size, min_sop + min_data,
			 max_sop + max_data);
	}
	dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
		 nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);

	/* Allocate sop queue resources */
	rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
			   nb_sop_desc, sizeof(struct rq_enet_desc));
	if (rc) {
		dev_err(enic, "error in allocation of sop rq\n");
		goto err_exit;
	}
	nb_sop_desc = rq_sop->ring.desc_count;

	if (rq_data->in_use) {
		/* Allocate data queue resources */
		rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
				   nb_data_desc,
				   sizeof(struct rq_enet_desc));
		if (rc) {
			dev_err(enic, "error in allocation of data rq\n");
			goto err_free_rq_sop;
		}
		nb_data_desc = rq_data->ring.desc_count;
	}
	rc = vnic_cq_alloc(enic->vdev, &enic->cq[cq_idx], cq_idx,
			   socket_id, nb_sop_desc + nb_data_desc,
			   sizeof(struct cq_enet_rq_desc));
	if (rc) {
		dev_err(enic, "error in allocation of cq for rq\n");
		goto err_free_rq_data;
	}

	/* Allocate the mbuf rings */
	rq_sop->mbuf_ring = (struct rte_mbuf **)
		rte_zmalloc_socket("rq->mbuf_ring",
				   sizeof(struct rte_mbuf *) * nb_sop_desc,
				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
	if (rq_sop->mbuf_ring == NULL)
		goto err_free_cq;

	if (rq_data->in_use) {
		rq_data->mbuf_ring = (struct rte_mbuf **)
			rte_zmalloc_socket("rq->mbuf_ring",
				sizeof(struct rte_mbuf *) * nb_data_desc,
				RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
		if (rq_data->mbuf_ring == NULL)
			goto err_free_sop_mbuf;
	}

	rq_sop->free_mbufs = (struct rte_mbuf **)
		rte_zmalloc_socket("rq->free_mbufs",
				   sizeof(struct rte_mbuf *) *
				   ENIC_RX_BURST_MAX,
				   RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
	if (rq_sop->free_mbufs == NULL)
		goto err_free_data_mbuf;
	rq_sop->num_free_mbufs = 0;

	rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */

	return 0;

err_free_data_mbuf:
	rte_free(rq_data->mbuf_ring);
err_free_sop_mbuf:
	rte_free(rq_sop->mbuf_ring);
err_free_cq:
	/* cleanup on error */
	vnic_cq_free(&enic->cq[cq_idx]);
err_free_rq_data:
	if (rq_data->in_use)
		vnic_rq_free(rq_data);
err_free_rq_sop:
	vnic_rq_free(rq_sop);
err_exit:
	return -ENOMEM;
}

void enic_free_wq(void *txq)
{
	struct vnic_wq *wq;
	struct enic *enic;

	if (txq == NULL)
		return;

	wq = (struct vnic_wq *)txq;
	enic = vnic_dev_priv(wq->vdev);
	rte_memzone_free(wq->cqmsg_rz);
	vnic_wq_free(wq);
	vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
}

int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
	unsigned int socket_id, uint16_t nb_desc)
{
	struct enic_vf_representor *vf;
	int err;
	struct vnic_wq *wq;
	unsigned int cq_index;
	char name[RTE_MEMZONE_NAMESIZE];
	static int instance;

	/*
	 * Representor uses a reserved PF queue. Translate representor
	 * queue number to PF queue number.
	 */
	if (enic_is_vf_rep(enic)) {
		RTE_ASSERT(queue_idx == 0);
		vf = VF_ENIC_TO_VF_REP(enic);
		queue_idx = vf->pf_wq_idx;
		cq_index = vf->pf_wq_cq_idx;
		enic = vf->pf;
	} else {
		cq_index = enic_cq_wq(enic, queue_idx);
	}
	wq = &enic->wq[queue_idx];
	wq->socket_id = socket_id;
	/*
	 * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
	 * print an info message for diagnostics.
	 */
	dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);

	/* Allocate queue resources */
	err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
		nb_desc,
		sizeof(struct wq_enet_desc));
	if (err) {
		dev_err(enic, "error in allocation of wq\n");
		return err;
	}

	err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
		socket_id, nb_desc,
		sizeof(struct cq_enet_wq_desc));
	if (err) {
		vnic_wq_free(wq);
		dev_err(enic, "error in allocation of cq for wq\n");
		return err;
	}

	/* setup up CQ message */
	snprintf((char *)name, sizeof(name),
		 "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
		 instance++);

	wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
			sizeof(uint32_t), SOCKET_ID_ANY,
			RTE_MEMZONE_IOVA_CONTIG, ENIC_PAGE_SIZE);
	if (!wq->cqmsg_rz)
		return -ENOMEM;

	return 0;
}

int enic_disable(struct enic *enic)
{
	unsigned int i;
	int err;

	for (i = 0; i < enic->intr_count; i++) {
		vnic_intr_mask(&enic->intr[i]);
		(void)vnic_intr_masked(&enic->intr[i]); /* flush write */
	}
	enic_rxq_intr_deinit(enic);
	rte_intr_disable(&enic->pdev->intr_handle);
	rte_intr_callback_unregister(&enic->pdev->intr_handle,
				     enic_intr_handler,
				     (void *)enic->rte_dev);

	vnic_dev_disable(enic->vdev);

	enic_fm_destroy(enic);

	if (!enic_is_sriov_vf(enic))
		vnic_dev_del_addr(enic->vdev, enic->mac_addr);

	for (i = 0; i < enic->wq_count; i++) {
		err = vnic_wq_disable(&enic->wq[i]);
		if (err)
			return err;
	}
	for (i = 0; i < enic_vnic_rq_count(enic); i++) {
		if (enic->rq[i].in_use) {
			err = vnic_rq_disable(&enic->rq[i]);
			if (err)
				return err;
		}
	}

	/* If we were using interrupts, set the interrupt vector to -1
	 * to disable interrupts. We are not disabling link notifications,
	 * though, as we want the polling of link status to continue working.
	 */
	if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
		vnic_dev_notify_set(enic->vdev, -1);

	vnic_dev_set_reset_flag(enic->vdev, 1);

	for (i = 0; i < enic->wq_count; i++)
		vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);

	for (i = 0; i < enic_vnic_rq_count(enic); i++)
		if (enic->rq[i].in_use)
			vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
	for (i = 0; i < enic->cq_count; i++)
		vnic_cq_clean(&enic->cq[i]);
	for (i = 0; i < enic->intr_count; i++)
		vnic_intr_clean(&enic->intr[i]);

	return 0;
}

static int enic_dev_wait(struct vnic_dev *vdev,
			 int (*start)(struct vnic_dev *, int),
			 int (*finished)(struct vnic_dev *, int *),
			 int arg)
{
	int done;
	int err;
	int i;

	err = start(vdev, arg);
	if (err)
		return err;

	/* Wait for func to complete...2 seconds max */
	for (i = 0; i < 2000; i++) {
		err = finished(vdev, &done);
		if (err)
			return err;
		if (done)
			return 0;
		usleep(1000);
	}
	return -ETIMEDOUT;
}

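/*
 * Usage sketch (see enic_dev_open() below): kick off a devcmd via the
 * "start" callback, then poll the "finished" callback once per
 * millisecond until it reports completion or the 2 second budget runs
 * out.
 */
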
static int enic_dev_open(struct enic *enic)
{
	int err;
	int flags = CMD_OPENF_IG_DESCCACHE;

	err = enic_dev_wait(enic->vdev, vnic_dev_open,
			    vnic_dev_open_done, flags);
	if (err)
		dev_err(enic_get_dev(enic),
			"vNIC device open failed, err %d\n", err);

	return err;
}

static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
{
	dma_addr_t rss_key_buf_pa;
	union vnic_rss_key *rss_key_buf_va = NULL;
	int err, i;
	uint8_t name[RTE_MEMZONE_NAMESIZE];

	RTE_ASSERT(user_key != NULL);
	snprintf((char *)name, sizeof(name), "rss_key-%s", enic->bdf_name);
	rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
					       &rss_key_buf_pa, name);
	if (!rss_key_buf_va)
		return -ENOMEM;

	for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
		rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
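	/*
	 * The devcmd firmware interface stores the hash key as 10-byte
	 * sub-keys, hence the key[i / 10].b[i % 10] scatter above. With
	 * ENIC_RSS_HASH_KEY_SIZE of 40 (matching the default key in
	 * enic_init_rss_nic_cfg()), that is four sub-keys.
	 */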

	err = enic_set_rss_key(enic,
		rss_key_buf_pa,
		sizeof(union vnic_rss_key));

	/* Save for later queries */
	if (!err)
		rte_memcpy(&enic->rss_key, rss_key_buf_va,
			   sizeof(union vnic_rss_key));

	enic_free_consistent(enic, sizeof(union vnic_rss_key),
			     rss_key_buf_va, rss_key_buf_pa);

	return err;
}

int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
{
	dma_addr_t rss_cpu_buf_pa;
	union vnic_rss_cpu *rss_cpu_buf_va = NULL;
	int err;
	uint8_t name[RTE_MEMZONE_NAMESIZE];

	snprintf((char *)name, sizeof(name), "rss_cpu-%s", enic->bdf_name);
	rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
					       &rss_cpu_buf_pa, name);
	if (!rss_cpu_buf_va)
		return -ENOMEM;

	rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));

	err = enic_set_rss_cpu(enic,
		rss_cpu_buf_pa,
		sizeof(union vnic_rss_cpu));

	enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
			     rss_cpu_buf_va, rss_cpu_buf_pa);

	/* Save for later queries */
	if (!err)
		rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
	return err;
}

static int enic_set_niccfg(struct enic *enic, uint8_t rss_default_cpu,
	uint8_t rss_hash_type, uint8_t rss_hash_bits, uint8_t rss_base_cpu,
	uint8_t rss_enable)
{
	const uint8_t tso_ipid_split_en = 0;
	int err;

	err = enic_set_nic_cfg(enic,
		rss_default_cpu, rss_hash_type,
		rss_hash_bits, rss_base_cpu,
		rss_enable, tso_ipid_split_en,
		enic->ig_vlan_strip_en);

	return err;
}

/* Initialize RSS with defaults, called from dev_configure */
int enic_init_rss_nic_cfg(struct enic *enic)
{
	static uint8_t default_rss_key[] = {
		85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
		80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
		76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
		69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
	};
	struct rte_eth_rss_conf rss_conf;
	union vnic_rss_cpu rss_cpu;
	int ret, i;

	rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
	/*
	 * If setting key for the first time, and the user gives us none, then
	 * push the default key to NIC.
	 */
	if (rss_conf.rss_key == NULL) {
		rss_conf.rss_key = default_rss_key;
		rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
	}
	ret = enic_set_rss_conf(enic, &rss_conf);
	if (ret) {
		dev_err(enic, "Failed to configure RSS\n");
		return ret;
	}
	if (enic->rss_enable) {
		/* If enabling RSS, use the default reta */
		for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
			rss_cpu.cpu[i / 4].b[i % 4] =
				enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
		}
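		/*
		 * Like the RSS key, the indirection table is packed for the
		 * firmware: each cpu[] element holds four one-byte entries,
		 * and the fill above simply round-robins the hash buckets
		 * across the configured sop RQs.
		 */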
		ret = enic_set_rss_reta(enic, &rss_cpu);
		if (ret)
			dev_err(enic, "Failed to set RSS indirection table\n");
	}
	return ret;
}

int enic_setup_finish(struct enic *enic)
{
	enic_init_soft_stats(enic);

	/* switchdev: enable promisc mode on PF */
	if (enic->switchdev_mode) {
		vnic_dev_packet_filter(enic->vdev,
				       0 /* directed  */,
				       0 /* multicast */,
				       0 /* broadcast */,
				       1 /* promisc   */,
				       0 /* allmulti  */);
		enic->promisc = 1;
		enic->allmulti = 0;
		return 0;
	}
	/* Default conf */
	vnic_dev_packet_filter(enic->vdev,
			       1 /* directed  */,
			       1 /* multicast */,
			       1 /* broadcast */,
			       0 /* promisc   */,
			       1 /* allmulti  */);

	enic->promisc = 0;
	enic->allmulti = 1;

	return 0;
}

static int enic_rss_conf_valid(struct enic *enic,
			       struct rte_eth_rss_conf *rss_conf)
{
	/* RSS is disabled per VIC settings. Ignore rss_conf. */
	if (enic->flow_type_rss_offloads == 0)
		return 0;
	if (rss_conf->rss_key != NULL &&
	    rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
		dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
			rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
		return -EINVAL;
	}
	if (rss_conf->rss_hf != 0 &&
	    (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
		dev_err(enic, "Given rss_hf contains none of the supported"
			" types\n");
		return -EINVAL;
	}

	return 0;
}

1350 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1352 struct rte_eth_dev *eth_dev;
1354 uint8_t rss_hash_type;
1358 RTE_ASSERT(rss_conf != NULL);
1359 ret = enic_rss_conf_valid(enic, rss_conf);
1361 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1365 eth_dev = enic->rte_dev;
1367 rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1368 if (enic->rq_count > 1 &&
1369 (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1372 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1373 ETH_RSS_NONFRAG_IPV4_OTHER))
1374 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1375 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1376 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1377 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1378 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1379 if (enic->udp_rss_weak) {
1381 * 'TCP' is not a typo. The "weak" version of
1382 * UDP RSS requires both the TCP and UDP bits
1383 * be set. It does enable TCP RSS as well.
1385 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1388 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1389 ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1390 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1391 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1392 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1393 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1394 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1395 if (enic->udp_rss_weak)
1396 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1403 /* Set the hash key if provided */
1404 if (rss_enable && rss_conf->rss_key) {
1405 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1407 dev_err(enic, "Failed to set RSS key\n");
1412 ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1413 ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1416 enic->rss_hf = rss_hf;
1417 enic->rss_hash_type = rss_hash_type;
1418 enic->rss_enable = rss_enable;
1420 dev_err(enic, "Failed to update RSS configurations."
1421 " hash=0x%x\n", rss_hash_type);
int enic_set_vlan_strip(struct enic *enic)
{
	/*
	 * Unfortunately, VLAN strip on/off and RSS on/off are configured
	 * together. So, re-do niccfg, preserving the current RSS settings.
	 */
	return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
			       ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
			       enic->rss_enable);
}

int enic_add_packet_filter(struct enic *enic)
{
	/* switchdev ignores packet filters */
	if (enic->switchdev_mode) {
		ENICPMD_LOG(DEBUG, " switchdev: ignore packet filter");
		return 0;
	}
	/* Args -> directed, multicast, broadcast, promisc, allmulti */
	return vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
				      enic->promisc, enic->allmulti);
}

int enic_get_link_status(struct enic *enic)
{
	return vnic_dev_link_status(enic->vdev);
}

static void enic_dev_deinit(struct enic *enic)
{
	/* stop link status checking */
	vnic_dev_notify_unset(enic->vdev);

	/* mac_addrs is freed by rte_eth_dev_release_port() */
	rte_free(enic->cq);
	rte_free(enic->intr);
	rte_free(enic->rq);
	rte_free(enic->wq);
}

int enic_set_vnic_res(struct enic *enic)
{
	struct rte_eth_dev *eth_dev = enic->rte_dev;
	int rc = 0;
	unsigned int required_rq, required_wq, required_cq, required_intr;

	/* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
	required_rq = eth_dev->data->nb_rx_queues * 2;
	required_wq = eth_dev->data->nb_tx_queues;
	required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
	required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
	if (eth_dev->data->dev_conf.intr_conf.rxq) {
		required_intr += eth_dev->data->nb_rx_queues;
	}
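	/*
	 * Worked example (illustrative only): 2 Rx and 2 Tx eth_dev queues
	 * need rq = 4 (a sop and a data RQ per Rx queue), wq = 2, cq = 4,
	 * and intr = 1 for LSC, plus 2 more if per-queue Rx interrupts are
	 * enabled.
	 */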
	ENICPMD_LOG(DEBUG, "Required queues for PF: rq %u wq %u cq %u",
		    required_rq, required_wq, required_cq);
	if (enic->vf_required_rq) {
		/* Queues needed for VF representors */
		required_rq += enic->vf_required_rq;
		required_wq += enic->vf_required_wq;
		required_cq += enic->vf_required_cq;
		ENICPMD_LOG(DEBUG, "Required queues for VF representors: rq %u wq %u cq %u",
			    enic->vf_required_rq, enic->vf_required_wq,
			    enic->vf_required_cq);
	}

	if (enic->conf_rq_count < required_rq) {
		dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
			eth_dev->data->nb_rx_queues,
			required_rq, enic->conf_rq_count);
		rc = -EINVAL;
	}
	if (enic->conf_wq_count < required_wq) {
		dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
			eth_dev->data->nb_tx_queues, enic->conf_wq_count);
		rc = -EINVAL;
	}

	if (enic->conf_cq_count < required_cq) {
		dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
			required_cq, enic->conf_cq_count);
		rc = -EINVAL;
	}
	if (enic->conf_intr_count < required_intr) {
		dev_err(dev, "Not enough Interrupts to support Rx queue"
			" interrupts. Required:%u, Configured:%u\n",
			required_intr, enic->conf_intr_count);
		rc = -EINVAL;
	}

	if (rc == 0) {
		enic->rq_count = eth_dev->data->nb_rx_queues;
		enic->wq_count = eth_dev->data->nb_tx_queues;
		enic->cq_count = enic->rq_count + enic->wq_count;
		enic->intr_count = required_intr;
	}

	return rc;
}

/* Initialize the completion queue for an RQ */
static int
enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
{
	struct vnic_rq *sop_rq, *data_rq;
	unsigned int cq_idx;
	int rc = 0;

	sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
	data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx, enic)];
	cq_idx = enic_cq_rq(enic, rq_idx);

	vnic_cq_clean(&enic->cq[cq_idx]);
	vnic_cq_init(&enic->cq[cq_idx],
		     0 /* flow_control_enable */,
		     1 /* color_enable */,
		     0 /* cq_head */,
		     0 /* cq_tail */,
		     1 /* cq_tail_color */,
		     0 /* interrupt_enable */,
		     1 /* cq_entry_enable */,
		     0 /* cq_message_enable */,
		     0 /* interrupt offset */,
		     0 /* cq_message_addr */);

	vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
			   enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
			   sop_rq->ring.desc_count - 1, 1, 0);
	if (data_rq->in_use) {
		vnic_rq_init_start(data_rq,
				   enic_cq_rq(enic,
				   enic_rte_rq_idx_to_data_idx(rq_idx, enic)),
				   0, data_rq->ring.desc_count - 1, 1, 0);
	}

	rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
	if (rc)
		return rc;

	if (data_rq->in_use) {
		rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
		if (rc) {
			enic_rxmbuf_queue_release(enic, sop_rq);
			return rc;
		}
	}

	return 0;
}

/* The Cisco NIC can send and receive packets up to a max packet size
 * determined by the NIC type and firmware. There is also an MTU
 * configured into the NIC via the CIMC/UCSM management interface
 * which can be overridden by this function (up to the max packet size).
 * Depending on the network setup, doing so may cause packet drops
 * and unexpected behavior.
 */
int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
{
	unsigned int rq_idx;
	struct vnic_rq *rq;
	int rc = 0;
	uint16_t old_mtu;	/* previous setting */
	uint16_t config_mtu;	/* Value configured into NIC via CIMC/UCSM */
	struct rte_eth_dev *eth_dev = enic->rte_dev;

	old_mtu = eth_dev->data->mtu;
	config_mtu = enic->config.mtu;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return -E_RTE_SECONDARY;

	if (new_mtu > enic->max_mtu) {
		dev_err(enic,
			"MTU not updated: requested (%u) greater than max (%u)\n",
			new_mtu, enic->max_mtu);
		return -EINVAL;
	}
	if (new_mtu < ENIC_MIN_MTU) {
		dev_info(enic,
			"MTU not updated: requested (%u) less than min (%u)\n",
			new_mtu, ENIC_MIN_MTU);
		return -EINVAL;
	}
	if (new_mtu > config_mtu)
		dev_warning(enic,
			"MTU (%u) is greater than value configured in NIC (%u)\n",
			new_mtu, config_mtu);

	/* Update the MTU and maximum packet length */
	eth_dev->data->mtu = new_mtu;
	eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
		enic_mtu_to_max_rx_pktlen(new_mtu);
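	/*
	 * enic_mtu_to_max_rx_pktlen() converts the L3 MTU to an L2 frame
	 * length; per the note in enic_alloc_rq(), max_rx_pkt_len counts
	 * the Ethernet header and CRC on top of the MTU.
	 */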

	/*
	 * If the device has not started (enic_enable), nothing to do.
	 * Later, enic_enable() will set up RQs reflecting the new maximum
	 * packet length.
	 */
	if (!eth_dev->data->dev_started)
		goto set_mtu_done;

	/*
	 * The device has started, re-do RQs on the fly. In the process, we
	 * pick up the new maximum packet length.
	 *
	 * Some applications rely on the ability to change MTU without
	 * stopping the device. So keep this behavior for now.
	 */
	rte_spinlock_lock(&enic->mtu_lock);

	/* Stop traffic on all RQs */
	for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
		rq = &enic->rq[rq_idx];
		if (rq->is_sop && rq->in_use) {
			rc = enic_stop_rq(enic,
					  enic_sop_rq_idx_to_rte_idx(rq_idx));
			if (rc) {
				dev_err(enic, "Failed to stop Rq %u\n", rq_idx);
				goto set_mtu_done;
			}
		}
	}

	/* replace Rx function with a no-op to avoid getting stale pkts */
	eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
	rte_mb();

	/* Allow time for threads to exit the real Rx function. */
	usleep(100000);

	/* now it is safe to reconfigure the RQs */

	/* free and reallocate RQs with the new MTU */
	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
		if (!rq->in_use)
			continue;

		enic_free_rq(rq);
		rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
				   rq->tot_nb_desc, rq->rx_free_thresh);
		if (rc) {
			dev_err(enic,
				"Fatal MTU alloc error- No traffic will pass\n");
			goto set_mtu_done;
		}

		rc = enic_reinit_rq(enic, rq_idx);
		if (rc) {
			dev_err(enic,
				"Fatal MTU RQ reinit- No traffic will pass\n");
			goto set_mtu_done;
		}
	}

	/* put back the real receive function */
	rte_mb();
	enic_pick_rx_handler(eth_dev);
	rte_mb();

	/* restart Rx traffic */
	for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
		rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
		if (rq->is_sop && rq->in_use)
			enic_start_rq(enic, rq_idx);
	}

set_mtu_done:
	dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
	rte_spinlock_unlock(&enic->mtu_lock);
	return rc;
}

static int enic_dev_init(struct enic *enic)
{
	int err;
	struct rte_eth_dev *eth_dev = enic->rte_dev;

	vnic_dev_intr_coal_timer_info_default(enic->vdev);

	/* Get vNIC configuration */
	err = enic_get_vnic_config(enic);
	if (err) {
		dev_err(dev, "Get vNIC configuration failed, aborting\n");
		return err;
	}

	/* Get available resource counts */
	enic_get_res_counts(enic);
	if (enic->conf_rq_count == 1) {
		dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
		dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
		dev_err(enic, "See the ENIC PMD guide for more information.\n");
		return -EINVAL;
	}
	/* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
	enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
			       enic->conf_cq_count, 8);
	enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
				 enic->conf_intr_count, 8);
	enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
			       enic->conf_rq_count, 8);
	enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
			       enic->conf_wq_count, 8);
	if (enic->conf_cq_count > 0 && enic->cq == NULL) {
		dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
		return -1;
	}
	if (enic->conf_intr_count > 0 && enic->intr == NULL) {
		dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
		return -1;
	}
	if (enic->conf_rq_count > 0 && enic->rq == NULL) {
		dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
		return -1;
	}
	if (enic->conf_wq_count > 0 && enic->wq == NULL) {
		dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
		return -1;
	}

	eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr",
					       sizeof(struct rte_ether_addr) *
					       ENIC_UNICAST_PERFECT_FILTERS, 0);
	if (!eth_dev->data->mac_addrs) {
		dev_err(enic, "mac addr storage alloc failed, aborting.\n");
		return -1;
	}
	rte_ether_addr_copy((struct rte_ether_addr *)enic->mac_addr,
			    eth_dev->data->mac_addrs);

	vnic_dev_set_reset_flag(enic->vdev, 0);

	LIST_INIT(&enic->flows);

	/* set up link status checking */
	vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */

	/*
	 * When Geneve with options offload is available, always disable it
	 * first as it can interfere with user flow rules.
	 */
	if (enic->geneve_opt_avail) {
		/*
		 * Disabling fails if the feature is provisioned but
		 * not enabled. So ignore result and do not log error.
		 */
		vnic_dev_overlay_offload_ctrl(enic->vdev,
			OVERLAY_FEATURE_GENEVE,
			OVERLAY_OFFLOAD_DISABLE);
	}
	enic->overlay_offload = false;
	if (enic->disable_overlay && enic->vxlan) {
		/*
		 * Explicitly disable overlay offload as the setting is
		 * sticky, and resetting vNIC does not disable it.
		 */
		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
						  OVERLAY_FEATURE_VXLAN,
						  OVERLAY_OFFLOAD_DISABLE)) {
			dev_err(enic, "failed to disable overlay offload\n");
		} else {
			dev_info(enic, "Overlay offload is disabled\n");
		}
	}
	if (!enic->disable_overlay && enic->vxlan &&
	    /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
	    vnic_dev_overlay_offload_ctrl(enic->vdev,
					  OVERLAY_FEATURE_VXLAN,
					  OVERLAY_OFFLOAD_ENABLE) == 0) {
		enic->tx_offload_capa |=
			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
			DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
			DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
		enic->tx_offload_mask |=
			PKT_TX_OUTER_IPV6 |
			PKT_TX_OUTER_IPV4 |
			PKT_TX_OUTER_IP_CKSUM |
			PKT_TX_TUNNEL_MASK;
		enic->overlay_offload = true;
		dev_info(enic, "Overlay offload is enabled\n");
	}
	/* Geneve with options offload requires overlay offload */
	if (enic->overlay_offload && enic->geneve_opt_avail &&
	    enic->geneve_opt_request) {
		if (vnic_dev_overlay_offload_ctrl(enic->vdev,
				OVERLAY_FEATURE_GENEVE,
				OVERLAY_OFFLOAD_ENABLE)) {
			dev_err(enic, "failed to enable geneve+option\n");
		} else {
			enic->geneve_opt_enabled = 1;
			dev_info(enic, "Geneve with options is enabled\n");
		}
	}
	/*
	 * Reset the vxlan port if HW vxlan parsing is available. It
	 * is always enabled regardless of overlay offload
	 * enable/disable.
	 */
	if (enic->vxlan) {
		enic->vxlan_port = RTE_VXLAN_DEFAULT_PORT;
		/*
		 * Reset the vxlan port to the default, as the NIC firmware
		 * does not reset it automatically and keeps the old setting.
		 */
		if (vnic_dev_overlay_offload_cfg(enic->vdev,
						 OVERLAY_CFG_VXLAN_PORT_UPDATE,
						 RTE_VXLAN_DEFAULT_PORT)) {
			dev_err(enic, "failed to update vxlan port\n");
			return -EINVAL;
		}
	}
	if (enic_fm_init(enic))
		dev_warning(enic, "Init of flowman failed.\n");
	return 0;
}

static void lock_devcmd(void *priv)
{
	struct enic *enic = priv;

	rte_spinlock_lock(&enic->devcmd_lock);
}

static void unlock_devcmd(void *priv)
{
	struct enic *enic = priv;

	rte_spinlock_unlock(&enic->devcmd_lock);
}

int enic_probe(struct enic *enic)
{
	struct rte_pci_device *pdev = enic->pdev;
	int err = -1;

	dev_debug(enic, "Initializing ENIC PMD\n");

	/* if this is a secondary process the hardware is already initialized */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
	enic->bar0.len = pdev->mem_resource[0].len;

	/* Register vNIC device */
	enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
	if (!enic->vdev) {
		dev_err(enic, "vNIC registration failed, aborting\n");
		goto err_out;
	}

	LIST_INIT(&enic->memzone_list);
	rte_spinlock_init(&enic->memzone_list_lock);

	vnic_register_cbacks(enic->vdev,
		enic_alloc_consistent,
		enic_free_consistent);

	/*
	 * Allocate the consistent memory for stats upfront so both primary and
	 * secondary processes can dump stats.
	 */
	err = vnic_dev_alloc_stats_mem(enic->vdev);
	if (err) {
		dev_err(enic, "Failed to allocate cmd memory, aborting\n");
		goto err_out_unregister;
	}
	/* Issue device open to get device in known state */
	err = enic_dev_open(enic);
	if (err) {
		dev_err(enic, "vNIC dev open failed, aborting\n");
		goto err_out_unregister;
	}

	/* Set ingress vlan rewrite mode before vnic initialization */
	dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
		  enic->ig_vlan_rewrite_mode);
	err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
						enic->ig_vlan_rewrite_mode);
	if (err) {
		dev_err(enic,
			"Failed to set ingress vlan rewrite mode, aborting.\n");
		goto err_out_dev_close;
	}

	/* Issue device init to initialize the vnic-to-switch link.
	 * We'll start with carrier off and wait for link UP
	 * notification later to turn on carrier. We don't need
	 * to wait here for the vnic-to-switch link initialization
	 * to complete; link UP notification is the indication that
	 * the process is complete.
	 */
	err = vnic_dev_init(enic->vdev, 0);
	if (err) {
		dev_err(enic, "vNIC dev init failed, aborting\n");
		goto err_out_dev_close;
	}

	err = enic_dev_init(enic);
	if (err) {
		dev_err(enic, "Device initialization failed, aborting\n");
		goto err_out_dev_close;
	}

	/* Use a PF spinlock to serialize devcmd from PF and VF representors */
	if (enic->switchdev_mode) {
		rte_spinlock_init(&enic->devcmd_lock);
		vnic_register_lock(enic->vdev, lock_devcmd, unlock_devcmd);
	}
	return 0;

err_out_dev_close:
	vnic_dev_close(enic->vdev);
err_out_unregister:
	vnic_dev_unregister(enic->vdev);
err_out:
	return err;
}

void enic_remove(struct enic *enic)
{
	enic_dev_deinit(enic);
	vnic_dev_close(enic->vdev);
	vnic_dev_unregister(enic->vdev);
}