1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2020-2021 Xilinx, Inc.
7 #include <rte_common.h>
10 #include <rte_vhost.h>
14 #include "sfc_vdpa_ops.h"
16 extern uint32_t sfc_logtype_driver;
19 #define PAGE_SIZE (sysconf(_SC_PAGESIZE))
/*
 * Allocate an IOVA-contiguous memzone for an MCDI buffer and DMA-map it
 * into the adapter's VFIO container at a fixed high default IOVA,
 * halving the IOVA on mapping collision until it fits.
 * NOTE(review): several original source lines are elided in this view;
 * the comments below describe only what the visible code shows.
 */
23 sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
24 size_t len, efsys_mem_t *esmp)
27 size_t mcdi_buff_size;
28 const struct rte_memzone *mz = NULL;
/* Allocate on the NUMA node of the underlying PCI device */
29 int numa_node = sva->pdev->device.numa_node;
/* Round the requested length up to a whole number of pages */
32 mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
34 sfc_vdpa_log_init(sva, "name=%s, len=%zu", name, len);
/* Reserve IOVA-contiguous memory so the device can DMA into it */
36 mz = rte_memzone_reserve_aligned(name, mcdi_buff_size,
38 RTE_MEMZONE_IOVA_CONTIG,
41 sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
42 name, (unsigned int)len, rte_strerror(rte_errno));
46 /* IOVA address for MCDI would be re-calculated if mapping
47 * using default IOVA would fail.
48 * TODO: Earlier there was no way to get valid IOVA range.
49 * Recently a patch has been submitted to get the IOVA range
50 * using ioctl. VFIO_IOMMU_GET_INFO. This patch is available
51 * in the kernel version >= 5.4. Support to get the default
52 * IOVA address for MCDI buffer using available IOVA range
53 * would be added later. Meanwhile default IOVA for MCDI buffer
54 * is kept at high mem at 2TB. In case of overlap new available
55 * addresses would be searched and same would be used.
57 mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;
/* Install the virt->IOVA translation in the VFIO container */
60 ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
61 (uint64_t)mz->addr, mcdi_iova,
/* On collision, retry with progressively lower IOVA values */
66 mcdi_iova = mcdi_iova >> 1;
/* Give up once the candidate IOVA can no longer hold the buffer */
67 if (mcdi_iova < mcdi_buff_size) {
69 "DMA mapping failed for MCDI : %s",
70 rte_strerror(rte_errno));
/* Success: publish IOVA, virtual base and size for later unmap/free */
76 esmp->esm_addr = mcdi_iova;
77 esmp->esm_base = mz->addr;
78 sva->mcdi_buff_size = mcdi_buff_size;
81 "DMA name=%s len=%zu => virt=%p iova=0x%" PRIx64,
82 name, len, esmp->esm_base, esmp->esm_addr);
88 sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp)
92 sfc_vdpa_log_init(sva, "name=%s", esmp->esm_mz->name);
94 ret = rte_vfio_container_dma_unmap(sva->vfio_container_fd,
95 (uint64_t)esmp->esm_base,
96 esmp->esm_addr, sva->mcdi_buff_size);
98 sfc_vdpa_err(sva, "DMA unmap failed for MCDI : %s",
99 rte_strerror(rte_errno));
102 "DMA free name=%s => virt=%p iova=0x%" PRIx64,
103 esmp->esm_mz->name, esmp->esm_base, esmp->esm_addr);
105 rte_free((void *)(esmp->esm_base));
107 sva->mcdi_buff_size = 0;
108 memset(esmp, 0, sizeof(*esmp));
/*
 * Map (do_map == true) or unmap the guest VM memory regions into/from the
 * device's VFIO container so the device can DMA using guest physical
 * addresses. On a map failure, regions mapped earlier are rolled back.
 * NOTE(review): some original source lines are elided in this view.
 */
112 sfc_vdpa_dma_map(struct sfc_vdpa_ops_data *ops_data, bool do_map)
116 struct rte_vhost_memory *vhost_mem = NULL;
117 struct rte_vhost_mem_region *mem_reg = NULL;
118 int vfio_container_fd;
121 dev = ops_data->dev_handle;
/* VFIO container fd of the adapter owning this vDPA device */
123 sfc_vdpa_adapter_by_dev_handle(dev)->vfio_container_fd;
/* Fetch the guest memory layout from the vhost library */
125 rc = rte_vhost_get_mem_table(ops_data->vid, &vhost_mem);
128 "failed to get VM memory layout");
132 for (i = 0; i < vhost_mem->nregions; i++) {
133 mem_reg = &vhost_mem->regions[i];
/* Map: host virtual address of region -> guest physical (used as IOVA) */
136 rc = rte_vfio_container_dma_map(vfio_container_fd,
137 mem_reg->host_user_addr,
138 mem_reg->guest_phys_addr,
142 "DMA map failed : %s",
143 rte_strerror(rte_errno));
144 goto failed_vfio_dma_map;
/* Unmap path: remove the same translation */
147 rc = rte_vfio_container_dma_unmap(vfio_container_fd,
148 mem_reg->host_user_addr,
149 mem_reg->guest_phys_addr,
153 "DMA unmap failed : %s",
154 rte_strerror(rte_errno));
/* Rollback: unmap every region that was mapped before the failure */
165 for (j = 0; j < i; j++) {
166 mem_reg = &vhost_mem->regions[j];
167 rte_vfio_container_dma_unmap(vfio_container_fd,
168 mem_reg->host_user_addr,
169 mem_reg->guest_phys_addr,
/*
 * Initialize access to the PCI memory BAR selected by mem_ebrp:
 * record the BAR index, owning PCI device and mapped base address
 * in sva->mem_bar, and set up the BAR lock.
 */
180 sfc_vdpa_mem_bar_init(struct sfc_vdpa_adapter *sva,
181 const efx_bar_region_t *mem_ebrp)
183 struct rte_pci_device *pci_dev = sva->pdev;
184 efsys_bar_t *ebp = &sva->mem_bar;
/* PCI resource describing the BAR chosen by the efx family lookup */
185 struct rte_mem_resource *res =
186 &pci_dev->mem_resource[mem_ebrp->ebr_index];
188 SFC_BAR_LOCK_INIT(ebp, pci_dev->name);
189 ebp->esb_rid = mem_ebrp->ebr_index;
190 ebp->esb_dev = pci_dev;
191 ebp->esb_base = res->addr;
/*
 * Tear down the memory BAR state set up by sfc_vdpa_mem_bar_init():
 * destroy the BAR lock and clear the descriptor.
 */
197 sfc_vdpa_mem_bar_fini(struct sfc_vdpa_adapter *sva)
199 efsys_bar_t *ebp = &sva->mem_bar;
201 SFC_BAR_LOCK_DESTROY(ebp);
202 memset(ebp, 0, sizeof(*ebp));
/*
 * Probe the NIC through libefx with no firmware-variant preference;
 * log on failure. (Error/return lines are elided in this view.)
 */
206 sfc_vdpa_nic_probe(struct sfc_vdpa_adapter *sva)
208 efx_nic_t *enp = sva->nic;
211 rc = efx_nic_probe(enp, EFX_FW_VARIANT_DONT_CARE);
213 sfc_vdpa_err(sva, "nic probe failed: %s", rte_strerror(rc));
/*
 * Negotiate queue resources with firmware: set driver min/max limits,
 * init the NIC, read back the VI pool actually granted, clamp it to the
 * requested maxima and derive the usable queue-pair count
 * (sva->max_queue_count).
 * NOTE(review): error labels/returns are elided in this view.
 */
219 sfc_vdpa_estimate_resource_limits(struct sfc_vdpa_adapter *sva)
221 efx_drv_limits_t limits;
223 uint32_t evq_allocated;
224 uint32_t rxq_allocated;
225 uint32_t txq_allocated;
226 uint32_t max_queue_cnt;
228 memset(&limits, 0, sizeof(limits));
230 /* Request at least one Rx and Tx queue */
231 limits.edl_min_rxq_count = 1;
232 limits.edl_min_txq_count = 1;
233 /* Management event queue plus event queue for Tx/Rx queue */
234 limits.edl_min_evq_count =
235 1 + RTE_MAX(limits.edl_min_rxq_count, limits.edl_min_txq_count);
/* Upper bounds: one EVQ per queue pair plus the management EVQ */
237 limits.edl_max_rxq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
238 limits.edl_max_txq_count = SFC_VDPA_MAX_QUEUE_PAIRS;
239 limits.edl_max_evq_count = 1 + SFC_VDPA_MAX_QUEUE_PAIRS;
241 SFC_VDPA_ASSERT(limits.edl_max_evq_count >= limits.edl_min_rxq_count);
242 SFC_VDPA_ASSERT(limits.edl_max_rxq_count >= limits.edl_min_rxq_count);
243 SFC_VDPA_ASSERT(limits.edl_max_txq_count >= limits.edl_min_rxq_count);
245 /* Configure the minimum required resources needed for the
246 * driver to operate, and the maximum desired resources that the
247 * driver is capable of using.
249 sfc_vdpa_log_init(sva, "set drv limit");
250 efx_nic_set_drv_limits(sva->nic, &limits);
252 sfc_vdpa_log_init(sva, "init nic");
253 rc = efx_nic_init(sva->nic);
255 sfc_vdpa_err(sva, "nic init failed: %s", rte_strerror(rc));
259 /* Find resource dimensions assigned by firmware to this function */
260 rc = efx_nic_get_vi_pool(sva->nic, &evq_allocated, &rxq_allocated,
263 sfc_vdpa_err(sva, "vi pool get failed: %s", rte_strerror(rc));
264 goto fail_get_vi_pool;
267 /* It still may allocate more than maximum, ensure limit */
268 evq_allocated = RTE_MIN(evq_allocated, limits.edl_max_evq_count);
269 rxq_allocated = RTE_MIN(rxq_allocated, limits.edl_max_rxq_count);
270 txq_allocated = RTE_MIN(txq_allocated, limits.edl_max_txq_count);
/* A queue pair needs both an Rx and a Tx queue */
273 max_queue_cnt = RTE_MIN(rxq_allocated, txq_allocated);
274 /* Subtract management EVQ not used for traffic */
275 max_queue_cnt = RTE_MIN(evq_allocated - 1, max_queue_cnt);
277 SFC_VDPA_ASSERT(max_queue_cnt > 0);
279 sva->max_queue_count = max_queue_cnt;
/* Failure path: undo efx_nic_init() */
284 efx_nic_fini(sva->nic);
286 sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
/*
 * Bring up the hardware for vDPA use, in order: detect the efx family,
 * map the memory BAR, create the libefx NIC object, init MCDI, probe and
 * reset the NIC, size queue resources, then init virtio and filter
 * subsystems. Each failure label unwinds the steps completed so far.
 * NOTE(review): several error-check and label lines are elided in this
 * view; the unwind ladder below is partial.
 */
291 sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva)
293 efx_bar_region_t mem_ebr;
297 sfc_vdpa_log_init(sva, "entry");
299 sfc_vdpa_log_init(sva, "get family");
/* Identify controller family and the BAR holding the control window */
300 rc = sfc_efx_family(sva->pdev, &mem_ebr, &sva->family);
303 sfc_vdpa_log_init(sva,
304 "family is %u, membar is %d,"
305 "function control window offset is %#" PRIx64,
306 sva->family, mem_ebr.ebr_index, mem_ebr.ebr_offset);
308 sfc_vdpa_log_init(sva, "init mem bar");
309 rc = sfc_vdpa_mem_bar_init(sva, &mem_ebr);
311 goto fail_mem_bar_init;
313 sfc_vdpa_log_init(sva, "create nic");
314 rte_spinlock_init(&sva->nic_lock);
315 rc = efx_nic_create(sva->family, (efsys_identifier_t *)sva,
316 &sva->mem_bar, mem_ebr.ebr_offset,
317 &sva->nic_lock, &enp);
319 sfc_vdpa_err(sva, "nic create failed: %s", rte_strerror(rc));
320 goto fail_nic_create;
324 sfc_vdpa_log_init(sva, "init mcdi");
325 rc = sfc_vdpa_mcdi_init(sva);
327 sfc_vdpa_err(sva, "mcdi init failed: %s", rte_strerror(rc));
331 sfc_vdpa_log_init(sva, "probe nic");
332 rc = sfc_vdpa_nic_probe(sva);
336 sfc_vdpa_log_init(sva, "reset nic");
337 rc = efx_nic_reset(enp);
339 sfc_vdpa_err(sva, "nic reset failed: %s", rte_strerror(rc));
343 sfc_vdpa_log_init(sva, "estimate resource limits");
344 rc = sfc_vdpa_estimate_resource_limits(sva);
346 goto fail_estimate_rsrc_limits;
348 sfc_vdpa_log_init(sva, "init virtio");
349 rc = efx_virtio_init(enp);
351 sfc_vdpa_err(sva, "virtio init failed: %s", rte_strerror(rc));
352 goto fail_virtio_init;
355 sfc_vdpa_log_init(sva, "init filter");
356 rc = efx_filter_init(enp);
358 sfc_vdpa_err(sva, "filter init failed: %s", rte_strerror(rc));
359 goto fail_filter_init;
362 sfc_vdpa_log_init(sva, "done");
/* Error unwind: reverse order of the init steps above */
367 efx_virtio_fini(enp);
372 fail_estimate_rsrc_limits:
374 efx_nic_unprobe(enp);
377 sfc_vdpa_mcdi_fini(sva);
380 sfc_vdpa_log_init(sva, "destroy nic");
382 efx_nic_destroy(enp);
385 sfc_vdpa_mem_bar_fini(sva);
389 sfc_vdpa_log_init(sva, "failed: %s", rte_strerror(rc));
/*
 * Tear down everything sfc_vdpa_hw_init() set up, in reverse order:
 * virtio, NIC unprobe, MCDI, NIC object destruction and the memory BAR.
 */
394 sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sva)
396 efx_nic_t *enp = sva->nic;
398 sfc_vdpa_log_init(sva, "entry");
400 sfc_vdpa_log_init(sva, "virtio fini");
401 efx_virtio_fini(enp);
403 sfc_vdpa_log_init(sva, "unprobe nic");
404 efx_nic_unprobe(enp);
406 sfc_vdpa_log_init(sva, "mcdi fini");
407 sfc_vdpa_mcdi_fini(sva);
409 sfc_vdpa_log_init(sva, "nic fini");
412 sfc_vdpa_log_init(sva, "destroy nic");
414 efx_nic_destroy(enp);
416 sfc_vdpa_mem_bar_fini(sva);