1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
11 #include <ethdev_driver.h>
12 #include <ethdev_pci.h>
13 #include <rte_memcpy.h>
14 #include <rte_string_fns.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_branch_prediction.h>
19 #include <rte_bus_pci.h>
20 #include <rte_ether.h>
23 #include <rte_common.h>
24 #include <rte_errno.h>
25 #include <rte_cpuflags.h>
28 #include <rte_memory.h>
31 #include <rte_cycles.h>
32 #include <rte_kvargs.h>
34 #include "virtio_ethdev.h"
35 #include "virtio_pci.h"
36 #include "virtio_logs.h"
37 #include "virtqueue.h"
38 #include "virtio_rxtx.h"
39 #include "virtio_user/virtio_user_dev.h"
/* Forward declarations for the ethdev callbacks and local helpers defined
 * later in this file.
 */
41 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
42 static int virtio_dev_configure(struct rte_eth_dev *dev);
43 static int virtio_dev_start(struct rte_eth_dev *dev);
44 static int virtio_dev_stop(struct rte_eth_dev *dev);
45 static int virtio_dev_promiscuous_enable(struct rte_eth_dev *dev);
46 static int virtio_dev_promiscuous_disable(struct rte_eth_dev *dev);
47 static int virtio_dev_allmulticast_enable(struct rte_eth_dev *dev);
48 static int virtio_dev_allmulticast_disable(struct rte_eth_dev *dev);
49 static uint32_t virtio_dev_speed_capa_get(uint32_t speed);
50 static int virtio_dev_devargs_parse(struct rte_devargs *devargs,
54 static int virtio_dev_info_get(struct rte_eth_dev *dev,
55 struct rte_eth_dev_info *dev_info);
56 static int virtio_dev_link_update(struct rte_eth_dev *dev,
57 int wait_to_complete);
58 static int virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask);
60 static void virtio_set_hwaddr(struct virtio_hw *hw);
61 static void virtio_get_hwaddr(struct virtio_hw *hw);
63 static int virtio_dev_stats_get(struct rte_eth_dev *dev,
64 struct rte_eth_stats *stats);
65 static int virtio_dev_xstats_get(struct rte_eth_dev *dev,
66 struct rte_eth_xstat *xstats, unsigned n);
67 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
68 struct rte_eth_xstat_name *xstats_names,
70 static int virtio_dev_stats_reset(struct rte_eth_dev *dev);
71 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);
72 static int virtio_vlan_filter_set(struct rte_eth_dev *dev,
73 uint16_t vlan_id, int on);
74 static int virtio_mac_addr_add(struct rte_eth_dev *dev,
75 struct rte_ether_addr *mac_addr,
76 uint32_t index, uint32_t vmdq);
77 static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index);
78 static int virtio_mac_addr_set(struct rte_eth_dev *dev,
79 struct rte_ether_addr *mac_addr);
81 static int virtio_intr_disable(struct rte_eth_dev *dev);
83 static int virtio_dev_queue_stats_mapping_set(
84 struct rte_eth_dev *eth_dev,
89 static void virtio_notify_peers(struct rte_eth_dev *dev);
90 static void virtio_ack_link_announce(struct rte_eth_dev *dev);
93 * The set of PCI devices this driver supports
95 static const struct rte_pci_id pci_id_virtio_map[] = {
/* Both the legacy (transitional) and the modern virtio-net PCI device IDs
 * are matched; the list is terminated by a zero vendor-id sentinel.
 */
96 { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) },
97 { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) },
98 { .vendor_id = 0, /* sentinel */ },
/* Maps an xstats display name to the byte offset of the corresponding
 * counter inside the per-queue stats structure (the offset is applied to
 * the virtnet_rx/virtnet_tx pointer in virtio_dev_xstats_get()).
 */
101 struct rte_virtio_xstats_name_off {
102 char name[RTE_ETH_XSTATS_NAME_SIZE];
106 /* [rt]x_qX_ is prepended to the name string here */
/* Per-RX-queue software counters exposed as xstats; every offset points
 * into struct virtnet_rx. size_bins[] buckets packets by frame length.
 */
107 static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = {
108 {"good_packets", offsetof(struct virtnet_rx, stats.packets)},
109 {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)},
110 {"errors", offsetof(struct virtnet_rx, stats.errors)},
111 {"multicast_packets", offsetof(struct virtnet_rx, stats.multicast)},
112 {"broadcast_packets", offsetof(struct virtnet_rx, stats.broadcast)},
113 {"undersize_packets", offsetof(struct virtnet_rx, stats.size_bins[0])},
114 {"size_64_packets", offsetof(struct virtnet_rx, stats.size_bins[1])},
115 {"size_65_127_packets", offsetof(struct virtnet_rx, stats.size_bins[2])},
116 {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])},
117 {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])},
118 {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])},
119 {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])},
120 {"size_1519_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])},
123 /* [rt]x_qX_ is prepended to the name string here */
/* Per-TX-queue software counters exposed as xstats; every offset points
 * into struct virtnet_tx. Note: no "errors" entry, unlike the RX table.
 */
124 static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
125 {"good_packets", offsetof(struct virtnet_tx, stats.packets)},
126 {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)},
127 {"multicast_packets", offsetof(struct virtnet_tx, stats.multicast)},
128 {"broadcast_packets", offsetof(struct virtnet_tx, stats.broadcast)},
129 {"undersize_packets", offsetof(struct virtnet_tx, stats.size_bins[0])},
130 {"size_64_packets", offsetof(struct virtnet_tx, stats.size_bins[1])},
131 {"size_65_127_packets", offsetof(struct virtnet_tx, stats.size_bins[2])},
132 {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])},
133 {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])},
134 {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])},
135 {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])},
136 {"size_1519_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])},
/* Number of xstats entries per RX and per TX queue (array element counts
 * of the name/offset tables above).
 */
139 #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \
140 sizeof(rte_virtio_rxq_stat_strings[0]))
141 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
142 sizeof(rte_virtio_txq_stat_strings[0]))
/* Per-port internal state, indexed by ethdev port id (used via the
 * VTPCI_OPS()/VTPCI_IO() accessors).
 */
144 struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
/* Send one control-queue command on a packed (VIRTIO 1.1) virtqueue.
 *
 * Builds a descriptor chain of: control header, pkt_num data segments
 * (dlen[] bytes each), and a device-writable status byte, all backed by
 * the control queue's header memzone. Publishes the head descriptor
 * last, notifies the device, then busy-waits until the head is marked
 * used before reclaiming the descriptors.
 *
 * Returns a pointer to the command result in the header memzone.
 * Caller (virtio_send_command) holds cvq->lock.
 */
146 static struct virtio_pmd_ctrl *
147 virtio_send_command_packed(struct virtnet_ctl *cvq,
148 struct virtio_pmd_ctrl *ctrl,
149 int *dlen, int pkt_num)
151 struct virtqueue *vq = cvq->vq;
153 struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
154 struct virtio_pmd_ctrl *result;
161 * Format is enforced in qemu code:
162 * One TX packet for header;
163 * At least one TX packet per argument;
164 * One RX packet for ACK.
/* First descriptor: the control header (device-readable). Its flags are
 * held back and stored last, below, to publish the whole chain at once.
 */
166 head = vq->vq_avail_idx;
167 flags = vq->vq_packed.cached_flags;
168 desc[head].addr = cvq->virtio_net_hdr_mem;
169 desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
/* On wrap-around, flip the cached avail/used flag polarity. */
172 if (++vq->vq_avail_idx >= vq->vq_nentries) {
173 vq->vq_avail_idx -= vq->vq_nentries;
174 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
/* One device-readable descriptor per command data segment; the segments
 * live back-to-back in the memzone after the header and status fields.
 */
177 for (k = 0; k < pkt_num; k++) {
178 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
179 + sizeof(struct virtio_net_ctrl_hdr)
180 + sizeof(ctrl->status) + sizeof(uint8_t) * sum;
181 desc[vq->vq_avail_idx].len = dlen[k];
182 desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT |
183 vq->vq_packed.cached_flags;
187 if (++vq->vq_avail_idx >= vq->vq_nentries) {
188 vq->vq_avail_idx -= vq->vq_nentries;
189 vq->vq_packed.cached_flags ^=
190 VRING_PACKED_DESC_F_AVAIL_USED;
/* Last descriptor: device-writable ack/status byte. */
194 desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem
195 + sizeof(struct virtio_net_ctrl_hdr);
196 desc[vq->vq_avail_idx].len = sizeof(ctrl->status);
197 desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE |
198 vq->vq_packed.cached_flags;
201 if (++vq->vq_avail_idx >= vq->vq_nentries) {
202 vq->vq_avail_idx -= vq->vq_nentries;
203 vq->vq_packed.cached_flags ^= VRING_PACKED_DESC_F_AVAIL_USED;
/* Store the head flags with the proper barrier so the device only sees
 * the chain once it is fully built.
 */
206 virtqueue_store_flags_packed(&desc[head], VRING_DESC_F_NEXT | flags,
207 vq->hw->weak_barriers);
209 virtio_wmb(vq->hw->weak_barriers);
210 virtqueue_notify(vq);
212 /* wait for used desc in virtqueue
213 * desc_is_used has a load-acquire or rte_io_rmb inside
215 while (!desc_is_used(&desc[head], vq))
218 /* now get used descriptors */
219 vq->vq_free_cnt += nb_descs;
220 vq->vq_used_cons_idx += nb_descs;
221 if (vq->vq_used_cons_idx >= vq->vq_nentries) {
222 vq->vq_used_cons_idx -= vq->vq_nentries;
223 vq->vq_packed.used_wrap_counter ^= 1;
226 PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\n"
227 "vq->vq_avail_idx=%d\n"
228 "vq->vq_used_cons_idx=%d\n"
229 "vq->vq_packed.cached_flags=0x%x\n"
230 "vq->vq_packed.used_wrap_counter=%d\n",
233 vq->vq_used_cons_idx,
234 vq->vq_packed.cached_flags,
235 vq->vq_packed.used_wrap_counter);
/* Result (including the device-written status) sits at the start of the
 * header memzone.
 */
237 result = cvq->virtio_net_hdr_mz->addr;
/* Send one control-queue command on a split (VIRTIO 1.0 style) virtqueue.
 *
 * Chains: control header -> pkt_num data descriptors -> device-writable
 * status descriptor, using the free-list starting at vq_desc_head_idx.
 * Busy-waits for a used entry, then walks the used ring to return the
 * descriptors to the free list.
 *
 * Returns a pointer to the command result in the header memzone.
 * Caller (virtio_send_command) holds cvq->lock.
 */
241 static struct virtio_pmd_ctrl *
242 virtio_send_command_split(struct virtnet_ctl *cvq,
243 struct virtio_pmd_ctrl *ctrl,
244 int *dlen, int pkt_num)
246 struct virtio_pmd_ctrl *result;
247 struct virtqueue *vq = cvq->vq;
251 head = vq->vq_desc_head_idx;
254 * Format is enforced in qemu code:
255 * One TX packet for header;
256 * At least one TX packet per argument;
257 * One RX packet for ACK.
/* Head descriptor carries the control header (device-readable). */
259 vq->vq_split.ring.desc[head].flags = VRING_DESC_F_NEXT;
260 vq->vq_split.ring.desc[head].addr = cvq->virtio_net_hdr_mem;
261 vq->vq_split.ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
263 i = vq->vq_split.ring.desc[head].next;
/* One descriptor per data segment; segments are laid out back-to-back in
 * the memzone after the header and status fields.
 */
265 for (k = 0; k < pkt_num; k++) {
266 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_NEXT;
267 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
268 + sizeof(struct virtio_net_ctrl_hdr)
269 + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
270 vq->vq_split.ring.desc[i].len = dlen[k];
273 i = vq->vq_split.ring.desc[i].next;
/* Final descriptor: device-writable status byte (no F_NEXT). */
276 vq->vq_split.ring.desc[i].flags = VRING_DESC_F_WRITE;
277 vq->vq_split.ring.desc[i].addr = cvq->virtio_net_hdr_mem
278 + sizeof(struct virtio_net_ctrl_hdr);
279 vq->vq_split.ring.desc[i].len = sizeof(ctrl->status);
282 vq->vq_desc_head_idx = vq->vq_split.ring.desc[i].next;
284 vq_update_avail_ring(vq, head);
285 vq_update_avail_idx(vq);
287 PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index);
289 virtqueue_notify(vq);
/* Busy-wait for the device to consume the chain. */
291 while (virtqueue_nused(vq) == 0)
/* Drain the used ring: walk each returned chain and splice it back onto
 * the free-descriptor list.
 */
294 while (virtqueue_nused(vq)) {
295 uint32_t idx, desc_idx, used_idx;
296 struct vring_used_elem *uep;
298 used_idx = (uint32_t)(vq->vq_used_cons_idx
299 & (vq->vq_nentries - 1));
300 uep = &vq->vq_split.ring.used->ring[used_idx];
301 idx = (uint32_t) uep->id;
304 while (vq->vq_split.ring.desc[desc_idx].flags &
306 desc_idx = vq->vq_split.ring.desc[desc_idx].next;
310 vq->vq_split.ring.desc[desc_idx].next = vq->vq_desc_head_idx;
311 vq->vq_desc_head_idx = idx;
313 vq->vq_used_cons_idx++;
317 PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d",
318 vq->vq_free_cnt, vq->vq_desc_head_idx);
/* Result (including the device-written status) sits at the start of the
 * header memzone.
 */
320 result = cvq->virtio_net_hdr_mz->addr;
/* Common control-queue entry point.
 *
 * Validates the control queue and descriptor availability, copies the
 * command into the header memzone under cvq->lock, dispatches to the
 * packed or split implementation, and returns the device-written status
 * (pre-set to ~0 so a non-response is distinguishable from ACK).
 */
325 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
326 int *dlen, int pkt_num)
328 virtio_net_ctrl_ack status = ~0;
329 struct virtio_pmd_ctrl *result;
330 struct virtqueue *vq;
332 ctrl->status = status;
334 if (!cvq || !cvq->vq) {
335 PMD_INIT_LOG(ERR, "Control queue is not supported.");
/* Serialize all control-queue traffic. */
339 rte_spinlock_lock(&cvq->lock);
342 PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
343 "vq->hw->cvq = %p vq = %p",
344 vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
/* Need pkt_num data descriptors plus header and status (hence +2). */
346 if (vq->vq_free_cnt < pkt_num + 2 || pkt_num < 1) {
347 rte_spinlock_unlock(&cvq->lock);
351 memcpy(cvq->virtio_net_hdr_mz->addr, ctrl,
352 sizeof(struct virtio_pmd_ctrl));
354 if (vtpci_packed_queue(vq->hw))
355 result = virtio_send_command_packed(cvq, ctrl, dlen, pkt_num);
357 result = virtio_send_command_split(cvq, ctrl, dlen, pkt_num);
359 rte_spinlock_unlock(&cvq->lock);
360 return result->status;
/* Program the number of active queue pairs via the control queue
 * (VIRTIO_NET_CTRL_MQ / VQ_PAIRS_SET command).
 */
364 virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues)
366 struct virtio_hw *hw = dev->data->dev_private;
367 struct virtio_pmd_ctrl ctrl;
371 ctrl.hdr.class = VIRTIO_NET_CTRL_MQ;
372 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
373 memcpy(ctrl.data, &nb_queues, sizeof(uint16_t));
375 dlen[0] = sizeof(uint16_t);
377 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
379 PMD_INIT_LOG(ERR, "Multiqueue configured but send command "
380 "failed, this is too late now...");
/* rx/tx queue_release ethdev callback. NOTE(review): queue resources
 * appear to be reclaimed centrally in virtio_free_queues() at close time,
 * so this per-queue hook takes no action — confirm against full source.
 */
388 virtio_dev_queue_release(void *queue __rte_unused)
/* Total number of virtqueues: two per queue pair (RX + TX), plus one
 * control queue when VIRTIO_NET_F_CTRL_VQ was negotiated.
 */
394 virtio_get_nr_vq(struct virtio_hw *hw)
396 uint16_t nr_vq = hw->max_queue_pairs * 2;
398 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
/* Reset a virtqueue's software indices and (re)initialize its descriptor
 * ring inside the already-reserved, zeroed ring memory. Handles both the
 * packed and split ring layouts and leaves device interrupts disabled.
 */
405 virtio_init_vring(struct virtqueue *vq)
407 int size = vq->vq_nentries;
408 uint8_t *ring_mem = vq->vq_ring_virt_mem;
410 PMD_INIT_FUNC_TRACE();
412 memset(ring_mem, 0, vq->vq_ring_size);
/* All consumer/producer indices restart from zero; the whole ring is
 * free again.
 */
414 vq->vq_used_cons_idx = 0;
415 vq->vq_desc_head_idx = 0;
416 vq->vq_avail_idx = 0;
417 vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1);
418 vq->vq_free_cnt = vq->vq_nentries;
419 memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
420 if (vtpci_packed_queue(vq->hw)) {
421 vring_init_packed(&vq->vq_packed.ring, ring_mem,
422 VIRTIO_PCI_VRING_ALIGN, size);
423 vring_desc_init_packed(vq, size);
425 struct vring *vr = &vq->vq_split.ring;
427 vring_init_split(vr, ring_mem, VIRTIO_PCI_VRING_ALIGN, size);
428 vring_desc_init_split(vr->desc, size);
431 * Disable device(host) interrupting guest
433 virtqueue_disable_intr(vq);
/* Allocate and initialize one virtqueue (RX, TX or control).
 *
 * Reads the queue size from the device, allocates the virtqueue struct
 * and its vring memzone (IOVA-contiguous) on the device's NUMA node,
 * plus queue-type-specific extras: a TX header region or a control-queue
 * header page, and the RX software ring. Finally hands the queue to the
 * transport via setup_queue(). On failure the memzones are freed.
 */
437 virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
439 char vq_name[VIRTQUEUE_MAX_NAME_SZ];
440 char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ];
441 const struct rte_memzone *mz = NULL, *hdr_mz = NULL;
442 unsigned int vq_size, size;
443 struct virtio_hw *hw = dev->data->dev_private;
444 struct virtnet_rx *rxvq = NULL;
445 struct virtnet_tx *txvq = NULL;
446 struct virtnet_ctl *cvq = NULL;
447 struct virtqueue *vq;
448 size_t sz_hdr_mz = 0;
449 void *sw_ring = NULL;
450 int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx);
452 int numa_node = dev->device->numa_node;
454 PMD_INIT_LOG(INFO, "setting up queue: %u on NUMA node %d",
455 vtpci_queue_idx, numa_node);
458 * Read the virtqueue size from the Queue Size field
459 * Always power of 2 and if 0 virtqueue does not exist
461 vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
462 PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
464 PMD_INIT_LOG(ERR, "virtqueue does not exist");
/* Split rings require a power-of-2 size; packed rings do not. */
468 if (!vtpci_packed_queue(hw) && !rte_is_power_of_2(vq_size)) {
469 PMD_INIT_LOG(ERR, "split virtqueue size is not power of 2");
473 snprintf(vq_name, sizeof(vq_name), "port%d_vq%d",
474 dev->data->port_id, vtpci_queue_idx);
/* The virtqueue struct is followed in-line by one vq_desc_extra entry
 * per descriptor.
 */
476 size = RTE_ALIGN_CEIL(sizeof(*vq) +
477 vq_size * sizeof(struct vq_desc_extra),
478 RTE_CACHE_LINE_SIZE);
479 if (queue_type == VTNET_TQ) {
481 * For each xmit packet, allocate a virtio_net_hdr
482 * and indirect ring elements
484 sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region);
485 } else if (queue_type == VTNET_CQ) {
486 /* Allocate a page for control vq command, data and status */
487 sz_hdr_mz = PAGE_SIZE;
490 vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE,
493 PMD_INIT_LOG(ERR, "can not allocate vq");
496 hw->vqs[vtpci_queue_idx] = vq;
499 vq->vq_queue_index = vtpci_queue_idx;
500 vq->vq_nentries = vq_size;
/* Packed-ring bookkeeping: wrap counter starts at 1 and RX descriptors
 * are device-writable.
 */
501 if (vtpci_packed_queue(hw)) {
502 vq->vq_packed.used_wrap_counter = 1;
503 vq->vq_packed.cached_flags = VRING_PACKED_DESC_F_AVAIL;
504 vq->vq_packed.event_flags_shadow = 0;
505 if (queue_type == VTNET_RQ)
506 vq->vq_packed.cached_flags |= VRING_DESC_F_WRITE;
510 * Reserve a memzone for vring elements
512 size = vring_size(hw, vq_size, VIRTIO_PCI_VRING_ALIGN);
513 vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
514 PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d",
515 size, vq->vq_ring_size);
517 mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size,
518 numa_node, RTE_MEMZONE_IOVA_CONTIG,
519 VIRTIO_PCI_VRING_ALIGN);
/* EEXIST means a previous init already reserved it (e.g. restart):
 * reuse the existing memzone.
 */
521 if (rte_errno == EEXIST)
522 mz = rte_memzone_lookup(vq_name);
529 memset(mz->addr, 0, mz->len);
531 vq->vq_ring_mem = mz->iova;
532 vq->vq_ring_virt_mem = mz->addr;
533 PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64,
535 PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64,
536 (uint64_t)(uintptr_t)mz->addr);
538 virtio_init_vring(vq);
/* TX and control queues keep their headers in a separate memzone. */
541 snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr",
542 dev->data->port_id, vtpci_queue_idx);
543 hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz,
544 numa_node, RTE_MEMZONE_IOVA_CONTIG,
545 RTE_CACHE_LINE_SIZE);
546 if (hdr_mz == NULL) {
547 if (rte_errno == EEXIST)
548 hdr_mz = rte_memzone_lookup(vq_hdr_name);
549 if (hdr_mz == NULL) {
/* RX: allocate the software ring used by the vectorized/mergeable RX
 * paths (extra RTE_PMD_VIRTIO_RX_MAX_BURST slots).
 */
556 if (queue_type == VTNET_RQ) {
557 size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) *
558 sizeof(vq->sw_ring[0]);
560 sw_ring = rte_zmalloc_socket("sw_ring", sz_sw,
561 RTE_CACHE_LINE_SIZE, numa_node);
563 PMD_INIT_LOG(ERR, "can not allocate RX soft ring");
568 vq->sw_ring = sw_ring;
571 rxvq->port_id = dev->data->port_id;
573 } else if (queue_type == VTNET_TQ) {
576 txvq->port_id = dev->data->port_id;
578 txvq->virtio_net_hdr_mz = hdr_mz;
579 txvq->virtio_net_hdr_mem = hdr_mz->iova;
580 } else if (queue_type == VTNET_CQ) {
584 cvq->virtio_net_hdr_mz = hdr_mz;
585 cvq->virtio_net_hdr_mem = hdr_mz->iova;
586 memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE);
591 /* For virtio_user case (that is when hw->virtio_user_dev is not NULL),
592 * we use virtual address. And we need properly set _offset_, please see
593 * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information.
595 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY || hw->bus_type == VIRTIO_BUS_PCI_MODERN) {
596 vq->offset = offsetof(struct rte_mbuf, buf_iova);
597 } else if (hw->bus_type == VIRTIO_BUS_USER) {
598 vq->vq_ring_mem = (uintptr_t)mz->addr;
599 vq->offset = offsetof(struct rte_mbuf, buf_addr);
600 if (queue_type == VTNET_TQ)
601 txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
602 else if (queue_type == VTNET_CQ)
603 cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr;
/* TX: pre-build one indirect descriptor table per slot, with the first
 * indirect descriptor permanently pointing at that slot's virtio-net
 * header.
 */
606 if (queue_type == VTNET_TQ) {
607 struct virtio_tx_region *txr;
611 memset(txr, 0, vq_size * sizeof(*txr));
612 for (i = 0; i < vq_size; i++) {
613 /* first indirect descriptor is always the tx header */
614 if (!vtpci_packed_queue(hw)) {
615 struct vring_desc *start_dp = txr[i].tx_indir;
616 vring_desc_init_split(start_dp,
617 RTE_DIM(txr[i].tx_indir));
618 start_dp->addr = txvq->virtio_net_hdr_mem
620 + offsetof(struct virtio_tx_region,
622 start_dp->len = hw->vtnet_hdr_size;
623 start_dp->flags = VRING_DESC_F_NEXT;
625 struct vring_packed_desc *start_dp =
626 txr[i].tx_packed_indir;
627 vring_desc_init_indirect_packed(start_dp,
628 RTE_DIM(txr[i].tx_packed_indir));
629 start_dp->addr = txvq->virtio_net_hdr_mem
631 + offsetof(struct virtio_tx_region,
633 start_dp->len = hw->vtnet_hdr_size;
638 if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
639 PMD_INIT_LOG(ERR, "setup_queue failed");
/* Error path: release the memzones reserved above. */
647 rte_memzone_free(hdr_mz);
648 rte_memzone_free(mz);
/* Free every virtqueue and its attached resources (RX software ring and
 * ring memzone; TX/CQ header memzones), by queue type.
 */
655 virtio_free_queues(struct virtio_hw *hw)
657 uint16_t nr_vq = virtio_get_nr_vq(hw);
658 struct virtqueue *vq;
665 for (i = 0; i < nr_vq; i++) {
670 queue_type = virtio_get_queue_type(hw, i);
671 if (queue_type == VTNET_RQ) {
672 rte_free(vq->sw_ring);
673 rte_memzone_free(vq->rxq.mz);
674 } else if (queue_type == VTNET_TQ) {
675 rte_memzone_free(vq->txq.mz);
676 rte_memzone_free(vq->txq.virtio_net_hdr_mz);
678 rte_memzone_free(vq->cq.mz);
679 rte_memzone_free(vq->cq.virtio_net_hdr_mz);
/* Allocate the hw->vqs pointer array and initialize every virtqueue;
 * on any per-queue failure, tear down what was already set up.
 */
691 virtio_alloc_queues(struct rte_eth_dev *dev)
693 struct virtio_hw *hw = dev->data->dev_private;
694 uint16_t nr_vq = virtio_get_nr_vq(hw);
698 hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0);
700 PMD_INIT_LOG(ERR, "failed to allocate vqs");
704 for (i = 0; i < nr_vq; i++) {
705 ret = virtio_init_queue(dev, i);
707 virtio_free_queues(hw);
715 static void virtio_queues_unbind_intr(struct rte_eth_dev *dev);
/* dev_close callback: primary-process-only teardown. Masks the config
 * IRQ, unbinds and disables interrupts, frees mbufs and virtqueues, and
 * releases bus-specific resources (virtio-user device or PCI mappings).
 */
718 virtio_dev_close(struct rte_eth_dev *dev)
720 struct virtio_hw *hw = dev->data->dev_private;
721 struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
723 PMD_INIT_LOG(DEBUG, "virtio_dev_close");
724 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
732 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
733 VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR);
735 virtio_queues_unbind_intr(dev);
737 if (intr_conf->lsc || intr_conf->rxq) {
738 virtio_intr_disable(dev);
739 rte_intr_efd_disable(dev->intr_handle);
740 rte_free(dev->intr_handle->intr_vec);
741 dev->intr_handle->intr_vec = NULL;
745 virtio_dev_free_mbufs(dev);
746 virtio_free_queues(hw);
748 #ifdef RTE_VIRTIO_USER
749 if (hw->bus_type == VIRTIO_BUS_USER)
750 virtio_user_dev_uninit(dev->data->dev_private);
754 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(dev));
755 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY)
756 rte_pci_ioport_unmap(VTPCI_IO(hw));
/* Enable promiscuous mode via the VIRTIO_NET_CTRL_RX_PROMISC control
 * command; requires the host to offer VIRTIO_NET_F_CTRL_RX.
 */
763 virtio_dev_promiscuous_enable(struct rte_eth_dev *dev)
765 struct virtio_hw *hw = dev->data->dev_private;
766 struct virtio_pmd_ctrl ctrl;
770 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
771 PMD_INIT_LOG(INFO, "host does not support rx control");
775 ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
776 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
780 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
782 PMD_INIT_LOG(ERR, "Failed to enable promisc");
/* Disable promiscuous mode via the VIRTIO_NET_CTRL_RX_PROMISC control
 * command; requires the host to offer VIRTIO_NET_F_CTRL_RX.
 */
790 virtio_dev_promiscuous_disable(struct rte_eth_dev *dev)
792 struct virtio_hw *hw = dev->data->dev_private;
793 struct virtio_pmd_ctrl ctrl;
797 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
798 PMD_INIT_LOG(INFO, "host does not support rx control");
802 ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
803 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
807 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
809 PMD_INIT_LOG(ERR, "Failed to disable promisc");
/* Enable all-multicast RX via the VIRTIO_NET_CTRL_RX_ALLMULTI control
 * command; requires the host to offer VIRTIO_NET_F_CTRL_RX.
 */
817 virtio_dev_allmulticast_enable(struct rte_eth_dev *dev)
819 struct virtio_hw *hw = dev->data->dev_private;
820 struct virtio_pmd_ctrl ctrl;
824 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
825 PMD_INIT_LOG(INFO, "host does not support rx control");
829 ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
830 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
834 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
836 PMD_INIT_LOG(ERR, "Failed to enable allmulticast");
/* Disable all-multicast RX via the VIRTIO_NET_CTRL_RX_ALLMULTI control
 * command; requires the host to offer VIRTIO_NET_F_CTRL_RX.
 */
844 virtio_dev_allmulticast_disable(struct rte_eth_dev *dev)
846 struct virtio_hw *hw = dev->data->dev_private;
847 struct virtio_pmd_ctrl ctrl;
851 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) {
852 PMD_INIT_LOG(INFO, "host does not support rx control");
856 ctrl.hdr.class = VIRTIO_NET_CTRL_RX;
857 ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI;
861 ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1);
863 PMD_INIT_LOG(ERR, "Failed to disable allmulticast");
870 #define VLAN_TAG_LEN 4 /* 802.3ac tag (not DMA'd) */
/* mtu_set callback: validate that the requested MTU keeps the resulting
 * frame size between RTE_ETHER_MIN_MTU and the smaller of the device's
 * max_mtu-derived limit and VIRTIO_MAX_RX_PKTLEN.
 */
872 virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
874 struct virtio_hw *hw = dev->data->dev_private;
875 uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
877 uint32_t frame_size = mtu + ether_hdr_len;
878 uint32_t max_frame_size = hw->max_mtu + ether_hdr_len;
880 max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN);
882 if (mtu < RTE_ETHER_MIN_MTU || frame_size > max_frame_size) {
883 PMD_INIT_LOG(ERR, "MTU should be between %d and %d",
884 RTE_ETHER_MIN_MTU, max_frame_size - ether_hdr_len);
/* rx_queue_intr_enable callback: re-enable the queue interrupt, with a
 * full barrier so the enable is visible before any subsequent ring poll.
 */
891 virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
893 struct virtio_hw *hw = dev->data->dev_private;
894 struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
895 struct virtqueue *vq = rxvq->vq;
897 virtqueue_enable_intr(vq);
898 virtio_mb(hw->weak_barriers);
/* rx_queue_intr_disable callback: suppress interrupts for this queue. */
903 virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
905 struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id];
906 struct virtqueue *vq = rxvq->vq;
908 virtqueue_disable_intr(vq);
913 * dev_ops for virtio, bare necessities for basic operation
915 static const struct eth_dev_ops virtio_eth_dev_ops = {
916 .dev_configure = virtio_dev_configure,
917 .dev_start = virtio_dev_start,
918 .dev_stop = virtio_dev_stop,
919 .dev_close = virtio_dev_close,
920 .promiscuous_enable = virtio_dev_promiscuous_enable,
921 .promiscuous_disable = virtio_dev_promiscuous_disable,
922 .allmulticast_enable = virtio_dev_allmulticast_enable,
923 .allmulticast_disable = virtio_dev_allmulticast_disable,
924 .mtu_set = virtio_mtu_set,
925 .dev_infos_get = virtio_dev_info_get,
926 .stats_get = virtio_dev_stats_get,
927 .xstats_get = virtio_dev_xstats_get,
928 .xstats_get_names = virtio_dev_xstats_get_names,
929 .stats_reset = virtio_dev_stats_reset,
/* xstats_reset shares the stats handler: both clear the same raw
 * per-queue counters.
 */
930 .xstats_reset = virtio_dev_stats_reset,
931 .link_update = virtio_dev_link_update,
932 .vlan_offload_set = virtio_dev_vlan_offload_set,
933 .rx_queue_setup = virtio_dev_rx_queue_setup,
934 .rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable,
935 .rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable,
936 .rx_queue_release = virtio_dev_queue_release,
937 .tx_queue_setup = virtio_dev_tx_queue_setup,
938 .tx_queue_release = virtio_dev_queue_release,
939 /* collect stats per queue */
940 .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
941 .vlan_filter_set = virtio_vlan_filter_set,
942 .mac_addr_add = virtio_mac_addr_add,
943 .mac_addr_remove = virtio_mac_addr_remove,
944 .mac_addr_set = virtio_mac_addr_set,
948 * dev_ops for virtio-user in secondary processes, as we just have
949 * some limited supports currently.
951 const struct eth_dev_ops virtio_user_secondary_eth_dev_ops = {
952 .dev_infos_get = virtio_dev_info_get,
953 .stats_get = virtio_dev_stats_get,
954 .xstats_get = virtio_dev_xstats_get,
955 .xstats_get_names = virtio_dev_xstats_get_names,
956 .stats_reset = virtio_dev_stats_reset,
957 .xstats_reset = virtio_dev_stats_reset,
958 /* collect stats per queue */
959 .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set,
/* Aggregate the per-queue software counters into rte_eth_stats. Per-queue
 * q_* entries are only filled for the first RTE_ETHDEV_QUEUE_STAT_CNTRS
 * queues; totals cover all queues.
 */
963 virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
967 for (i = 0; i < dev->data->nb_tx_queues; i++) {
968 const struct virtnet_tx *txvq = dev->data->tx_queues[i];
972 stats->opackets += txvq->stats.packets;
973 stats->obytes += txvq->stats.bytes;
975 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
976 stats->q_opackets[i] = txvq->stats.packets;
977 stats->q_obytes[i] = txvq->stats.bytes;
981 for (i = 0; i < dev->data->nb_rx_queues; i++) {
982 const struct virtnet_rx *rxvq = dev->data->rx_queues[i];
986 stats->ipackets += rxvq->stats.packets;
987 stats->ibytes += rxvq->stats.bytes;
988 stats->ierrors += rxvq->stats.errors;
990 if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
991 stats->q_ipackets[i] = rxvq->stats.packets;
992 stats->q_ibytes[i] = rxvq->stats.bytes;
996 stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
/* xstats_get_names callback: emit "rx_qN_<name>" / "tx_qN_<name>" entries
 * built from the per-queue stat string tables. When xstats_names is NULL
 * only the total count is of interest to the caller.
 */
999 static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev,
1000 struct rte_eth_xstat_name *xstats_names,
1001 __rte_unused unsigned limit)
1007 unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1008 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1010 if (xstats_names != NULL) {
1011 /* Note: limit checked in rte_eth_xstats_names() */
1013 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1014 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1017 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1018 snprintf(xstats_names[count].name,
1019 sizeof(xstats_names[count].name),
1021 rte_virtio_rxq_stat_strings[t].name);
1026 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1027 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1030 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1031 snprintf(xstats_names[count].name,
1032 sizeof(xstats_names[count].name),
1034 rte_virtio_txq_stat_strings[t].name);
/* xstats_get callback: read each counter through the (queue pointer +
 * table offset) scheme, RX queues first, then TX queues, assigning
 * sequential ids matching virtio_dev_xstats_get_names() order.
 */
1044 virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
1050 unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS +
1051 dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS;
1056 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1057 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1064 for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) {
1065 xstats[count].value = *(uint64_t *)(((char *)rxvq) +
1066 rte_virtio_rxq_stat_strings[t].offset);
1067 xstats[count].id = count;
1072 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1073 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1080 for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) {
1081 xstats[count].value = *(uint64_t *)(((char *)txvq) +
1082 rte_virtio_txq_stat_strings[t].offset);
1083 xstats[count].id = count;
/* stats_get callback: thin wrapper around virtio_update_stats(). */
1092 virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1094 virtio_update_stats(dev, stats);
/* stats_reset (and xstats_reset) callback: zero every per-queue software
 * counter, including all eight size_bins buckets.
 */
1100 virtio_dev_stats_reset(struct rte_eth_dev *dev)
1104 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1105 struct virtnet_tx *txvq = dev->data->tx_queues[i];
1109 txvq->stats.packets = 0;
1110 txvq->stats.bytes = 0;
1111 txvq->stats.multicast = 0;
1112 txvq->stats.broadcast = 0;
1113 memset(txvq->stats.size_bins, 0,
1114 sizeof(txvq->stats.size_bins[0]) * 8);
1117 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1118 struct virtnet_rx *rxvq = dev->data->rx_queues[i];
1122 rxvq->stats.packets = 0;
1123 rxvq->stats.bytes = 0;
1124 rxvq->stats.errors = 0;
1125 rxvq->stats.multicast = 0;
1126 rxvq->stats.broadcast = 0;
1127 memset(rxvq->stats.size_bins, 0,
1128 sizeof(rxvq->stats.size_bins[0]) * 8);
/* Write the driver's cached MAC address into the device config space. */
1135 virtio_set_hwaddr(struct virtio_hw *hw)
1137 vtpci_write_dev_config(hw,
1138 offsetof(struct virtio_net_config, mac),
1139 &hw->mac_addr, RTE_ETHER_ADDR_LEN);
/* Load the MAC from device config space when VIRTIO_NET_F_MAC was
 * negotiated; otherwise generate a random address and program it back.
 */
1143 virtio_get_hwaddr(struct virtio_hw *hw)
1145 if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) {
1146 vtpci_read_dev_config(hw,
1147 offsetof(struct virtio_net_config, mac),
1148 &hw->mac_addr, RTE_ETHER_ADDR_LEN);
1150 rte_eth_random_addr(&hw->mac_addr[0]);
1151 virtio_set_hwaddr(hw);
/* Push complete unicast and multicast MAC filter tables to the device
 * with one VIRTIO_NET_CTRL_MAC_TABLE_SET command (two data segments:
 * uc table then mc table). Requires VIRTIO_NET_F_CTRL_MAC_ADDR.
 */
1156 virtio_mac_table_set(struct virtio_hw *hw,
1157 const struct virtio_net_ctrl_mac *uc,
1158 const struct virtio_net_ctrl_mac *mc)
1160 struct virtio_pmd_ctrl ctrl;
1163 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1164 PMD_DRV_LOG(INFO, "host does not support mac table");
1168 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1169 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
/* Each table is its entry count followed by that many 6-byte MACs. */
1171 len[0] = uc->entries * RTE_ETHER_ADDR_LEN + sizeof(uc->entries);
1172 memcpy(ctrl.data, uc, len[0]);
1174 len[1] = mc->entries * RTE_ETHER_ADDR_LEN + sizeof(mc->entries);
1175 memcpy(ctrl.data + len[0], mc, len[1]);
1177 err = virtio_send_command(hw->cvq, &ctrl, len, 2);
1179 PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err);
/* mac_addr_add callback: rebuild the full unicast/multicast tables from
 * dev->data->mac_addrs with mac_addr substituted at `index`, then push
 * them to the device. The tables are alloca'd at their maximum size.
 */
1184 virtio_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr,
1185 uint32_t index, uint32_t vmdq __rte_unused)
1187 struct virtio_hw *hw = dev->data->dev_private;
1188 const struct rte_ether_addr *addrs = dev->data->mac_addrs;
1190 struct virtio_net_ctrl_mac *uc, *mc;
1192 if (index >= VIRTIO_MAX_MAC_ADDRS) {
1193 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1197 uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1198 sizeof(uc->entries));
1200 mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1201 sizeof(mc->entries));
/* Sort each slot into the unicast or multicast table. */
1204 for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1205 const struct rte_ether_addr *addr
1206 = (i == index) ? mac_addr : addrs + i;
1207 struct virtio_net_ctrl_mac *tbl
1208 = rte_is_multicast_ether_addr(addr) ? mc : uc;
1210 memcpy(&tbl->macs[tbl->entries++], addr, RTE_ETHER_ADDR_LEN);
1213 return virtio_mac_table_set(hw, uc, mc);
/* mac_addr_remove callback: rebuild the filter tables from
 * dev->data->mac_addrs, skipping the removed index and any all-zero
 * slots, then push them to the device.
 */
1217 virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
1219 struct virtio_hw *hw = dev->data->dev_private;
1220 struct rte_ether_addr *addrs = dev->data->mac_addrs;
1221 struct virtio_net_ctrl_mac *uc, *mc;
1224 if (index >= VIRTIO_MAX_MAC_ADDRS) {
1225 PMD_DRV_LOG(ERR, "mac address index %u out of range", index);
1229 uc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1230 sizeof(uc->entries));
1232 mc = alloca(VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN +
1233 sizeof(mc->entries));
1236 for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) {
1237 struct virtio_net_ctrl_mac *tbl;
1239 if (i == index || rte_is_zero_ether_addr(addrs + i))
1242 tbl = rte_is_multicast_ether_addr(addrs + i) ? mc : uc;
1243 memcpy(&tbl->macs[tbl->entries++], addrs + i,
1244 RTE_ETHER_ADDR_LEN);
1247 virtio_mac_table_set(hw, uc, mc);
/* mac_addr_set callback: cache the new default MAC, then program it via
 * the atomic VIRTIO_NET_CTRL_MAC_ADDR_SET command when supported, else
 * fall back to a direct config-space write (VIRTIO_NET_F_MAC required).
 */
1251 virtio_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
1253 struct virtio_hw *hw = dev->data->dev_private;
1255 memcpy(hw->mac_addr, mac_addr, RTE_ETHER_ADDR_LEN);
1257 /* Use atomic update if available */
1258 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1259 struct virtio_pmd_ctrl ctrl;
1260 int len = RTE_ETHER_ADDR_LEN;
1262 ctrl.hdr.class = VIRTIO_NET_CTRL_MAC;
1263 ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
1265 memcpy(ctrl.data, mac_addr, RTE_ETHER_ADDR_LEN);
1266 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
1269 if (!vtpci_with_feature(hw, VIRTIO_NET_F_MAC))
1272 virtio_set_hwaddr(hw);
/*
 * Add (@on != 0) or delete a VLAN id from the device's VLAN filter
 * via the VIRTIO_NET_CTRL_VLAN control command.
 * Requires the VIRTIO_NET_F_CTRL_VLAN feature to be negotiated.
 */
1277 virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1279 struct virtio_hw *hw = dev->data->dev_private;
1280 struct virtio_pmd_ctrl ctrl;
1283 if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN))
1286 ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN;
1287 ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
1288 memcpy(ctrl.data, &vlan_id, sizeof(vlan_id));
1289 len = sizeof(vlan_id);
1291 return virtio_send_command(hw->cvq, &ctrl, &len, 1);
/*
 * Ack (unmask) the device interrupt after servicing it.
 * For PCI buses the MSI-X state is re-detected, since acking the
 * interrupt may follow an interrupt-mode change.
 */
1295 virtio_intr_unmask(struct rte_eth_dev *dev)
1297 struct virtio_hw *hw = dev->data->dev_private;
1299 if (rte_intr_ack(dev->intr_handle) < 0)
1302 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY || hw->bus_type == VIRTIO_BUS_PCI_MODERN)
1303 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev))
/*
 * Enable the device interrupt (eventfd/uio mapping) and refresh the
 * cached MSI-X detection state for PCI devices.
 */
1309 virtio_intr_enable(struct rte_eth_dev *dev)
1311 struct virtio_hw *hw = dev->data->dev_private;
1313 if (rte_intr_enable(dev->intr_handle) < 0)
1316 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY || hw->bus_type == VIRTIO_BUS_PCI_MODERN)
1317 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev))
/*
 * Disable the device interrupt and refresh the cached MSI-X
 * detection state for PCI devices (mirrors virtio_intr_enable()).
 */
1323 virtio_intr_disable(struct rte_eth_dev *dev)
1325 struct virtio_hw *hw = dev->data->dev_private;
1327 if (rte_intr_disable(dev->intr_handle) < 0)
1330 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY || hw->bus_type == VIRTIO_BUS_PCI_MODERN)
1331 hw->use_msix = vtpci_msix_detect(RTE_ETH_DEV_TO_PCI(dev))
/*
 * Negotiate the feature set with the device.
 * @req_features is the mask the driver is willing to use; the device's
 * advertised features are read, an invalid device MTU strips
 * VIRTIO_NET_F_MTU from the request, and the intersection is written
 * back through vtpci_negotiate_features(). For modern PCI devices
 * VIRTIO_F_VERSION_1 must end up negotiated, and modern/virtio-user
 * buses additionally require FEATURES_OK to stick in the status field.
 */
1337 virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
1339 uint64_t host_features;
1341 /* Prepare guest_features: feature that driver wants to support */
1342 PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
1345 /* Read device(host) feature bits */
1346 host_features = VTPCI_OPS(hw)->get_features(hw);
1347 PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
1350 /* If supported, ensure MTU value is valid before acknowledging it. */
1351 if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) {
1352 struct virtio_net_config config;
1354 vtpci_read_dev_config(hw,
1355 offsetof(struct virtio_net_config, mtu),
1356 &config.mtu, sizeof(config.mtu));
1358 if (config.mtu < RTE_ETHER_MIN_MTU)
1359 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
1363 * Negotiate features: Subset of device feature bits are written back
1364 * guest feature bits.
1366 hw->guest_features = req_features;
1367 hw->guest_features = vtpci_negotiate_features(hw, host_features);
1368 PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
1369 hw->guest_features);
1371 if (hw->bus_type == VIRTIO_BUS_PCI_MODERN && !vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
1373 "VIRTIO_F_VERSION_1 features is not enabled.");
1377 if (hw->bus_type == VIRTIO_BUS_PCI_MODERN || hw->bus_type == VIRTIO_BUS_USER) {
1378 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
1379 if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
1381 "failed to set FEATURES_OK status!");
/* Remember what was asked for, so configure can detect changes. */
1386 hw->req_guest_features = req_features;
/*
 * Pause the device under hw->state_lock so no worker thread touches
 * the queues; returns early (lock released) when the device is
 * already stopped. The lock is held on successful return and must be
 * released by virtio_dev_resume().
 */
1392 virtio_dev_pause(struct rte_eth_dev *dev)
1394 struct virtio_hw *hw = dev->data->dev_private;
1396 rte_spinlock_lock(&hw->state_lock);
1398 if (hw->started == 0) {
1399 /* Device is just stopped. */
1400 rte_spinlock_unlock(&hw->state_lock);
1405 * Prevent the worker threads from touching queues to avoid contention,
1406 * 1 ms should be enough for the ongoing Tx function to finish.
/*
 * Counterpart of virtio_dev_pause(): restore hw state and drop
 * hw->state_lock so worker threads can run again.
 */
1413 * Recover hw state to let the worker threads continue.
1416 virtio_dev_resume(struct rte_eth_dev *dev)
1418 struct virtio_hw *hw = dev->data->dev_private;
1421 rte_spinlock_unlock(&hw->state_lock);
/*
 * Inject @nb_pkts mbufs directly into Tx queue 0 using the current
 * tx_pkt_burst handler; hw->inject_pkts marks the burst as injected
 * for the duration of the call. Must be called between
 * virtio_dev_pause() and virtio_dev_resume().
 */
1425 * Should be called only after device is paused.
1428 virtio_inject_pkts(struct rte_eth_dev *dev, struct rte_mbuf **tx_pkts,
1431 struct virtio_hw *hw = dev->data->dev_private;
1432 struct virtnet_tx *txvq = dev->data->tx_queues[0];
1435 hw->inject_pkts = tx_pkts;
1436 ret = dev->tx_pkt_burst(txvq, tx_pkts, nb_pkts);
1437 hw->inject_pkts = NULL;
/*
 * Send a gratuitous RARP packet carrying the port's MAC so peers can
 * refresh their tables (used after a device ANNOUNCE, e.g. live
 * migration). Builds the RARP mbuf from rx queue 0's mempool, pauses
 * the device, injects the packet, then resumes. No-op if rx queues
 * are not set up or the device is already stopped.
 */
1443 virtio_notify_peers(struct rte_eth_dev *dev)
1445 struct virtio_hw *hw = dev->data->dev_private;
1446 struct virtnet_rx *rxvq;
1447 struct rte_mbuf *rarp_mbuf;
1449 if (!dev->data->rx_queues)
1452 rxvq = dev->data->rx_queues[0];
1456 rarp_mbuf = rte_net_make_rarp_packet(rxvq->mpool,
1457 (struct rte_ether_addr *)hw->mac_addr);
1458 if (rarp_mbuf == NULL) {
1459 PMD_DRV_LOG(ERR, "failed to make RARP packet.");
1463 /* If virtio port just stopped, no need to send RARP */
1464 if (virtio_dev_pause(dev) < 0) {
1465 rte_pktmbuf_free(rarp_mbuf);
1469 virtio_inject_pkts(dev, &rarp_mbuf, 1);
1470 virtio_dev_resume(dev);
/*
 * Acknowledge a device link ANNOUNCE by sending the
 * VIRTIO_NET_CTRL_ANNOUNCE_ACK control command (no payload).
 */
1474 virtio_ack_link_announce(struct rte_eth_dev *dev)
1476 struct virtio_hw *hw = dev->data->dev_private;
1477 struct virtio_pmd_ctrl ctrl;
1479 ctrl.hdr.class = VIRTIO_NET_CTRL_ANNOUNCE;
1480 ctrl.hdr.cmd = VIRTIO_NET_CTRL_ANNOUNCE_ACK;
1482 virtio_send_command(hw->cvq, &ctrl, NULL, 0);
1486 * Process virtio config changed interrupt. Call the callback
1487 * if link state changed, generate gratuitous RARP packet if
1488 * the status indicates an ANNOUNCE.
1491 virtio_interrupt_handler(void *param)
1493 struct rte_eth_dev *dev = param;
1494 struct virtio_hw *hw = dev->data->dev_private;
1498 /* Read interrupt status which clears interrupt */
1499 isr = vtpci_isr(hw);
1500 PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
/* Re-arm the interrupt before handling, so no edge is lost. */
1502 if (virtio_intr_unmask(dev) < 0)
1503 PMD_DRV_LOG(ERR, "interrupt enable failed");
1505 if (isr & VIRTIO_PCI_ISR_CONFIG) {
/* Fire the LSC callback only when the link state actually changed. */
1506 if (virtio_dev_link_update(dev, 0) == 0)
1507 rte_eth_dev_callback_process(dev,
1508 RTE_ETH_EVENT_INTR_LSC,
1511 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1512 vtpci_read_dev_config(hw,
1513 offsetof(struct virtio_net_config, status),
1514 &status, sizeof(status));
/* ANNOUNCE: broadcast RARP to peers, then ack the announce. */
1515 if (status & VIRTIO_NET_S_ANNOUNCE) {
1516 virtio_notify_peers(dev);
1518 virtio_ack_link_announce(dev);
/*
 * Select the Rx/Tx burst functions for the port based on the
 * negotiated features and runtime hints:
 *  - packed vs. split ring (vtpci_packed_queue)
 *  - vectorized vs. in-order vs. mergeable-buffer vs. standard paths
 * The chosen function pointers are installed on eth_dev.
 */
1524 /* set rx and tx handlers according to what is supported */
1526 set_rxtx_funcs(struct rte_eth_dev *eth_dev)
1528 struct virtio_hw *hw = eth_dev->data->dev_private;
1530 eth_dev->tx_pkt_prepare = virtio_xmit_pkts_prepare;
/* --- Tx path selection --- */
1531 if (vtpci_packed_queue(hw)) {
1533 "virtio: using packed ring %s Tx path on port %u",
1534 hw->use_vec_tx ? "vectorized" : "standard",
1535 eth_dev->data->port_id);
1537 eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed_vec;
1539 eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
1541 if (hw->use_inorder_tx) {
1542 PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
1543 eth_dev->data->port_id);
1544 eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
1546 PMD_INIT_LOG(INFO, "virtio: using standard Tx path on port %u",
1547 eth_dev->data->port_id);
1548 eth_dev->tx_pkt_burst = virtio_xmit_pkts;
/* --- Rx path selection --- */
1552 if (vtpci_packed_queue(hw)) {
1553 if (hw->use_vec_rx) {
1555 "virtio: using packed ring vectorized Rx path on port %u",
1556 eth_dev->data->port_id);
1557 eth_dev->rx_pkt_burst =
1558 &virtio_recv_pkts_packed_vec;
1559 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1561 "virtio: using packed ring mergeable buffer Rx path on port %u",
1562 eth_dev->data->port_id);
1563 eth_dev->rx_pkt_burst =
1564 &virtio_recv_mergeable_pkts_packed;
1567 "virtio: using packed ring standard Rx path on port %u",
1568 eth_dev->data->port_id);
1569 eth_dev->rx_pkt_burst = &virtio_recv_pkts_packed;
1572 if (hw->use_vec_rx) {
1573 PMD_INIT_LOG(INFO, "virtio: using vectorized Rx path on port %u",
1574 eth_dev->data->port_id);
1575 eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
1576 } else if (hw->use_inorder_rx) {
1578 "virtio: using inorder Rx path on port %u",
1579 eth_dev->data->port_id);
1580 eth_dev->rx_pkt_burst = &virtio_recv_pkts_inorder;
1581 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
1583 "virtio: using mergeable buffer Rx path on port %u",
1584 eth_dev->data->port_id);
1585 eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
1587 PMD_INIT_LOG(INFO, "virtio: using standard Rx path on port %u",
1588 eth_dev->data->port_id);
1589 eth_dev->rx_pkt_burst = &virtio_recv_pkts;
1595 /* Only support 1:1 queue/interrupt mapping so far.
1596 * TODO: support n:1 queue/interrupt mapping when there are limited number of
1597 * interrupt vectors (<N+1).
1600 virtio_queues_bind_intr(struct rte_eth_dev *dev)
1603 struct virtio_hw *hw = dev->data->dev_private;
1605 PMD_INIT_LOG(INFO, "queue/interrupt binding");
/* Vector 0 is the config interrupt, so rx queue i gets vector i+1.
 * hw->vqs[i * 2] is the rx virtqueue of pair i (rx/tx interleave).
 */
1606 for (i = 0; i < dev->data->nb_rx_queues; ++i) {
1607 dev->intr_handle->intr_vec[i] = i + 1;
1608 if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) ==
1609 VIRTIO_MSI_NO_VECTOR) {
1610 PMD_DRV_LOG(ERR, "failed to set queue vector");
/*
 * Detach every rx queue from its interrupt vector by programming
 * VIRTIO_MSI_NO_VECTOR. Inverse of virtio_queues_bind_intr().
 * NOTE(review): the bind path indexes hw->vqs[i * 2]; this uses
 * hw->vqs[i * VTNET_CQ] — confirm VTNET_CQ == 2 so both agree.
 */
1619 virtio_queues_unbind_intr(struct rte_eth_dev *dev)
1622 struct virtio_hw *hw = dev->data->dev_private;
1624 PMD_INIT_LOG(INFO, "queue/interrupt unbinding");
1625 for (i = 0; i < dev->data->nb_rx_queues; ++i)
1626 VTPCI_OPS(hw)->set_queue_irq(hw,
1627 hw->vqs[i * VTNET_CQ],
1628 VIRTIO_MSI_NO_VECTOR);
/*
 * Prepare per-rx-queue interrupts: create eventfds, allocate the
 * intr_vec table, re-register the interrupt callback so max_intr is
 * updated, enable the interrupt (which also enables MSI-X), and bind
 * each rx queue to its vector.
 */
1632 virtio_configure_intr(struct rte_eth_dev *dev)
1634 struct virtio_hw *hw = dev->data->dev_private;
1636 if (!rte_intr_cap_multiple(dev->intr_handle)) {
1637 PMD_INIT_LOG(ERR, "Multiple intr vector not supported");
1641 if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) {
1642 PMD_INIT_LOG(ERR, "Fail to create eventfd");
/* Allocate the queue->vector map once; reused across reconfigures. */
1646 if (!dev->intr_handle->intr_vec) {
1647 dev->intr_handle->intr_vec =
1648 rte_zmalloc("intr_vec",
1649 hw->max_queue_pairs * sizeof(int), 0);
1650 if (!dev->intr_handle->intr_vec) {
1651 PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors",
1652 hw->max_queue_pairs);
1657 /* Re-register callback to update max_intr */
1658 rte_intr_callback_unregister(dev->intr_handle,
1659 virtio_interrupt_handler,
1661 rte_intr_callback_register(dev->intr_handle,
1662 virtio_interrupt_handler,
1665 /* DO NOT try to remove this! This function will enable msix, or QEMU
1666 * will encounter SIGSEGV when DRIVER_OK is sent.
1667 * And for legacy devices, this should be done before queue/vec binding
1668 * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR
1669 * (22) will be ignored.
1671 if (virtio_intr_enable(dev) < 0) {
1672 PMD_DRV_LOG(ERR, "interrupt enable failed");
1676 if (virtio_queues_bind_intr(dev) < 0) {
1677 PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt");
1683 #define DUPLEX_UNKNOWN 0xff
/*
 * (Re)initialize the device: reset, handshake (ACK/DRIVER status),
 * negotiate @req_features, read the net config (MAC, status, queue
 * pairs, MTU, speed/duplex), allocate the virtqueues, optionally set
 * up rx interrupts, then complete reinit with DRIVER_OK.
 * Called at probe time and again from configure when the requested
 * feature set changes.
 */
1684 /* reset device and renegotiate features if needed */
1686 virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
1688 struct virtio_hw *hw = eth_dev->data->dev_private;
1689 struct virtio_net_config *config;
1690 struct virtio_net_config local_config;
1691 struct rte_pci_device *pci_dev = NULL;
1694 /* Reset the device although not necessary at startup */
/* Drop any mbufs/queues left over from a previous init. */
1698 virtio_dev_free_mbufs(eth_dev);
1699 virtio_free_queues(hw);
1702 /* Tell the host we've noticed this device. */
1703 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
1705 /* Tell the host we've known how to drive the device. */
1706 vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
1707 if (virtio_negotiate_features(hw, req_features) < 0)
1710 hw->weak_barriers = !vtpci_with_feature(hw, VIRTIO_F_ORDER_PLATFORM);
1712 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY || hw->bus_type == VIRTIO_BUS_PCI_MODERN)
1713 pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1715 /* If host does not support both status and MSI-X then disable LSC */
1716 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) &&
1717 hw->use_msix != VIRTIO_MSIX_NONE)
1718 eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
1720 eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
1722 eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1724 /* Setting up rx_header size for the device */
1725 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
1726 vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
1727 vtpci_with_feature(hw, VIRTIO_F_RING_PACKED))
1728 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1730 hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
1732 /* Copy the permanent MAC address to: virtio_hw */
1733 virtio_get_hwaddr(hw);
1734 rte_ether_addr_copy((struct rte_ether_addr *)hw->mac_addr,
/* NOTE(review): "ð_dev" below looks like HTML-entity mangling of
 * "&eth_dev" ("&eth;" -> U+00F0) — confirm against upstream source.
 */
1735 ð_dev->data->mac_addrs[0]);
1737 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1738 hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
1739 hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
/* Fill in speed/duplex from config space only when not forced by devargs. */
1741 if (hw->speed == ETH_SPEED_NUM_UNKNOWN) {
1742 if (vtpci_with_feature(hw, VIRTIO_NET_F_SPEED_DUPLEX)) {
1743 config = &local_config;
1744 vtpci_read_dev_config(hw,
1745 offsetof(struct virtio_net_config, speed),
1746 &config->speed, sizeof(config->speed));
1747 vtpci_read_dev_config(hw,
1748 offsetof(struct virtio_net_config, duplex),
1749 &config->duplex, sizeof(config->duplex));
1750 hw->speed = config->speed;
1751 hw->duplex = config->duplex;
1754 if (hw->duplex == DUPLEX_UNKNOWN)
1755 hw->duplex = ETH_LINK_FULL_DUPLEX;
1756 PMD_INIT_LOG(DEBUG, "link speed = %d, duplex = %d",
1757 hw->speed, hw->duplex);
/* With a control queue the full net config block is meaningful. */
1758 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) {
1759 config = &local_config;
1761 vtpci_read_dev_config(hw,
1762 offsetof(struct virtio_net_config, mac),
1763 &config->mac, sizeof(config->mac));
1765 if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
1766 vtpci_read_dev_config(hw,
1767 offsetof(struct virtio_net_config, status),
1768 &config->status, sizeof(config->status));
1771 "VIRTIO_NET_F_STATUS is not supported");
1775 if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) {
1776 vtpci_read_dev_config(hw,
1777 offsetof(struct virtio_net_config, max_virtqueue_pairs),
1778 &config->max_virtqueue_pairs,
1779 sizeof(config->max_virtqueue_pairs));
1782 "VIRTIO_NET_F_MQ is not supported");
1783 config->max_virtqueue_pairs = 1;
1786 hw->max_queue_pairs = config->max_virtqueue_pairs;
1788 if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) {
1789 vtpci_read_dev_config(hw,
1790 offsetof(struct virtio_net_config, mtu),
1792 sizeof(config->mtu));
1795 * MTU value has already been checked at negotiation
1796 * time, but check again in case it has changed since
1797 * then, which should not happen.
1799 if (config->mtu < RTE_ETHER_MIN_MTU) {
1800 PMD_INIT_LOG(ERR, "invalid max MTU value (%u)",
1805 hw->max_mtu = config->mtu;
1806 /* Set initial MTU to maximum one supported by vhost */
1807 eth_dev->data->mtu = config->mtu;
1810 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1811 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1814 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d",
1815 config->max_virtqueue_pairs);
1816 PMD_INIT_LOG(DEBUG, "config->status=%d", config->status);
1818 "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
1819 config->mac[0], config->mac[1],
1820 config->mac[2], config->mac[3],
1821 config->mac[4], config->mac[5]);
/* No control queue: single queue pair, MTU bounded by rx buffer size. */
1823 PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1");
1824 hw->max_queue_pairs = 1;
1825 hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - RTE_ETHER_HDR_LEN -
1826 VLAN_TAG_LEN - hw->vtnet_hdr_size;
1829 ret = virtio_alloc_queues(eth_dev);
1833 if (eth_dev->data->dev_conf.intr_conf.rxq) {
1834 if (virtio_configure_intr(eth_dev) < 0) {
1835 PMD_INIT_LOG(ERR, "failed to configure interrupt");
1836 virtio_free_queues(hw);
1841 vtpci_reinit_complete(hw);
1844 PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
1845 eth_dev->data->port_id, pci_dev->id.vendor_id,
1846 pci_dev->id.device_id);
1852 * Remap the PCI device again (IO port map for legacy device and
1853 * memory map for modern device), so that the secondary process
1854 * could have the PCI initiated correctly.
1857 virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
/* Modern device: BAR memory map; legacy device: ioport map. */
1859 if (hw->bus_type == VIRTIO_BUS_PCI_MODERN) {
1861 * We don't have to re-parse the PCI config space, since
1862 * rte_pci_map_device() makes sure the mapped address
1863 * in secondary process would equal to the one mapped in
1864 * the primary process: error will be returned if that
1865 * requirement is not met.
1867 * That said, we could simply reuse all cap pointers
1868 * (such as dev_cfg, common_cfg, etc.) parsed from the
1869 * primary process, which is stored in shared memory.
1871 if (rte_pci_map_device(pci_dev)) {
1872 PMD_INIT_LOG(DEBUG, "failed to map pci device!");
1875 } else if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY) {
1876 if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
/*
 * Install the bus-specific vtpci ops table (virtio-user, modern PCI,
 * or legacy PCI) for this device. The virtio-user branch is only
 * compiled in when RTE_VIRTIO_USER is defined.
 */
1884 virtio_set_vtpci_ops(struct virtio_hw *hw)
1886 #ifdef RTE_VIRTIO_USER
1887 if (hw->bus_type == VIRTIO_BUS_USER)
1888 VTPCI_OPS(hw) = &virtio_user_ops;
1891 if (hw->bus_type == VIRTIO_BUS_PCI_MODERN)
1892 VTPCI_OPS(hw) = &modern_ops;
1893 else if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY)
1894 VTPCI_OPS(hw) = &legacy_ops;
1900 * This function is based on probe() function in virtio_pci.c
1901 * It returns 0 on success.
1904 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
1906 struct virtio_hw *hw = eth_dev->data->dev_private;
1907 uint32_t speed = ETH_SPEED_NUM_UNKNOWN;
/* The virtio-net header is prepended in mbuf headroom; bail out if
 * the build-time headroom cannot hold it.
 */
1911 if (sizeof(struct virtio_net_hdr_mrg_rxbuf) > RTE_PKTMBUF_HEADROOM) {
1913 "Not sufficient headroom required = %d, avail = %d",
1914 (int)sizeof(struct virtio_net_hdr_mrg_rxbuf),
1915 RTE_PKTMBUF_HEADROOM);
1920 eth_dev->dev_ops = &virtio_eth_dev_ops;
1921 eth_dev->rx_descriptor_done = virtio_dev_rx_queue_done;
/* Secondary process: only remap PCI, reinstall ops and burst funcs. */
1923 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1924 if (hw->bus_type != VIRTIO_BUS_USER) {
1925 ret = virtio_remap_pci(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1930 virtio_set_vtpci_ops(hw);
1931 set_rxtx_funcs(eth_dev);
1935 ret = virtio_dev_devargs_parse(eth_dev->device->devargs,
1936 NULL, &speed, &vectorized);
1941 /* Allocate memory for storing MAC addresses */
1942 eth_dev->data->mac_addrs = rte_zmalloc("virtio",
1943 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN, 0);
1944 if (eth_dev->data->mac_addrs == NULL) {
1946 "Failed to allocate %d bytes needed to store MAC addresses",
1947 VIRTIO_MAX_MAC_ADDRS * RTE_ETHER_ADDR_LEN);
1951 hw->port_id = eth_dev->data->port_id;
1952 /* For virtio_user case the hw->virtio_user_dev is populated by
1953 * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called.
1955 if (hw->bus_type != VIRTIO_BUS_USER) {
1956 ret = vtpci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
1958 goto err_vtpci_init;
1961 rte_spinlock_init(&hw->state_lock);
1963 /* reset device and negotiate default features */
1964 ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES);
1966 goto err_virtio_init;
1969 if (!vtpci_packed_queue(hw)) {
1972 #if defined(CC_AVX512_SUPPORT) || defined(RTE_ARCH_ARM)
1977 "building environment do not support packed ring vectorized");
/* Error unwind: undo PCI mapping and free the MAC address array. */
1987 if (hw->bus_type == VIRTIO_BUS_PCI_MODERN || hw->bus_type == VIRTIO_BUS_PCI_LEGACY) {
1988 rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev));
1989 if (hw->bus_type == VIRTIO_BUS_PCI_LEGACY)
1990 rte_pci_ioport_unmap(VTPCI_IO(hw));
1993 rte_free(eth_dev->data->mac_addrs);
1994 eth_dev->data->mac_addrs = NULL;
/*
 * Uninit hook for the ethdev: stop and close the device.
 * Secondary processes return early — teardown of shared state is the
 * primary process's job.
 */
1999 eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
2002 PMD_INIT_FUNC_TRACE();
2004 if (rte_eal_process_type() == RTE_PROC_SECONDARY)
2007 ret = virtio_dev_stop(eth_dev);
2008 virtio_dev_close(eth_dev);
2010 PMD_INIT_LOG(DEBUG, "dev_uninit completed");
/*
 * rte_kvargs handler for the "vdpa" devarg: writes 1 into *ret_val
 * when the value is the string "1", 0 otherwise.
 */
2016 static int vdpa_check_handler(__rte_unused const char *key,
2017 const char *value, void *ret_val)
2019 if (strcmp(value, "1") == 0)
2020 *(int *)ret_val = 1;
2022 *(int *)ret_val = 0;
/*
 * Map an ETH_SPEED_NUM_* value to the corresponding ETH_LINK_SPEED_*
 * capability flag; unknown speeds fall through the switch.
 */
2029 virtio_dev_speed_capa_get(uint32_t speed)
2032 case ETH_SPEED_NUM_10G:
2033 return ETH_LINK_SPEED_10G;
2034 case ETH_SPEED_NUM_20G:
2035 return ETH_LINK_SPEED_20G;
2036 case ETH_SPEED_NUM_25G:
2037 return ETH_LINK_SPEED_25G;
2038 case ETH_SPEED_NUM_40G:
2039 return ETH_LINK_SPEED_40G;
2040 case ETH_SPEED_NUM_50G:
2041 return ETH_LINK_SPEED_50G;
2042 case ETH_SPEED_NUM_56G:
2043 return ETH_LINK_SPEED_56G;
2044 case ETH_SPEED_NUM_100G:
2045 return ETH_LINK_SPEED_100G;
2046 case ETH_SPEED_NUM_200G:
2047 return ETH_LINK_SPEED_200G;
/*
 * rte_kvargs handler for the "vectorized" devarg: writes 1 into
 * *ret_val when the value is the string "1", 0 otherwise.
 */
2053 static int vectorized_check_handler(__rte_unused const char *key,
2054 const char *value, void *ret_val)
2056 if (strcmp(value, "1") == 0)
2057 *(int *)ret_val = 1;
2059 *(int *)ret_val = 0;
2064 #define VIRTIO_ARG_SPEED "speed"
2065 #define VIRTIO_ARG_VDPA "vdpa"
2066 #define VIRTIO_ARG_VECTORIZED "vectorized"
/*
 * rte_kvargs handler for the "speed" devarg: parses the numeric value
 * and accepts it only if it maps to a known link-speed capability.
 */
2070 link_speed_handler(const char *key __rte_unused,
2071 const char *value, void *ret_val)
2074 if (!value || !ret_val)
2076 val = strtoul(value, NULL, 0);
2077 /* validate input */
2078 if (virtio_dev_speed_capa_get(val) == 0)
2080 *(uint32_t *)ret_val = val;
/*
 * Parse the device's devargs for the optional "vdpa", "speed" and
 * "vectorized" keys. Each output pointer may be NULL, in which case
 * the corresponding key is ignored. Outputs are written only when the
 * key appears exactly once; the kvlist is always freed.
 */
2087 virtio_dev_devargs_parse(struct rte_devargs *devargs, int *vdpa,
2088 uint32_t *speed, int *vectorized)
2090 struct rte_kvargs *kvlist;
2093 if (devargs == NULL)
2096 kvlist = rte_kvargs_parse(devargs->args, NULL);
2097 if (kvlist == NULL) {
2098 PMD_INIT_LOG(ERR, "error when parsing param");
2101 if (vdpa && rte_kvargs_count(kvlist, VIRTIO_ARG_VDPA) == 1) {
2102 /* vdpa mode selected when there's a key-value pair:
2105 ret = rte_kvargs_process(kvlist, VIRTIO_ARG_VDPA,
2106 vdpa_check_handler, vdpa);
2108 PMD_INIT_LOG(ERR, "Failed to parse %s",
2113 if (speed && rte_kvargs_count(kvlist, VIRTIO_ARG_SPEED) == 1) {
2114 ret = rte_kvargs_process(kvlist,
2116 link_speed_handler, speed);
2118 PMD_INIT_LOG(ERR, "Failed to parse %s",
2125 rte_kvargs_count(kvlist, VIRTIO_ARG_VECTORIZED) == 1) {
2126 ret = rte_kvargs_process(kvlist,
2127 VIRTIO_ARG_VECTORIZED,
2128 vectorized_check_handler, vectorized);
2130 PMD_INIT_LOG(ERR, "Failed to parse %s",
2131 VIRTIO_ARG_VECTORIZED);
2137 rte_kvargs_free(kvlist);
/*
 * PCI probe entry point. Parses devargs first; if the "vdpa" key
 * selects vDPA mode, this PMD skips the device (a vDPA driver will
 * claim it). Otherwise allocates the ethdev and runs
 * eth_virtio_dev_init().
 */
2141 static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
2142 struct rte_pci_device *pci_dev)
2147 ret = virtio_dev_devargs_parse(pci_dev->device.devargs, &vdpa, NULL,
2150 PMD_INIT_LOG(ERR, "devargs parsing is failed");
2153 /* virtio pmd skips probe if device needs to work in vdpa mode */
2157 return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_pci_dev),
2158 eth_virtio_dev_init);
/*
 * PCI remove entry point: tear down the ethdev through
 * eth_virtio_dev_uninit() (tolerating a port already closed).
 */
2161 static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev)
2165 ret = rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit);
2166 /* Port has already been released by close. */
/* PCI driver descriptor registered with the EAL PCI bus. */
2172 static struct rte_pci_driver rte_virtio_pmd = {
2174 .name = "net_virtio",
2176 .id_table = pci_id_virtio_map,
2178 .probe = eth_virtio_pci_probe,
2179 .remove = eth_virtio_pci_remove,
/* Constructor: enable ioport access (legacy devices) and register
 * the PCI driver at EAL init time.
 */
2182 RTE_INIT(rte_virtio_pmd_init)
2184 rte_eal_iopl_init();
2185 rte_pci_register(&rte_virtio_pmd);
/* True when any Rx offload (guest csum or guest TSO4/6) was negotiated. */
2189 rx_offload_enabled(struct virtio_hw *hw)
2191 return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
2192 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2193 vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
/* True when any Tx offload (csum or host TSO4/6) was negotiated. */
2197 tx_offload_enabled(struct virtio_hw *hw)
2199 return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
2200 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
2201 vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
2205 * Configure virtio device
2206 * It returns 0 on success.
2209 virtio_dev_configure(struct rte_eth_dev *dev)
2211 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2212 const struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
2213 struct virtio_hw *hw = dev->data->dev_private;
2214 uint32_t ether_hdr_len = RTE_ETHER_HDR_LEN + VLAN_TAG_LEN +
2216 uint64_t rx_offloads = rxmode->offloads;
2217 uint64_t tx_offloads = txmode->offloads;
2218 uint64_t req_features;
2221 PMD_INIT_LOG(DEBUG, "configure");
2222 req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;
/* Only the default (no RSS/VMDq) multi-queue modes are supported. */
2224 if (rxmode->mq_mode != ETH_MQ_RX_NONE) {
2226 "Unsupported Rx multi queue mode %d",
2231 if (txmode->mq_mode != ETH_MQ_TX_NONE) {
2233 "Unsupported Tx multi queue mode %d",
2238 if (dev->data->dev_conf.intr_conf.rxq) {
2239 ret = virtio_init_device(dev, hw->req_guest_features);
2244 if (rxmode->max_rx_pkt_len > hw->max_mtu + ether_hdr_len)
2245 req_features &= ~(1ULL << VIRTIO_NET_F_MTU);
/* Translate requested ethdev offloads into virtio feature bits. */
2247 if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2248 DEV_RX_OFFLOAD_TCP_CKSUM))
2249 req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM);
2251 if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO)
2253 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2254 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2256 if (tx_offloads & (DEV_TX_OFFLOAD_UDP_CKSUM |
2257 DEV_TX_OFFLOAD_TCP_CKSUM))
2258 req_features |= (1ULL << VIRTIO_NET_F_CSUM);
2260 if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO)
2262 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2263 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2265 /* if request features changed, reinit the device */
2266 if (req_features != hw->req_guest_features) {
2267 ret = virtio_init_device(dev, req_features);
/* Verify the host actually granted what the application asked for. */
2272 if ((rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2273 DEV_RX_OFFLOAD_TCP_CKSUM)) &&
2274 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) {
2276 "rx checksum not available on this host");
2280 if ((rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) &&
2281 (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
2282 !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6))) {
2284 "Large Receive Offload not available on this host");
2288 /* start control queue */
2289 if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ))
2290 virtio_dev_cq_start(dev);
2292 if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2295 if ((rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2296 && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2298 "vlan filtering not available on this host");
2302 hw->has_tx_offload = tx_offload_enabled(hw);
2303 hw->has_rx_offload = rx_offload_enabled(hw);
2305 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2306 /* Enable vector (0) for Link State Intrerrupt */
2307 if (VTPCI_OPS(hw)->set_config_irq(hw, 0) ==
2308 VIRTIO_MSI_NO_VECTOR) {
2309 PMD_DRV_LOG(ERR, "failed to set config vector");
/* Decide whether vectorized datapaths can stay enabled: packed ring
 * needs AVX512F (x86-64) or NEON (ARM), IN_ORDER and VERSION_1.
 */
2313 if (vtpci_packed_queue(hw)) {
2314 #if defined(RTE_ARCH_X86_64) && defined(CC_AVX512_SUPPORT)
2315 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2316 (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) ||
2317 !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2318 !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
2319 rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)) {
2321 "disabled packed ring vectorized path for requirements not met");
2325 #elif defined(RTE_ARCH_ARM)
2326 if ((hw->use_vec_rx || hw->use_vec_tx) &&
2327 (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) ||
2328 !vtpci_with_feature(hw, VIRTIO_F_IN_ORDER) ||
2329 !vtpci_with_feature(hw, VIRTIO_F_VERSION_1) ||
2330 rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)) {
2332 "disabled packed ring vectorized path for requirements not met");
2341 if (hw->use_vec_rx) {
2342 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2344 "disabled packed ring vectorized rx for mrg_rxbuf enabled");
2348 if (rx_offloads & DEV_RX_OFFLOAD_TCP_LRO) {
2350 "disabled packed ring vectorized rx for TCP_LRO enabled");
/* Split ring: IN_ORDER selects the in-order datapaths instead. */
2355 if (vtpci_with_feature(hw, VIRTIO_F_IN_ORDER)) {
2356 hw->use_inorder_tx = 1;
2357 hw->use_inorder_rx = 1;
2361 if (hw->use_vec_rx) {
2362 #if defined RTE_ARCH_ARM
2363 if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) {
2365 "disabled split ring vectorized path for requirement not met");
2369 if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
2371 "disabled split ring vectorized rx for mrg_rxbuf enabled");
2375 if (rx_offloads & (DEV_RX_OFFLOAD_UDP_CKSUM |
2376 DEV_RX_OFFLOAD_TCP_CKSUM |
2377 DEV_RX_OFFLOAD_TCP_LRO |
2378 DEV_RX_OFFLOAD_VLAN_STRIP)) {
2380 "disabled split ring vectorized rx for offloading enabled");
2384 if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
2386 "disabled split ring vectorized rx, max SIMD bitwidth too low");
/*
 * Start the port: finish rx/tx queue setup, wire up the LSC/rxq
 * interrupts, program the number of active queue pairs, flush stale
 * rx packets, kick every queue to wake the backend, install the
 * datapath burst functions and publish the initial link state.
 */
2397 virtio_dev_start(struct rte_eth_dev *dev)
2399 uint16_t nb_queues, i;
2400 struct virtnet_rx *rxvq;
2401 struct virtnet_tx *txvq __rte_unused;
2402 struct virtio_hw *hw = dev->data->dev_private;
2405 /* Finish the initialization of the queues */
2406 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2407 ret = virtio_dev_rx_queue_setup_finish(dev, i);
2411 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2412 ret = virtio_dev_tx_queue_setup_finish(dev, i);
2417 /* check if lsc interrupt feature is enabled */
2418 if (dev->data->dev_conf.intr_conf.lsc) {
2419 if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2420 PMD_DRV_LOG(ERR, "link status not supported by host");
2425 /* Enable uio/vfio intr/eventfd mapping: althrough we already did that
2426 * in device configure, but it could be unmapped when device is
2429 if (dev->data->dev_conf.intr_conf.lsc ||
2430 dev->data->dev_conf.intr_conf.rxq) {
2431 virtio_intr_disable(dev);
2433 /* Setup interrupt callback */
2434 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
2435 rte_intr_callback_register(dev->intr_handle,
2436 virtio_interrupt_handler,
2439 if (virtio_intr_enable(dev) < 0) {
2440 PMD_DRV_LOG(ERR, "interrupt enable failed");
2445 /*Notify the backend
2446 *Otherwise the tap backend might already stop its queue due to fullness.
2447 *vhost backend will have no chance to be waked up
2449 nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
2450 if (hw->max_queue_pairs > 1) {
2451 if (virtio_set_multiple_queues(dev, nb_queues) != 0)
2455 PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues);
2457 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2458 rxvq = dev->data->rx_queues[i];
2459 /* Flush the old packets */
2460 virtqueue_rxvq_flush(rxvq->vq);
2461 virtqueue_notify(rxvq->vq);
2464 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2465 txvq = dev->data->tx_queues[i];
2466 virtqueue_notify(txvq->vq);
2469 PMD_INIT_LOG(DEBUG, "Notified backend at initialization");
/* Debug-only dump of every virtqueue state. */
2471 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2472 rxvq = dev->data->rx_queues[i];
2473 VIRTQUEUE_DUMP(rxvq->vq);
2476 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2477 txvq = dev->data->tx_queues[i];
2478 VIRTQUEUE_DUMP(txvq->vq);
2481 set_rxtx_funcs(dev);
2484 /* Initialize Link state */
2485 virtio_dev_link_update(dev, 0);
/*
 * Detach and free every mbuf still attached to any virtqueue (rx, tx,
 * or control). Safe to call before queues exist (hw->vqs == NULL).
 */
2490 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
2492 struct virtio_hw *hw = dev->data->dev_private;
2493 uint16_t nr_vq = virtio_get_nr_vq(hw);
2494 const char *type __rte_unused;
2495 unsigned int i, mbuf_num = 0;
2496 struct virtqueue *vq;
2497 struct rte_mbuf *buf;
2500 if (hw->vqs == NULL)
2503 for (i = 0; i < nr_vq; i++) {
2508 queue_type = virtio_get_queue_type(hw, i);
2509 if (queue_type == VTNET_RQ)
2511 else if (queue_type == VTNET_TQ)
2517 "Before freeing %s[%d] used and unused buf",
/* Drain all remaining descriptors and free their mbufs. */
2521 while ((buf = virtqueue_detach_unused(vq)) != NULL) {
2522 rte_pktmbuf_free(buf);
2527 "After freeing %s[%d] used and unused buf",
2532 PMD_INIT_LOG(DEBUG, "%d mbufs freed", mbuf_num);
2536 * Stop device: disable interrupt and mark link down
2539 virtio_dev_stop(struct rte_eth_dev *dev)
2541 struct virtio_hw *hw = dev->data->dev_private;
2542 struct rte_eth_link link;
2543 struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
2545 PMD_INIT_LOG(DEBUG, "stop");
2546 dev->data->dev_started = 0;
/* hw->started is read by the pause/resume path under the same lock. */
2548 rte_spinlock_lock(&hw->state_lock);
2551 hw->started = false;
2553 if (intr_conf->lsc || intr_conf->rxq) {
2554 virtio_intr_disable(dev);
2556 /* Reset interrupt callback */
2557 if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
2558 rte_intr_callback_unregister(dev->intr_handle,
2559 virtio_interrupt_handler,
/* Publish link-down so readers see a consistent stopped state. */
2564 memset(&link, 0, sizeof(link));
2565 rte_eth_linkstatus_set(dev, &link);
2567 rte_spinlock_unlock(&hw->state_lock);
/*
 * Refresh the port's link status. If the device exposes
 * VIRTIO_NET_F_STATUS, the config-space status field decides up/down;
 * otherwise the link is reported up unconditionally. Returns the
 * rte_eth_linkstatus_set() result (0 when the state changed).
 */
2573 virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
2575 struct rte_eth_link link;
2577 struct virtio_hw *hw = dev->data->dev_private;
2579 memset(&link, 0, sizeof(link));
2580 link.link_duplex = hw->duplex;
2581 link.link_speed = hw->speed;
2582 link.link_autoneg = ETH_LINK_AUTONEG;
/* Stopped device: force link down regardless of hardware status. */
2585 link.link_status = ETH_LINK_DOWN;
2586 link.link_speed = ETH_SPEED_NUM_NONE;
2587 } else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
2588 PMD_INIT_LOG(DEBUG, "Get link status from hw");
2589 vtpci_read_dev_config(hw,
2590 offsetof(struct virtio_net_config, status),
2591 &status, sizeof(status));
2592 if ((status & VIRTIO_NET_S_LINK_UP) == 0) {
2593 link.link_status = ETH_LINK_DOWN;
2594 link.link_speed = ETH_SPEED_NUM_NONE;
2595 PMD_INIT_LOG(DEBUG, "Port %d is down",
2596 dev->data->port_id);
2598 link.link_status = ETH_LINK_UP;
2599 PMD_INIT_LOG(DEBUG, "Port %d is up",
2600 dev->data->port_id);
2603 link.link_status = ETH_LINK_UP;
2606 return rte_eth_linkstatus_set(dev, &link);
/*
 * Apply VLAN offload settings from the current rxmode: reject VLAN
 * filtering when VIRTIO_NET_F_CTRL_VLAN was not negotiated, and
 * record the VLAN-strip flag in hw->vlan_strip.
 */
2610 virtio_dev_vlan_offload_set(struct rte_eth_dev *dev, int mask)
2612 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
2613 struct virtio_hw *hw = dev->data->dev_private;
2614 uint64_t offloads = rxmode->offloads;
2616 if (mask & ETH_VLAN_FILTER_MASK) {
2617 if ((offloads & DEV_RX_OFFLOAD_VLAN_FILTER) &&
2618 !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) {
2621 "vlan filtering not available on this host");
2627 if (mask & ETH_VLAN_STRIP_MASK)
2628 hw->vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
2634 virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2636 uint64_t tso_mask, host_features;
2637 struct virtio_hw *hw = dev->data->dev_private;
2638 dev_info->speed_capa = virtio_dev_speed_capa_get(hw->speed);
2640 dev_info->max_rx_queues =
2641 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES);
2642 dev_info->max_tx_queues =
2643 RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES);
2644 dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
2645 dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN;
2646 dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
2648 host_features = VTPCI_OPS(hw)->get_features(hw);
2649 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2650 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
2651 if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
2652 dev_info->rx_offload_capa |=
2653 DEV_RX_OFFLOAD_TCP_CKSUM |
2654 DEV_RX_OFFLOAD_UDP_CKSUM;
2656 if (host_features & (1ULL << VIRTIO_NET_F_CTRL_VLAN))
2657 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_VLAN_FILTER;
2658 tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
2659 (1ULL << VIRTIO_NET_F_GUEST_TSO6);
2660 if ((host_features & tso_mask) == tso_mask)
2661 dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO;
2663 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
2664 DEV_TX_OFFLOAD_VLAN_INSERT;
2665 if (host_features & (1ULL << VIRTIO_NET_F_CSUM)) {
2666 dev_info->tx_offload_capa |=
2667 DEV_TX_OFFLOAD_UDP_CKSUM |
2668 DEV_TX_OFFLOAD_TCP_CKSUM;
2670 tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2671 (1ULL << VIRTIO_NET_F_HOST_TSO6);
2672 if ((host_features & tso_mask) == tso_mask)
2673 dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
2679 * It enables testpmd to collect per queue stats.
2682 virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev,
2683 __rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx,
2684 __rte_unused uint8_t is_rx)
/* Driver registration: export the PMD name, its PCI ID table, the kernel
 * module dependencies, and the two log types used by this driver.
 * NOTE(review): RTE_PMD_EXPORT_NAME is normally emitted via
 * RTE_PMD_REGISTER_PCI(net_virtio, rte_virtio_pmd) — confirm this direct
 * invocation is intended for this tree.
 */
2689 RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__);
2690 RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map);
2691 RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio-pci");
2692 RTE_LOG_REGISTER(virtio_logtype_init, pmd.net.virtio.init, NOTICE);
2693 RTE_LOG_REGISTER(virtio_logtype_driver, pmd.net.virtio.driver, NOTICE);