drivers/net/virtio/virtqueue.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2010-2014 Intel Corporation
   3  */
   4
   5 #ifndef _VIRTQUEUE_H_
   6 #define _VIRTQUEUE_H_
   7
   8 #include <stdint.h>
   9
  10 #include <rte_atomic.h>
  11 #include <rte_memory.h>
  12 #include <rte_mempool.h>
  13
  14 #include "virtio_pci.h"
  15 #include "virtio_ring.h"
  16 #include "virtio_logs.h"
  17 #include "virtio_rxtx.h"
  18
  19 struct rte_mbuf;
  20
  21 /*
  22  * Per virtio_ring.h in Linux.
  23  *     For virtio_pci on SMP, we don't need to order with respect to MMIO
  24  *     accesses through relaxed memory I/O windows, so smp_mb() et al are
  25  *     sufficient.
  26  *
  27  *     For using virtio to talk to real devices (eg. vDPA) we do need real
  28  *     barriers.
  29  */
  30 static inline void
  31 virtio_mb(uint8_t weak_barriers)
  32 {
  33         if (weak_barriers)
  34                 rte_smp_mb();
  35         else
  36                 rte_mb();
  37 }
  38
  39 static inline void
  40 virtio_rmb(uint8_t weak_barriers)
  41 {
  42         if (weak_barriers)
  43                 rte_smp_rmb();
  44         else
  45                 rte_cio_rmb();
  46 }
  47
  48 static inline void
  49 virtio_wmb(uint8_t weak_barriers)
  50 {
  51         if (weak_barriers)
  52                 rte_smp_wmb();
  53         else
  54                 rte_cio_wmb();
  55 }
  56
  57 static inline uint16_t
  58 virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
  59                               uint8_t weak_barriers)
  60 {
  61         uint16_t flags;
  62
  63         if (weak_barriers) {
  64 /* x86 prefers to using rte_smp_rmb over __atomic_load_n as it reports
  65  * a better perf(~1.5%), which comes from the saved branch by the compiler.
  66  * The if and else branch are identical with the smp and cio barriers both
  67  * defined as compiler barriers on x86.
  68  */
  69 #ifdef RTE_ARCH_X86_64
  70                 flags = dp->flags;
  71                 rte_smp_rmb();
  72 #else
  73                 flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
  74 #endif
  75         } else {
  76                 flags = dp->flags;
  77                 rte_cio_rmb();
  78         }
  79
  80         return flags;
  81 }
  82
  83 static inline void
  84 virtqueue_store_flags_packed(struct vring_packed_desc *dp,
  85                               uint16_t flags, uint8_t weak_barriers)
  86 {
  87         if (weak_barriers) {
  88 /* x86 prefers to using rte_smp_wmb over __atomic_store_n as it reports
  89  * a better perf(~1.5%), which comes from the saved branch by the compiler.
  90  * The if and else branch are identical with the smp and cio barriers both
  91  * defined as compiler barriers on x86.
  92  */
  93 #ifdef RTE_ARCH_X86_64
  94                 rte_smp_wmb();
  95                 dp->flags = flags;
  96 #else
  97                 __atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
  98 #endif
  99         } else {
 100                 rte_cio_wmb();
 101                 dp->flags = flags;
 102         }
 103 }
 104 #ifdef RTE_PMD_PACKET_PREFETCH
 105 #define rte_packet_prefetch(p)  rte_prefetch1(p)
 106 #else
 107 #define rte_packet_prefetch(p)  do {} while(0)
 108 #endif
 109
 110 #define VIRTQUEUE_MAX_NAME_SZ 32
 111
 112 #ifdef RTE_VIRTIO_USER
 113 /**
 114  * Return the physical address (or virtual address in case of
 115  * virtio-user) of mbuf data buffer.
 116  *
 117  * The address is firstly casted to the word size (sizeof(uintptr_t))
 118  * before casting it to uint64_t. This is to make it work with different
 119  * combination of word size (64 bit and 32 bit) and virtio device
 120  * (virtio-pci and virtio-user).
 121  */
 122 #define VIRTIO_MBUF_ADDR(mb, vq) \
 123         ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset)))
 124 #else
 125 #define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_iova)
 126 #endif
 127
 128 /**
 129  * Return the physical address (or virtual address in case of
 130  * virtio-user) of mbuf data buffer, taking care of mbuf data offset
 131  */
 132 #define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \
 133         (VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off)
 134
 135 #define VTNET_SQ_RQ_QUEUE_IDX 0
 136 #define VTNET_SQ_TQ_QUEUE_IDX 1
 137 #define VTNET_SQ_CQ_QUEUE_IDX 2
 138
 139 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
 140 /**
 141  * The maximum virtqueue size is 2^15. Use that value as the end of
 142  * descriptor chain terminator since it will never be a valid index
 143  * in the descriptor table. This is used to verify we are correctly
 144  * handling vq_free_cnt.
 145  */
 146 #define VQ_RING_DESC_CHAIN_END 32768
 147
 148 /**
 149  * Control the RX mode, ie. promiscuous, allmulti, etc...
 150  * All commands require an "out" sg entry containing a 1 byte
 151  * state value, zero = disable, non-zero = enable.  Commands
 152  * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
 153  * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
 154  */
 155 #define VIRTIO_NET_CTRL_RX              0
 156 #define VIRTIO_NET_CTRL_RX_PROMISC      0
 157 #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
 158 #define VIRTIO_NET_CTRL_RX_ALLUNI       2
 159 #define VIRTIO_NET_CTRL_RX_NOMULTI      3
 160 #define VIRTIO_NET_CTRL_RX_NOUNI        4
 161 #define VIRTIO_NET_CTRL_RX_NOBCAST      5
 162
 163 /**
 164  * Control the MAC
 165  *
 166  * The MAC filter table is managed by the hypervisor, the guest should
 167  * assume the size is infinite.  Filtering should be considered
 168  * non-perfect, ie. based on hypervisor resources, the guest may
 169  * received packets from sources not specified in the filter list.
 170  *
 171  * In addition to the class/cmd header, the TABLE_SET command requires
 172  * two out scatterlists.  Each contains a 4 byte count of entries followed
 173  * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
 174  * first sg list contains unicast addresses, the second is for multicast.
 175  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 176  * is available.
 177  *
 178  * The ADDR_SET command requests one out scatterlist, it contains a
 179  * 6 bytes MAC address. This functionality is present if the
 180  * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
 181  */
 182 struct virtio_net_ctrl_mac {
 183         uint32_t entries;
 184         uint8_t macs[][RTE_ETHER_ADDR_LEN];
 185 } __rte_packed;
 186
 187 #define VIRTIO_NET_CTRL_MAC    1
 188 #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
 189 #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
 190
 191 /**
 192  * Control VLAN filtering
 193  *
 194  * The VLAN filter table is controlled via a simple ADD/DEL interface.
 195  * VLAN IDs not added may be filtered by the hypervisor.  Del is the
 196  * opposite of add.  Both commands expect an out entry containing a 2
 197  * byte VLAN ID.  VLAN filtering is available with the
 198  * VIRTIO_NET_F_CTRL_VLAN feature bit.
 199  */
 200 #define VIRTIO_NET_CTRL_VLAN     2
 201 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 202 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 203
 204 /*
 205  * Control link announce acknowledgement
 206  *
 207  * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
 208  * driver has recevied the notification; device would clear the
 209  * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
 210  * this command.
 211  */
 212 #define VIRTIO_NET_CTRL_ANNOUNCE     3
 213 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
 214
 215 struct virtio_net_ctrl_hdr {
 216         uint8_t class;
 217         uint8_t cmd;
 218 } __rte_packed;
 219
 220 typedef uint8_t virtio_net_ctrl_ack;
 221
 222 #define VIRTIO_NET_OK     0
 223 #define VIRTIO_NET_ERR    1
 224
 225 #define VIRTIO_MAX_CTRL_DATA 2048
 226
 227 struct virtio_pmd_ctrl {
 228         struct virtio_net_ctrl_hdr hdr;
 229         virtio_net_ctrl_ack status;
 230         uint8_t data[VIRTIO_MAX_CTRL_DATA];
 231 };
 232
 233 struct vq_desc_extra {
 234         void *cookie;
 235         uint16_t ndescs;
 236         uint16_t next;
 237 };
 238
 239 struct virtqueue {
 240         struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
 241         union {
 242                 struct {
 243                         /**< vring keeping desc, used and avail */
 244                         struct vring ring;
 245                 } vq_split;
 246
 247                 struct {
 248                         /**< vring keeping descs and events */
 249                         struct vring_packed ring;
 250                         bool used_wrap_counter;
 251                         uint16_t cached_flags; /**< cached flags for descs */
 252                         uint16_t event_flags_shadow;
 253                 } vq_packed;
 254         };
 255
 256         uint16_t vq_used_cons_idx; /**< last consumed descriptor */
 257         uint16_t vq_nentries;  /**< vring desc numbers */
 258         uint16_t vq_free_cnt;  /**< num of desc available */
 259         uint16_t vq_avail_idx; /**< sync until needed */
 260         uint16_t vq_free_thresh; /**< free threshold */
 261
 262         void *vq_ring_virt_mem;  /**< linear address of vring*/
 263         unsigned int vq_ring_size;
 264
 265         union {
 266                 struct virtnet_rx rxq;
 267                 struct virtnet_tx txq;
 268                 struct virtnet_ctl cq;
 269         };
 270
 271         rte_iova_t vq_ring_mem; /**< physical address of vring,
 272                                  * or virtual address for virtio_user. */
 273
 274         /**
 275          * Head of the free chain in the descriptor table. If
 276          * there are no free descriptors, this will be set to
 277          * VQ_RING_DESC_CHAIN_END.
 278          */
 279         uint16_t  vq_desc_head_idx;
 280         uint16_t  vq_desc_tail_idx;
 281         uint16_t  vq_queue_index;   /**< PCI queue index */
 282         uint16_t offset; /**< relative offset to obtain addr in mbuf */
 283         uint16_t  *notify_addr;
 284         struct rte_mbuf **sw_ring;  /**< RX software ring. */
 285         struct vq_desc_extra vq_descx[0];
 286 };
 287
 288 /* If multiqueue is provided by host, then we suppport it. */
 289 #define VIRTIO_NET_CTRL_MQ   4
 290 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
 291 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
 292 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
 293
 294 /**
 295  * This is the first element of the scatter-gather list.  If you don't
 296  * specify GSO or CSUM features, you can simply ignore the header.
 297  */
 298 struct virtio_net_hdr {
 299 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
 300 #define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
 301         uint8_t flags;
 302 #define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
 303 #define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
 304 #define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
 305 #define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
 306 #define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
 307         uint8_t gso_type;
 308         uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
 309         uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
 310         uint16_t csum_start;  /**< Position to start checksumming from */
 311         uint16_t csum_offset; /**< Offset after that to place checksum */
 312 };
 313
 314 /**
 315  * This is the version of the header to use when the MRG_RXBUF
 316  * feature has been negotiated.
 317  */
 318 struct virtio_net_hdr_mrg_rxbuf {
 319         struct   virtio_net_hdr hdr;
 320         uint16_t num_buffers; /**< Number of merged rx buffers */
 321 };
 322
 323 /* Region reserved to allow for transmit header and indirect ring */
 324 #define VIRTIO_MAX_TX_INDIRECT 8
 325 struct virtio_tx_region {
 326         struct virtio_net_hdr_mrg_rxbuf tx_hdr;
 327         struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
 328                 __rte_aligned(16);
 329 };
 330
 331 static inline int
 332 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
 333 {
 334         uint16_t used, avail, flags;
 335
 336         flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers);
 337         used = !!(flags & VRING_PACKED_DESC_F_USED);
 338         avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);
 339
 340         return avail == used && used == vq->vq_packed.used_wrap_counter;
 341 }
 342
 343 static inline void
 344 vring_desc_init_packed(struct virtqueue *vq, int n)
 345 {
 346         int i;
 347         for (i = 0; i < n - 1; i++) {
 348                 vq->vq_packed.ring.desc[i].id = i;
 349                 vq->vq_descx[i].next = i + 1;
 350         }
 351         vq->vq_packed.ring.desc[i].id = i;
 352         vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
 353 }
 354
 355 /* Chain all the descriptors in the ring with an END */
 356 static inline void
 357 vring_desc_init_split(struct vring_desc *dp, uint16_t n)
 358 {
 359         uint16_t i;
 360
 361         for (i = 0; i < n - 1; i++)
 362                 dp[i].next = (uint16_t)(i + 1);
 363         dp[i].next = VQ_RING_DESC_CHAIN_END;
 364 }
 365
 366 /**
 367  * Tell the backend not to interrupt us. Implementation for packed virtqueues.
 368  */
 369 static inline void
 370 virtqueue_disable_intr_packed(struct virtqueue *vq)
 371 {
 372         if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
 373                 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
 374                 vq->vq_packed.ring.driver->desc_event_flags =
 375                         vq->vq_packed.event_flags_shadow;
 376         }
 377 }
 378
 379 /**
 380  * Tell the backend not to interrupt us. Implementation for split virtqueues.
 381  */
 382 static inline void
 383 virtqueue_disable_intr_split(struct virtqueue *vq)
 384 {
 385         vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 386 }
 387
 388 /**
 389  * Tell the backend not to interrupt us.
 390  */
 391 static inline void
 392 virtqueue_disable_intr(struct virtqueue *vq)
 393 {
 394         if (vtpci_packed_queue(vq->hw))
 395                 virtqueue_disable_intr_packed(vq);
 396         else
 397                 virtqueue_disable_intr_split(vq);
 398 }
 399
 400 /**
 401  * Tell the backend to interrupt. Implementation for packed virtqueues.
 402  */
 403 static inline void
 404 virtqueue_enable_intr_packed(struct virtqueue *vq)
 405 {
 406         if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
 407                 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
 408                 vq->vq_packed.ring.driver->desc_event_flags =
 409                         vq->vq_packed.event_flags_shadow;
 410         }
 411 }
 412
 413 /**
 414  * Tell the backend to interrupt. Implementation for split virtqueues.
 415  */
 416 static inline void
 417 virtqueue_enable_intr_split(struct virtqueue *vq)
 418 {
 419         vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
 420 }
 421
 422 /**
 423  * Tell the backend to interrupt us.
 424  */
 425 static inline void
 426 virtqueue_enable_intr(struct virtqueue *vq)
 427 {
 428         if (vtpci_packed_queue(vq->hw))
 429                 virtqueue_enable_intr_packed(vq);
 430         else
 431                 virtqueue_enable_intr_split(vq);
 432 }
 433
 434 /**
 435  *  Dump virtqueue internal structures, for debug purpose only.
 436  */
 437 void virtqueue_dump(struct virtqueue *vq);
 438 /**
 439  *  Get all mbufs to be freed.
 440  */
 441 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);
 442
 443 /* Flush the elements in the used ring. */
 444 void virtqueue_rxvq_flush(struct virtqueue *vq);
 445
 446 int virtqueue_rxvq_reset_packed(struct virtqueue *vq);
 447
 448 int virtqueue_txvq_reset_packed(struct virtqueue *vq);
 449
 450 static inline int
 451 virtqueue_full(const struct virtqueue *vq)
 452 {
 453         return vq->vq_free_cnt == 0;
 454 }
 455
 456 static inline int
 457 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
 458 {
 459         if (vtpci_queue_idx == hw->max_queue_pairs * 2)
 460                 return VTNET_CQ;
 461         else if (vtpci_queue_idx % 2 == 0)
 462                 return VTNET_RQ;
 463         else
 464                 return VTNET_TQ;
 465 }
 466
 467 #define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_split.ring.used->idx - \
 468                                         (vq)->vq_used_cons_idx))
 469
 470 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
 471 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
 472 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
 473                           uint16_t num);
 474
 475 static inline void
 476 vq_update_avail_idx(struct virtqueue *vq)
 477 {
 478         virtio_wmb(vq->hw->weak_barriers);
 479         vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
 480 }
 481
 482 static inline void
 483 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
 484 {
 485         uint16_t avail_idx;
 486         /*
 487          * Place the head of the descriptor chain into the next slot and make
 488          * it usable to the host. The chain is made available now rather than
 489          * deferring to virtqueue_notify() in the hopes that if the host is
 490          * currently running on another CPU, we can keep it processing the new
 491          * descriptor.
 492          */
 493         avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
 494         if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
 495                 vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
 496         vq->vq_avail_idx++;
 497 }
 498
 499 static inline int
 500 virtqueue_kick_prepare(struct virtqueue *vq)
 501 {
 502         /*
 503          * Ensure updated avail->idx is visible to vhost before reading
 504          * the used->flags.
 505          */
 506         virtio_mb(vq->hw->weak_barriers);
 507         return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
 508 }
 509
 510 static inline int
 511 virtqueue_kick_prepare_packed(struct virtqueue *vq)
 512 {
 513         uint16_t flags;
 514
 515         /*
 516          * Ensure updated data is visible to vhost before reading the flags.
 517          */
 518         virtio_mb(vq->hw->weak_barriers);
 519         flags = vq->vq_packed.ring.device->desc_event_flags;
 520
 521         return flags != RING_EVENT_FLAGS_DISABLE;
 522 }
 523
 524 /*
 525  * virtqueue_kick_prepare*() or the virtio_wmb() should be called
 526  * before this function to be sure that all the data is visible to vhost.
 527  */
 528 static inline void
 529 virtqueue_notify(struct virtqueue *vq)
 530 {
 531         VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
 532 }
 533
 534 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 535 #define VIRTQUEUE_DUMP(vq) do { \
 536         uint16_t used_idx, nused; \
 537         used_idx = (vq)->vq_split.ring.used->idx; \
 538         nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
 539         if (vtpci_packed_queue((vq)->hw)) { \
 540                 PMD_INIT_LOG(DEBUG, \
 541                 "VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
 542                 " cached_flags=0x%x; used_wrap_counter=%d", \
 543                 (vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
 544                 (vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
 545                 (vq)->vq_packed.used_wrap_counter); \
 546                 break; \
 547         } \
 548         PMD_INIT_LOG(DEBUG, \
 549           "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
 550           " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
 551           " avail.flags=0x%x; used.flags=0x%x", \
 552           (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \
 553           (vq)->vq_desc_head_idx, (vq)->vq_split.ring.avail->idx, \
 554           (vq)->vq_used_cons_idx, (vq)->vq_split.ring.used->idx, \
 555           (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
 556 } while (0)
 557 #else
 558 #define VIRTQUEUE_DUMP(vq) do { } while (0)
 559 #endif
 560
 561 #endif /* _VIRTQUEUE_H_ */