drivers/net/virtio/virtqueue.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2010-2014 Intel Corporation
   3  */
   4
   5 #ifndef _VIRTQUEUE_H_
   6 #define _VIRTQUEUE_H_
   7
   8 #include <stdint.h>
   9
  10 #include <rte_atomic.h>
  11 #include <rte_memory.h>
  12 #include <rte_mempool.h>
  13 #include <rte_net.h>
  14
  15 #include "virtio.h"
  16 #include "virtio_ring.h"
  17 #include "virtio_logs.h"
  18 #include "virtio_rxtx.h"
  19
  20 struct rte_mbuf;
  21
  22 #define DEFAULT_TX_FREE_THRESH 32
  23 #define DEFAULT_RX_FREE_THRESH 32
  24
  25 #define VIRTIO_MBUF_BURST_SZ 64
  26 /*
  27  * Per virtio_ring.h in Linux.
  28  *     For virtio_pci on SMP, we don't need to order with respect to MMIO
  29  *     accesses through relaxed memory I/O windows, so thread_fence is
  30  *     sufficient.
  31  *
  32  *     For using virtio to talk to real devices (eg. vDPA) we do need real
  33  *     barriers.
  34  */
  35 static inline void
  36 virtio_mb(uint8_t weak_barriers)
  37 {
  38         if (weak_barriers)
  39                 rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
  40         else
  41                 rte_mb();
  42 }
  43
  44 static inline void
  45 virtio_rmb(uint8_t weak_barriers)
  46 {
  47         if (weak_barriers)
  48                 rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
  49         else
  50                 rte_io_rmb();
  51 }
  52
  53 static inline void
  54 virtio_wmb(uint8_t weak_barriers)
  55 {
  56         if (weak_barriers)
  57                 rte_atomic_thread_fence(__ATOMIC_RELEASE);
  58         else
  59                 rte_io_wmb();
  60 }
  61
  62 static inline uint16_t
  63 virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
  64                               uint8_t weak_barriers)
  65 {
  66         uint16_t flags;
  67
  68         if (weak_barriers) {
  69 /* x86 prefers to using rte_io_rmb over __atomic_load_n as it reports
  70  * a better perf(~1.5%), which comes from the saved branch by the compiler.
  71  * The if and else branch are identical  on the platforms except Arm.
  72  */
  73 #ifdef RTE_ARCH_ARM
  74                 flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
  75 #else
  76                 flags = dp->flags;
  77                 rte_io_rmb();
  78 #endif
  79         } else {
  80                 flags = dp->flags;
  81                 rte_io_rmb();
  82         }
  83
  84         return flags;
  85 }
  86
  87 static inline void
  88 virtqueue_store_flags_packed(struct vring_packed_desc *dp,
  89                               uint16_t flags, uint8_t weak_barriers)
  90 {
  91         if (weak_barriers) {
  92 /* x86 prefers to using rte_io_wmb over __atomic_store_n as it reports
  93  * a better perf(~1.5%), which comes from the saved branch by the compiler.
  94  * The if and else branch are identical on the platforms except Arm.
  95  */
  96 #ifdef RTE_ARCH_ARM
  97                 __atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
  98 #else
  99                 rte_io_wmb();
 100                 dp->flags = flags;
 101 #endif
 102         } else {
 103                 rte_io_wmb();
 104                 dp->flags = flags;
 105         }
 106 }
 107
 108 #ifdef RTE_PMD_PACKET_PREFETCH
 109 #define rte_packet_prefetch(p)  rte_prefetch1(p)
 110 #else
 111 #define rte_packet_prefetch(p)  do {} while(0)
 112 #endif
 113
 114 #define VIRTQUEUE_MAX_NAME_SZ 32
 115
 116 /**
 117  * Return the IOVA (or virtual address in case of virtio-user) of mbuf
 118  * data buffer.
 119  *
 120  * The address is firstly casted to the word size (sizeof(uintptr_t))
 121  * before casting it to uint64_t. This is to make it work with different
 122  * combination of word size (64 bit and 32 bit) and virtio device
 123  * (virtio-pci and virtio-user).
 124  */
 125 #define VIRTIO_MBUF_ADDR(mb, vq) \
 126         ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)))
 127
 128 /**
 129  * Return the physical address (or virtual address in case of
 130  * virtio-user) of mbuf data buffer, taking care of mbuf data offset
 131  */
 132 #define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \
 133         (VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off)
 134
 135 #define VTNET_SQ_RQ_QUEUE_IDX 0
 136 #define VTNET_SQ_TQ_QUEUE_IDX 1
 137 #define VTNET_SQ_CQ_QUEUE_IDX 2
 138
 139 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
 140 /**
 141  * The maximum virtqueue size is 2^15. Use that value as the end of
 142  * descriptor chain terminator since it will never be a valid index
 143  * in the descriptor table. This is used to verify we are correctly
 144  * handling vq_free_cnt.
 145  */
 146 #define VQ_RING_DESC_CHAIN_END 32768
 147
 148 /**
 149  * Control the RX mode, ie. promiscuous, allmulti, etc...
 150  * All commands require an "out" sg entry containing a 1 byte
 151  * state value, zero = disable, non-zero = enable.  Commands
 152  * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
 153  * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
 154  */
 155 #define VIRTIO_NET_CTRL_RX              0
 156 #define VIRTIO_NET_CTRL_RX_PROMISC      0
 157 #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
 158 #define VIRTIO_NET_CTRL_RX_ALLUNI       2
 159 #define VIRTIO_NET_CTRL_RX_NOMULTI      3
 160 #define VIRTIO_NET_CTRL_RX_NOUNI        4
 161 #define VIRTIO_NET_CTRL_RX_NOBCAST      5
 162
 163 /**
 164  * Control the MAC
 165  *
 166  * The MAC filter table is managed by the hypervisor, the guest should
 167  * assume the size is infinite.  Filtering should be considered
 168  * non-perfect, ie. based on hypervisor resources, the guest may
 169  * received packets from sources not specified in the filter list.
 170  *
 171  * In addition to the class/cmd header, the TABLE_SET command requires
 172  * two out scatterlists.  Each contains a 4 byte count of entries followed
 173  * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
 174  * first sg list contains unicast addresses, the second is for multicast.
 175  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 176  * is available.
 177  *
 178  * The ADDR_SET command requests one out scatterlist, it contains a
 179  * 6 bytes MAC address. This functionality is present if the
 180  * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
 181  */
 182 struct virtio_net_ctrl_mac {
 183         uint32_t entries;
 184         uint8_t macs[][RTE_ETHER_ADDR_LEN];
 185 } __rte_packed;
 186
 187 #define VIRTIO_NET_CTRL_MAC    1
 188 #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
 189 #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
 190
 191 /**
 192  * Control VLAN filtering
 193  *
 194  * The VLAN filter table is controlled via a simple ADD/DEL interface.
 195  * VLAN IDs not added may be filtered by the hypervisor.  Del is the
 196  * opposite of add.  Both commands expect an out entry containing a 2
 197  * byte VLAN ID.  VLAN filtering is available with the
 198  * VIRTIO_NET_F_CTRL_VLAN feature bit.
 199  */
 200 #define VIRTIO_NET_CTRL_VLAN     2
 201 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 202 #define VIRTIO_NET_CTRL_VLAN_DEL 1
 203
 204 /*
 205  * Control link announce acknowledgement
 206  *
 207  * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
 208  * driver has recevied the notification; device would clear the
 209  * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
 210  * this command.
 211  */
 212 #define VIRTIO_NET_CTRL_ANNOUNCE     3
 213 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
 214
 215 struct virtio_net_ctrl_hdr {
 216         uint8_t class;
 217         uint8_t cmd;
 218 } __rte_packed;
 219
 220 typedef uint8_t virtio_net_ctrl_ack;
 221
 222 #define VIRTIO_NET_OK     0
 223 #define VIRTIO_NET_ERR    1
 224
 225 #define VIRTIO_MAX_CTRL_DATA 2048
 226
 227 struct virtio_pmd_ctrl {
 228         struct virtio_net_ctrl_hdr hdr;
 229         virtio_net_ctrl_ack status;
 230         uint8_t data[VIRTIO_MAX_CTRL_DATA];
 231 };
 232
 233 struct vq_desc_extra {
 234         void *cookie;
 235         uint16_t ndescs;
 236         uint16_t next;
 237 };
 238
 239 #define virtnet_rxq_to_vq(rxvq) container_of(rxvq, struct virtqueue, rxq)
 240 #define virtnet_txq_to_vq(txvq) container_of(txvq, struct virtqueue, txq)
 241 #define virtnet_cq_to_vq(cvq) container_of(cvq, struct virtqueue, cq)
 242
 243 struct virtqueue {
 244         struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
 245         union {
 246                 struct {
 247                         /**< vring keeping desc, used and avail */
 248                         struct vring ring;
 249                 } vq_split;
 250
 251                 struct {
 252                         /**< vring keeping descs and events */
 253                         struct vring_packed ring;
 254                         bool used_wrap_counter;
 255                         uint16_t cached_flags; /**< cached flags for descs */
 256                         uint16_t event_flags_shadow;
 257                 } vq_packed;
 258         };
 259
 260         uint16_t vq_used_cons_idx; /**< last consumed descriptor */
 261         uint16_t vq_nentries;  /**< vring desc numbers */
 262         uint16_t vq_free_cnt;  /**< num of desc available */
 263         uint16_t vq_avail_idx; /**< sync until needed */
 264         uint16_t vq_free_thresh; /**< free threshold */
 265
 266         /**
 267          * Head of the free chain in the descriptor table. If
 268          * there are no free descriptors, this will be set to
 269          * VQ_RING_DESC_CHAIN_END.
 270          */
 271         uint16_t  vq_desc_head_idx;
 272         uint16_t  vq_desc_tail_idx;
 273         uint16_t  vq_queue_index;   /**< PCI queue index */
 274
 275         void *vq_ring_virt_mem;  /**< linear address of vring*/
 276         unsigned int vq_ring_size;
 277         uint16_t mbuf_addr_offset;
 278
 279         union {
 280                 struct virtnet_rx rxq;
 281                 struct virtnet_tx txq;
 282                 struct virtnet_ctl cq;
 283         };
 284
 285         rte_iova_t vq_ring_mem; /**< physical address of vring,
 286                                  * or virtual address for virtio_user. */
 287
 288         uint16_t  *notify_addr;
 289         struct rte_mbuf **sw_ring;  /**< RX software ring. */
 290         struct vq_desc_extra vq_descx[0];
 291 };
 292
 293 /* If multiqueue is provided by host, then we suppport it. */
 294 #define VIRTIO_NET_CTRL_MQ   4
 295 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
 296 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
 297 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
 298
 299 /**
 300  * This is the first element of the scatter-gather list.  If you don't
 301  * specify GSO or CSUM features, you can simply ignore the header.
 302  */
 303 struct virtio_net_hdr {
 304 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
 305 #define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
 306         uint8_t flags;
 307 #define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
 308 #define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
 309 #define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
 310 #define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
 311 #define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
 312         uint8_t gso_type;
 313         uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
 314         uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
 315         uint16_t csum_start;  /**< Position to start checksumming from */
 316         uint16_t csum_offset; /**< Offset after that to place checksum */
 317 };
 318
 319 /**
 320  * This is the version of the header to use when the MRG_RXBUF
 321  * feature has been negotiated.
 322  */
 323 struct virtio_net_hdr_mrg_rxbuf {
 324         struct   virtio_net_hdr hdr;
 325         uint16_t num_buffers; /**< Number of merged rx buffers */
 326 };
 327
 328 /* Region reserved to allow for transmit header and indirect ring */
 329 #define VIRTIO_MAX_TX_INDIRECT 8
 330 struct virtio_tx_region {
 331         struct virtio_net_hdr_mrg_rxbuf tx_hdr;
 332         union {
 333                 struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
 334                 struct vring_packed_desc
 335                         tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
 336         } __rte_aligned(16);
 337 };
 338
 339 static inline int
 340 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
 341 {
 342         uint16_t used, avail, flags;
 343
 344         flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers);
 345         used = !!(flags & VRING_PACKED_DESC_F_USED);
 346         avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);
 347
 348         return avail == used && used == vq->vq_packed.used_wrap_counter;
 349 }
 350
 351 static inline void
 352 vring_desc_init_packed(struct virtqueue *vq, int n)
 353 {
 354         int i;
 355         for (i = 0; i < n - 1; i++) {
 356                 vq->vq_packed.ring.desc[i].id = i;
 357                 vq->vq_descx[i].next = i + 1;
 358         }
 359         vq->vq_packed.ring.desc[i].id = i;
 360         vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
 361 }
 362
 363 /* Chain all the descriptors in the ring with an END */
 364 static inline void
 365 vring_desc_init_split(struct vring_desc *dp, uint16_t n)
 366 {
 367         uint16_t i;
 368
 369         for (i = 0; i < n - 1; i++)
 370                 dp[i].next = (uint16_t)(i + 1);
 371         dp[i].next = VQ_RING_DESC_CHAIN_END;
 372 }
 373
 374 static inline void
 375 vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
 376 {
 377         int i;
 378         for (i = 0; i < n; i++) {
 379                 dp[i].id = (uint16_t)i;
 380                 dp[i].flags = VRING_DESC_F_WRITE;
 381         }
 382 }
 383
 384 /**
 385  * Tell the backend not to interrupt us. Implementation for packed virtqueues.
 386  */
 387 static inline void
 388 virtqueue_disable_intr_packed(struct virtqueue *vq)
 389 {
 390         if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
 391                 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
 392                 vq->vq_packed.ring.driver->desc_event_flags =
 393                         vq->vq_packed.event_flags_shadow;
 394         }
 395 }
 396
 397 /**
 398  * Tell the backend not to interrupt us. Implementation for split virtqueues.
 399  */
 400 static inline void
 401 virtqueue_disable_intr_split(struct virtqueue *vq)
 402 {
 403         vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 404 }
 405
 406 /**
 407  * Tell the backend not to interrupt us.
 408  */
 409 static inline void
 410 virtqueue_disable_intr(struct virtqueue *vq)
 411 {
 412         if (virtio_with_packed_queue(vq->hw))
 413                 virtqueue_disable_intr_packed(vq);
 414         else
 415                 virtqueue_disable_intr_split(vq);
 416 }
 417
 418 /**
 419  * Tell the backend to interrupt. Implementation for packed virtqueues.
 420  */
 421 static inline void
 422 virtqueue_enable_intr_packed(struct virtqueue *vq)
 423 {
 424         if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
 425                 vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
 426                 vq->vq_packed.ring.driver->desc_event_flags =
 427                         vq->vq_packed.event_flags_shadow;
 428         }
 429 }
 430
 431 /**
 432  * Tell the backend to interrupt. Implementation for split virtqueues.
 433  */
 434 static inline void
 435 virtqueue_enable_intr_split(struct virtqueue *vq)
 436 {
 437         vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
 438 }
 439
 440 /**
 441  * Tell the backend to interrupt us.
 442  */
 443 static inline void
 444 virtqueue_enable_intr(struct virtqueue *vq)
 445 {
 446         if (virtio_with_packed_queue(vq->hw))
 447                 virtqueue_enable_intr_packed(vq);
 448         else
 449                 virtqueue_enable_intr_split(vq);
 450 }
 451
 452 /**
 453  *  Dump virtqueue internal structures, for debug purpose only.
 454  */
 455 void virtqueue_dump(struct virtqueue *vq);
 456 /**
 457  *  Get all mbufs to be freed.
 458  */
 459 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);
 460
 461 /* Flush the elements in the used ring. */
 462 void virtqueue_rxvq_flush(struct virtqueue *vq);
 463
 464 int virtqueue_rxvq_reset_packed(struct virtqueue *vq);
 465
 466 int virtqueue_txvq_reset_packed(struct virtqueue *vq);
 467
 468 static inline int
 469 virtqueue_full(const struct virtqueue *vq)
 470 {
 471         return vq->vq_free_cnt == 0;
 472 }
 473
 474 static inline int
 475 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vq_idx)
 476 {
 477         if (vq_idx == hw->max_queue_pairs * 2)
 478                 return VTNET_CQ;
 479         else if (vq_idx % 2 == 0)
 480                 return VTNET_RQ;
 481         else
 482                 return VTNET_TQ;
 483 }
 484
 485 /* virtqueue_nused has load-acquire or rte_io_rmb insed */
 486 static inline uint16_t
 487 virtqueue_nused(const struct virtqueue *vq)
 488 {
 489         uint16_t idx;
 490
 491         if (vq->hw->weak_barriers) {
 492         /**
 493          * x86 prefers to using rte_smp_rmb over __atomic_load_n as it
 494          * reports a slightly better perf, which comes from the saved
 495          * branch by the compiler.
 496          * The if and else branches are identical with the smp and io
 497          * barriers both defined as compiler barriers on x86.
 498          */
 499 #ifdef RTE_ARCH_X86_64
 500                 idx = vq->vq_split.ring.used->idx;
 501                 rte_smp_rmb();
 502 #else
 503                 idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx,
 504                                 __ATOMIC_ACQUIRE);
 505 #endif
 506         } else {
 507                 idx = vq->vq_split.ring.used->idx;
 508                 rte_io_rmb();
 509         }
 510         return idx - vq->vq_used_cons_idx;
 511 }
 512
 513 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
 514 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
 515 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
 516                           uint16_t num);
 517
 518 static inline void
 519 vq_update_avail_idx(struct virtqueue *vq)
 520 {
 521         if (vq->hw->weak_barriers) {
 522         /* x86 prefers to using rte_smp_wmb over __atomic_store_n as
 523          * it reports a slightly better perf, which comes from the
 524          * saved branch by the compiler.
 525          * The if and else branches are identical with the smp and
 526          * io barriers both defined as compiler barriers on x86.
 527          */
 528 #ifdef RTE_ARCH_X86_64
 529                 rte_smp_wmb();
 530                 vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
 531 #else
 532                 __atomic_store_n(&vq->vq_split.ring.avail->idx,
 533                                  vq->vq_avail_idx, __ATOMIC_RELEASE);
 534 #endif
 535         } else {
 536                 rte_io_wmb();
 537                 vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
 538         }
 539 }
 540
 541 static inline void
 542 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
 543 {
 544         uint16_t avail_idx;
 545         /*
 546          * Place the head of the descriptor chain into the next slot and make
 547          * it usable to the host. The chain is made available now rather than
 548          * deferring to virtqueue_notify() in the hopes that if the host is
 549          * currently running on another CPU, we can keep it processing the new
 550          * descriptor.
 551          */
 552         avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
 553         if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
 554                 vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
 555         vq->vq_avail_idx++;
 556 }
 557
 558 static inline int
 559 virtqueue_kick_prepare(struct virtqueue *vq)
 560 {
 561         /*
 562          * Ensure updated avail->idx is visible to vhost before reading
 563          * the used->flags.
 564          */
 565         virtio_mb(vq->hw->weak_barriers);
 566         return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
 567 }
 568
 569 static inline int
 570 virtqueue_kick_prepare_packed(struct virtqueue *vq)
 571 {
 572         uint16_t flags;
 573
 574         /*
 575          * Ensure updated data is visible to vhost before reading the flags.
 576          */
 577         virtio_mb(vq->hw->weak_barriers);
 578         flags = vq->vq_packed.ring.device->desc_event_flags;
 579
 580         return flags != RING_EVENT_FLAGS_DISABLE;
 581 }
 582
 583 /*
 584  * virtqueue_kick_prepare*() or the virtio_wmb() should be called
 585  * before this function to be sure that all the data is visible to vhost.
 586  */
 587 static inline void
 588 virtqueue_notify(struct virtqueue *vq)
 589 {
 590         VIRTIO_OPS(vq->hw)->notify_queue(vq->hw, vq);
 591 }
 592
 593 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
 594 #define VIRTQUEUE_DUMP(vq) do { \
 595         uint16_t used_idx, nused; \
 596         used_idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx, \
 597                                    __ATOMIC_RELAXED); \
 598         nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
 599         if (virtio_with_packed_queue((vq)->hw)) { \
 600                 PMD_INIT_LOG(DEBUG, \
 601                 "VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
 602                 " cached_flags=0x%x; used_wrap_counter=%d", \
 603                 (vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
 604                 (vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
 605                 (vq)->vq_packed.used_wrap_counter); \
 606                 break; \
 607         } \
 608         PMD_INIT_LOG(DEBUG, \
 609           "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
 610           " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
 611           " avail.flags=0x%x; used.flags=0x%x", \
 612           (vq)->vq_nentries, (vq)->vq_free_cnt, nused, (vq)->vq_desc_head_idx, \
 613           (vq)->vq_split.ring.avail->idx, (vq)->vq_used_cons_idx, \
 614           __atomic_load_n(&(vq)->vq_split.ring.used->idx, __ATOMIC_RELAXED), \
 615           (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
 616 } while (0)
 617 #else
 618 #define VIRTQUEUE_DUMP(vq) do { } while (0)
 619 #endif
 620
 621 /* avoid write operation when necessary, to lessen cache issues */
 622 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
 623         typeof(var) *const var_ = &(var);       \
 624         typeof(val)  const val_ = (val);        \
 625         if (*var_ != val_)                      \
 626                 *var_ = val_;                   \
 627 } while (0)
 628
 629 #define virtqueue_clear_net_hdr(hdr) do {               \
 630         typeof(hdr) hdr_ = (hdr);                       \
 631         ASSIGN_UNLESS_EQUAL((hdr_)->csum_start, 0);     \
 632         ASSIGN_UNLESS_EQUAL((hdr_)->csum_offset, 0);    \
 633         ASSIGN_UNLESS_EQUAL((hdr_)->flags, 0);          \
 634         ASSIGN_UNLESS_EQUAL((hdr_)->gso_type, 0);       \
 635         ASSIGN_UNLESS_EQUAL((hdr_)->gso_size, 0);       \
 636         ASSIGN_UNLESS_EQUAL((hdr_)->hdr_len, 0);        \
 637 } while (0)
 638
 639 static inline void
 640 virtqueue_xmit_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *cookie)
 641 {
 642         uint64_t csum_l4 = cookie->ol_flags & RTE_MBUF_F_TX_L4_MASK;
 643         uint16_t o_l23_len = (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
 644                              cookie->outer_l2_len + cookie->outer_l3_len : 0;
 645
 646         if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
 647                 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;
 648
 649         switch (csum_l4) {
 650         case RTE_MBUF_F_TX_UDP_CKSUM:
 651                 hdr->csum_start = o_l23_len + cookie->l2_len + cookie->l3_len;
 652                 hdr->csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
 653                 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 654                 break;
 655
 656         case RTE_MBUF_F_TX_TCP_CKSUM:
 657                 hdr->csum_start = o_l23_len + cookie->l2_len + cookie->l3_len;
 658                 hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
 659                 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
 660                 break;
 661
 662         default:
 663                 ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
 664                 ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
 665                 ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
 666                 break;
 667         }
 668
 669         /* TCP Segmentation Offload */
 670         if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
 671                 hdr->gso_type = (cookie->ol_flags & RTE_MBUF_F_TX_IPV6) ?
 672                         VIRTIO_NET_HDR_GSO_TCPV6 :
 673                         VIRTIO_NET_HDR_GSO_TCPV4;
 674                 hdr->gso_size = cookie->tso_segsz;
 675                 hdr->hdr_len = o_l23_len + cookie->l2_len + cookie->l3_len +
 676                                cookie->l4_len;
 677         } else {
 678                 ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
 679                 ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
 680                 ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
 681         }
 682 }
 683
 684 static inline void
 685 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 686                               uint16_t needed, int use_indirect, int can_push,
 687                               int in_order)
 688 {
 689         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
 690         struct vq_desc_extra *dxp;
 691         struct virtqueue *vq = virtnet_txq_to_vq(txvq);
 692         struct vring_packed_desc *start_dp, *head_dp;
 693         uint16_t idx, id, head_idx, head_flags;
 694         int16_t head_size = vq->hw->vtnet_hdr_size;
 695         struct virtio_net_hdr *hdr;
 696         uint16_t prev;
 697         bool prepend_header = false;
 698         uint16_t seg_num = cookie->nb_segs;
 699
 700         id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
 701
 702         dxp = &vq->vq_descx[id];
 703         dxp->ndescs = needed;
 704         dxp->cookie = cookie;
 705
 706         head_idx = vq->vq_avail_idx;
 707         idx = head_idx;
 708         prev = head_idx;
 709         start_dp = vq->vq_packed.ring.desc;
 710
 711         head_dp = &vq->vq_packed.ring.desc[idx];
 712         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
 713         head_flags |= vq->vq_packed.cached_flags;
 714
 715         if (can_push) {
 716                 /* prepend cannot fail, checked by caller */
 717                 hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
 718                                               -head_size);
 719                 prepend_header = true;
 720
 721                 /* if offload disabled, it is not zeroed below, do it now */
 722                 if (!vq->hw->has_tx_offload)
 723                         virtqueue_clear_net_hdr(hdr);
 724         } else if (use_indirect) {
 725                 /* setup tx ring slot to point to indirect
 726                  * descriptor list stored in reserved region.
 727                  *
 728                  * the first slot in indirect ring is already preset
 729                  * to point to the header in reserved region
 730                  */
 731                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
 732                         RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
 733                 start_dp[idx].len   = (seg_num + 1) *
 734                         sizeof(struct vring_packed_desc);
 735                 /* Packed descriptor id needs to be restored when inorder. */
 736                 if (in_order)
 737                         start_dp[idx].id = idx;
 738                 /* reset flags for indirect desc */
 739                 head_flags = VRING_DESC_F_INDIRECT;
 740                 head_flags |= vq->vq_packed.cached_flags;
 741                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
 742
 743                 /* loop below will fill in rest of the indirect elements */
 744                 start_dp = txr[idx].tx_packed_indir;
 745                 idx = 1;
 746         } else {
 747                 /* setup first tx ring slot to point to header
 748                  * stored in reserved region.
 749                  */
 750                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
 751                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
 752                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
 753                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
 754                 idx++;
 755                 if (idx >= vq->vq_nentries) {
 756                         idx -= vq->vq_nentries;
 757                         vq->vq_packed.cached_flags ^=
 758                                 VRING_PACKED_DESC_F_AVAIL_USED;
 759                 }
 760         }
 761
 762         if (vq->hw->has_tx_offload)
 763                 virtqueue_xmit_offload(hdr, cookie);
 764
 765         do {
 766                 uint16_t flags;
 767
 768                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
 769                 start_dp[idx].len  = cookie->data_len;
 770                 if (prepend_header) {
 771                         start_dp[idx].addr -= head_size;
 772                         start_dp[idx].len += head_size;
 773                         prepend_header = false;
 774                 }
 775
 776                 if (likely(idx != head_idx)) {
 777                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
 778                         flags |= vq->vq_packed.cached_flags;
 779                         start_dp[idx].flags = flags;
 780                 }
 781                 prev = idx;
 782                 idx++;
 783                 if (idx >= vq->vq_nentries) {
 784                         idx -= vq->vq_nentries;
 785                         vq->vq_packed.cached_flags ^=
 786                                 VRING_PACKED_DESC_F_AVAIL_USED;
 787                 }
 788         } while ((cookie = cookie->next) != NULL);
 789
 790         start_dp[prev].id = id;
 791
 792         if (use_indirect) {
 793                 idx = head_idx;
 794                 if (++idx >= vq->vq_nentries) {
 795                         idx -= vq->vq_nentries;
 796                         vq->vq_packed.cached_flags ^=
 797                                 VRING_PACKED_DESC_F_AVAIL_USED;
 798                 }
 799         }
 800
 801         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
 802         vq->vq_avail_idx = idx;
 803
 804         if (!in_order) {
 805                 vq->vq_desc_head_idx = dxp->next;
 806                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
 807                         vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
 808         }
 809
 810         virtqueue_store_flags_packed(head_dp, head_flags,
 811                                      vq->hw->weak_barriers);
 812 }
 813
 814 static void
 815 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
 816 {
 817         struct vq_desc_extra *dxp;
 818
 819         dxp = &vq->vq_descx[id];
 820         vq->vq_free_cnt += dxp->ndescs;
 821
 822         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
 823                 vq->vq_desc_head_idx = id;
 824         else
 825                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
 826
 827         vq->vq_desc_tail_idx = id;
 828         dxp->next = VQ_RING_DESC_CHAIN_END;
 829 }
 830
 831 static void
 832 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, uint16_t num)
 833 {
 834         uint16_t used_idx, id, curr_id, free_cnt = 0;
 835         uint16_t size = vq->vq_nentries;
 836         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
 837         struct vq_desc_extra *dxp;
 838         int nb = num;
 839
 840         used_idx = vq->vq_used_cons_idx;
 841         /* desc_is_used has a load-acquire or rte_io_rmb inside
 842          * and wait for used desc in virtqueue.
 843          */
 844         while (nb > 0 && desc_is_used(&desc[used_idx], vq)) {
 845                 id = desc[used_idx].id;
 846                 do {
 847                         curr_id = used_idx;
 848                         dxp = &vq->vq_descx[used_idx];
 849                         used_idx += dxp->ndescs;
 850                         free_cnt += dxp->ndescs;
 851                         nb -= dxp->ndescs;
 852                         if (used_idx >= size) {
 853                                 used_idx -= size;
 854                                 vq->vq_packed.used_wrap_counter ^= 1;
 855                         }
 856                         if (dxp->cookie != NULL) {
 857                                 rte_pktmbuf_free(dxp->cookie);
 858                                 dxp->cookie = NULL;
 859                         }
 860                 } while (curr_id != id);
 861         }
 862         vq->vq_used_cons_idx = used_idx;
 863         vq->vq_free_cnt += free_cnt;
 864 }
 865
 866 static void
 867 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, uint16_t num)
 868 {
 869         uint16_t used_idx, id;
 870         uint16_t size = vq->vq_nentries;
 871         struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
 872         struct vq_desc_extra *dxp;
 873
 874         used_idx = vq->vq_used_cons_idx;
 875         /* desc_is_used has a load-acquire or rte_io_rmb inside
 876          * and wait for used desc in virtqueue.
 877          */
 878         while (num-- && desc_is_used(&desc[used_idx], vq)) {
 879                 id = desc[used_idx].id;
 880                 dxp = &vq->vq_descx[id];
 881                 vq->vq_used_cons_idx += dxp->ndescs;
 882                 if (vq->vq_used_cons_idx >= size) {
 883                         vq->vq_used_cons_idx -= size;
 884                         vq->vq_packed.used_wrap_counter ^= 1;
 885                 }
 886                 vq_ring_free_id_packed(vq, id);
 887                 if (dxp->cookie != NULL) {
 888                         rte_pktmbuf_free(dxp->cookie);
 889                         dxp->cookie = NULL;
 890                 }
 891                 used_idx = vq->vq_used_cons_idx;
 892         }
 893 }
 894
 895 /* Cleanup from completed transmits. */
 896 static inline void
 897 virtio_xmit_cleanup_packed(struct virtqueue *vq, uint16_t num, int in_order)
 898 {
 899         if (in_order)
 900                 virtio_xmit_cleanup_inorder_packed(vq, num);
 901         else
 902                 virtio_xmit_cleanup_normal_packed(vq, num);
 903 }
 904
 905 static inline void
 906 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
 907 {
 908         uint16_t i, used_idx, desc_idx;
 909         for (i = 0; i < num; i++) {
 910                 struct vring_used_elem *uep;
 911                 struct vq_desc_extra *dxp;
 912
 913                 used_idx = (uint16_t)(vq->vq_used_cons_idx &
 914                                 (vq->vq_nentries - 1));
 915                 uep = &vq->vq_split.ring.used->ring[used_idx];
 916
 917                 desc_idx = (uint16_t)uep->id;
 918                 dxp = &vq->vq_descx[desc_idx];
 919                 vq->vq_used_cons_idx++;
 920                 vq_ring_free_chain(vq, desc_idx);
 921
 922                 if (dxp->cookie != NULL) {
 923                         rte_pktmbuf_free(dxp->cookie);
 924                         dxp->cookie = NULL;
 925                 }
 926         }
 927 }
 928
 929 /* Cleanup from completed inorder transmits. */
 930 static __rte_always_inline void
 931 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
 932 {
 933         uint16_t i, idx = vq->vq_used_cons_idx;
 934         int16_t free_cnt = 0;
 935         struct vq_desc_extra *dxp = NULL;
 936
 937         if (unlikely(num == 0))
 938                 return;
 939
 940         for (i = 0; i < num; i++) {
 941                 dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
 942                 free_cnt += dxp->ndescs;
 943                 if (dxp->cookie != NULL) {
 944                         rte_pktmbuf_free(dxp->cookie);
 945                         dxp->cookie = NULL;
 946                 }
 947         }
 948
 949         vq->vq_free_cnt += free_cnt;
 950         vq->vq_used_cons_idx = idx;
 951 }
 952 #endif /* _VIRTQUEUE_H_ */