/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _VIRTQUEUE_H_
#define _VIRTQUEUE_H_

#include <stdint.h>

#include <rte_atomic.h>
#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_net.h>

#include "virtio.h"
#include "virtio_ring.h"
#include "virtio_logs.h"
#include "virtio_rxtx.h"

struct rte_mbuf;

#define DEFAULT_TX_FREE_THRESH 32
#define DEFAULT_RX_FREE_THRESH 32

#define VIRTIO_MBUF_BURST_SZ 64
/*
 * Per virtio_ring.h in Linux.
 *     For virtio_pci on SMP, we don't need to order with respect to MMIO
 *     accesses through relaxed memory I/O windows, so thread_fence is
 *     sufficient.
 *
 *     For using virtio to talk to real devices (e.g. vDPA) we do need real
 *     barriers.
 */
static inline void
virtio_mb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
	else
		rte_mb();
}

static inline void
virtio_rmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
	else
		rte_io_rmb();
}

static inline void
virtio_wmb(uint8_t weak_barriers)
{
	if (weak_barriers)
		rte_atomic_thread_fence(__ATOMIC_RELEASE);
	else
		rte_io_wmb();
}
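
/**
 * Read the flags field of a packed ring descriptor with the ordering
 * required to observe the rest of the descriptor afterwards (acquire
 * semantics for weak barriers, rte_io_rmb otherwise).
 */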
static inline uint16_t
virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
			      uint8_t weak_barriers)
{
	uint16_t flags;

	if (weak_barriers) {
/* x86 prefers using rte_io_rmb over __atomic_load_n as it reports
 * about 1.5% better performance, which comes from the branch saved
 * by the compiler. The if and else branches are identical on all
 * platforms except Arm.
 */
#ifdef RTE_ARCH_ARM
		flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
#else
		flags = dp->flags;
		rte_io_rmb();
#endif
	} else {
		flags = dp->flags;
		rte_io_rmb();
	}

	return flags;
}
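
/**
 * Write the flags field of a packed ring descriptor after all its other
 * fields have been written, making the descriptor visible to the device
 * (release semantics for weak barriers, rte_io_wmb otherwise).
 */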
static inline void
virtqueue_store_flags_packed(struct vring_packed_desc *dp,
			      uint16_t flags, uint8_t weak_barriers)
{
	if (weak_barriers) {
/* x86 prefers using rte_io_wmb over __atomic_store_n as it reports
 * about 1.5% better performance, which comes from the branch saved
 * by the compiler. The if and else branches are identical on all
 * platforms except Arm.
 */
#ifdef RTE_ARCH_ARM
		__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
#else
		rte_io_wmb();
		dp->flags = flags;
#endif
	} else {
		rte_io_wmb();
		dp->flags = flags;
	}
}

#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while(0)
#endif

#define VIRTQUEUE_MAX_NAME_SZ 32

#define VTNET_SQ_RQ_QUEUE_IDX 0
#define VTNET_SQ_TQ_QUEUE_IDX 1
#define VTNET_SQ_CQ_QUEUE_IDX 2

enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
/**
 * The maximum virtqueue size is 2^15. Use that value as the end of
 * descriptor chain terminator since it will never be a valid index
 * in the descriptor table. This is used to verify we are correctly
 * handling vq_free_cnt.
 */
#define VQ_RING_DESC_CHAIN_END 32768

/**
 * Control the RX mode, i.e. promiscuous, allmulti, etc...
 * All commands require an "out" sg entry containing a 1 byte
 * state value, zero = disable, non-zero = enable.  Commands
 * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
 * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
 */
#define VIRTIO_NET_CTRL_RX              0
#define VIRTIO_NET_CTRL_RX_PROMISC      0
#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
#define VIRTIO_NET_CTRL_RX_ALLUNI       2
#define VIRTIO_NET_CTRL_RX_NOMULTI      3
#define VIRTIO_NET_CTRL_RX_NOUNI        4
#define VIRTIO_NET_CTRL_RX_NOBCAST      5

/**
 * Control the MAC
 *
 * The MAC filter table is managed by the hypervisor; the guest should
 * assume the size is infinite.  Filtering should be considered
 * non-perfect, i.e. depending on hypervisor resources, the guest may
 * receive packets from sources not specified in the filter list.
 *
 * In addition to the class/cmd header, the TABLE_SET command requires
 * two out scatterlists.  Each contains a 4 byte count of entries followed
 * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
 * first sg list contains unicast addresses, the second is for multicast.
 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 * is available.
 *
 * The ADDR_SET command requires one out scatterlist, which contains a
 * 6-byte MAC address. This functionality is present if the
 * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
 */
struct virtio_net_ctrl_mac {
	uint32_t entries;
	uint8_t macs[][RTE_ETHER_ADDR_LEN];
} __rte_packed;

#define VIRTIO_NET_CTRL_MAC    1
#define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
#define VIRTIO_NET_CTRL_MAC_ADDR_SET         1

/**
 * Control VLAN filtering
 *
 * The VLAN filter table is controlled via a simple ADD/DEL interface.
 * VLAN IDs not added may be filtered by the hypervisor.  Del is the
 * opposite of add.  Both commands expect an out entry containing a 2
 * byte VLAN ID.  VLAN filtering is available with the
 * VIRTIO_NET_F_CTRL_VLAN feature bit.
 */
#define VIRTIO_NET_CTRL_VLAN     2
#define VIRTIO_NET_CTRL_VLAN_ADD 0
#define VIRTIO_NET_CTRL_VLAN_DEL 1

/*
 * Control link announce acknowledgement
 *
 * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
 * the driver has received the notification; the device will clear the
 * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
 * this command.
 */
#define VIRTIO_NET_CTRL_ANNOUNCE     3
#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0

struct virtio_net_ctrl_hdr {
	uint8_t class;
	uint8_t cmd;
} __rte_packed;

typedef uint8_t virtio_net_ctrl_ack;

#define VIRTIO_NET_OK     0
#define VIRTIO_NET_ERR    1

#define VIRTIO_MAX_CTRL_DATA 2048

struct virtio_pmd_ctrl {
	struct virtio_net_ctrl_hdr hdr;
	virtio_net_ctrl_ack status;
	uint8_t data[VIRTIO_MAX_CTRL_DATA];
};
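
/*
 * Illustrative sketch (not part of the original header): how a control
 * command could be laid out using the definitions above, here enabling
 * promiscuous mode. Building the scatter-gather entries and sending the
 * command on the control queue is handled elsewhere in the PMD and is
 * omitted here; the function name is only an example.
 */
static inline void
example_build_promisc_cmd(struct virtio_pmd_ctrl *ctrl, uint8_t enable)
{
	ctrl->hdr.class = VIRTIO_NET_CTRL_RX;
	ctrl->hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
	/* 1-byte "out" state value: zero = disable, non-zero = enable */
	ctrl->data[0] = !!enable;
	/* the device writes VIRTIO_NET_OK or VIRTIO_NET_ERR back here */
	ctrl->status = VIRTIO_NET_ERR;
}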

struct vq_desc_extra {
	void *cookie;
	uint16_t ndescs;
	uint16_t next;
};

struct virtqueue {
	struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
	union {
		struct {
			/**< vring keeping desc, used and avail */
			struct vring ring;
		} vq_split;

		struct {
			/**< vring keeping descs and events */
			struct vring_packed ring;
			bool used_wrap_counter;
			uint16_t cached_flags; /**< cached flags for descs */
			uint16_t event_flags_shadow;
		} vq_packed;
	};

	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
	uint16_t vq_nentries;  /**< vring desc numbers */
	uint16_t vq_free_cnt;  /**< num of desc available */
	uint16_t vq_avail_idx; /**< sync until needed */
	uint16_t vq_free_thresh; /**< free threshold */

	void *vq_ring_virt_mem;  /**< linear address of vring */
	unsigned int vq_ring_size;

	union {
		struct virtnet_rx rxq;
		struct virtnet_tx txq;
		struct virtnet_ctl cq;
	};

	rte_iova_t vq_ring_mem; /**< physical address of vring,
				 * or virtual address for virtio_user. */

	/**
	 * Head of the free chain in the descriptor table. If
	 * there are no free descriptors, this will be set to
	 * VQ_RING_DESC_CHAIN_END.
	 */
	uint16_t  vq_desc_head_idx;
	uint16_t  vq_desc_tail_idx;
	uint16_t  vq_queue_index;   /**< PCI queue index */
	uint16_t offset; /**< relative offset to obtain addr in mbuf */
	uint16_t  *notify_addr;
	struct rte_mbuf **sw_ring;  /**< RX software ring. */
	struct vq_desc_extra vq_descx[0];
};

/* If multiqueue is provided by the host, then we support it. */
#define VIRTIO_NET_CTRL_MQ   4
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000

/**
 * This is the first element of the scatter-gather list.  If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 */
struct virtio_net_hdr {
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
	uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
	uint8_t gso_type;
	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
	uint16_t csum_start;  /**< Position to start checksumming from */
	uint16_t csum_offset; /**< Offset after that to place checksum */
};

/**
 * This is the version of the header to use when the MRG_RXBUF
 * feature has been negotiated.
 */
struct virtio_net_hdr_mrg_rxbuf {
	struct   virtio_net_hdr hdr;
	uint16_t num_buffers; /**< Number of merged rx buffers */
};

/* Region reserved to allow for transmit header and indirect ring */
#define VIRTIO_MAX_TX_INDIRECT 8
struct virtio_tx_region {
	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
	union {
		struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
		struct vring_packed_desc
			tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
	} __rte_aligned(16);
};
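
/**
 * Check whether a packed ring descriptor has been marked used by the
 * device: its AVAIL and USED flag bits match each other and match the
 * queue's used wrap counter.
 */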
static inline int
desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
{
	uint16_t used, avail, flags;

	flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers);
	used = !!(flags & VRING_PACKED_DESC_F_USED);
	avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);

	return avail == used && used == vq->vq_packed.used_wrap_counter;
}
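
/**
 * Initialize the descriptor IDs of a packed ring and chain the driver's
 * per-descriptor state through the "next" field, terminating the chain
 * with VQ_RING_DESC_CHAIN_END.
 */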
static inline void
vring_desc_init_packed(struct virtqueue *vq, int n)
{
	int i;
	for (i = 0; i < n - 1; i++) {
		vq->vq_packed.ring.desc[i].id = i;
		vq->vq_descx[i].next = i + 1;
	}
	vq->vq_packed.ring.desc[i].id = i;
	vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
}

/* Chain all the descriptors in the ring with an END */
static inline void
vring_desc_init_split(struct vring_desc *dp, uint16_t n)
{
	uint16_t i;

	for (i = 0; i < n - 1; i++)
		dp[i].next = (uint16_t)(i + 1);
	dp[i].next = VQ_RING_DESC_CHAIN_END;
}
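
/**
 * Initialize an indirect descriptor table for a packed ring: assign
 * sequential IDs and mark every descriptor as device-writable.
 */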
static inline void
vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
{
	int i;
	for (i = 0; i < n; i++) {
		dp[i].id = (uint16_t)i;
		dp[i].flags = VRING_DESC_F_WRITE;
	}
}

/**
 * Tell the backend not to interrupt us. Implementation for packed virtqueues.
 */
static inline void
virtqueue_disable_intr_packed(struct virtqueue *vq)
{
	if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
		vq->vq_packed.ring.driver->desc_event_flags =
			vq->vq_packed.event_flags_shadow;
	}
}

/**
 * Tell the backend not to interrupt us. Implementation for split virtqueues.
 */
static inline void
virtqueue_disable_intr_split(struct virtqueue *vq)
{
	vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}

/**
 * Tell the backend not to interrupt us.
 */
static inline void
virtqueue_disable_intr(struct virtqueue *vq)
{
	if (virtio_with_packed_queue(vq->hw))
		virtqueue_disable_intr_packed(vq);
	else
		virtqueue_disable_intr_split(vq);
}

/**
 * Tell the backend to interrupt us. Implementation for packed virtqueues.
 */
static inline void
virtqueue_enable_intr_packed(struct virtqueue *vq)
{
	if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
		vq->vq_packed.ring.driver->desc_event_flags =
			vq->vq_packed.event_flags_shadow;
	}
}

/**
 * Tell the backend to interrupt us. Implementation for split virtqueues.
 */
static inline void
virtqueue_enable_intr_split(struct virtqueue *vq)
{
	vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
}

/**
 * Tell the backend to interrupt us.
 */
static inline void
virtqueue_enable_intr(struct virtqueue *vq)
{
	if (virtio_with_packed_queue(vq->hw))
		virtqueue_enable_intr_packed(vq);
	else
		virtqueue_enable_intr_split(vq);
}

/**
 *  Dump virtqueue internal structures, for debug purposes only.
 */
void virtqueue_dump(struct virtqueue *vq);
/**
 *  Get all mbufs to be freed.
 */
struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);

/* Flush the elements in the used ring. */
void virtqueue_rxvq_flush(struct virtqueue *vq);

int virtqueue_rxvq_reset_packed(struct virtqueue *vq);

int virtqueue_txvq_reset_packed(struct virtqueue *vq);
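
/**
 * Return a non-zero value if the virtqueue has no free descriptors left.
 */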
static inline int
virtqueue_full(const struct virtqueue *vq)
{
	return vq->vq_free_cnt == 0;
}
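
/**
 * Map a virtqueue index to its type: even indexes are receive queues,
 * odd indexes are transmit queues, and the index right after the last
 * queue pair is the control queue.
 */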
static inline int
virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
{
	if (vtpci_queue_idx == hw->max_queue_pairs * 2)
		return VTNET_CQ;
	else if (vtpci_queue_idx % 2 == 0)
		return VTNET_RQ;
	else
		return VTNET_TQ;
}

/* virtqueue_nused has a load-acquire or rte_io_rmb inside */
static inline uint16_t
virtqueue_nused(const struct virtqueue *vq)
{
	uint16_t idx;

	if (vq->hw->weak_barriers) {
	/**
	 * x86 prefers using rte_smp_rmb over __atomic_load_n as it
	 * reports slightly better performance, which comes from the
	 * branch saved by the compiler.
	 * The if and else branches are identical with the smp and io
	 * barriers both defined as compiler barriers on x86.
	 */
#ifdef RTE_ARCH_X86_64
		idx = vq->vq_split.ring.used->idx;
		rte_smp_rmb();
#else
		idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx,
				__ATOMIC_ACQUIRE);
#endif
	} else {
		idx = vq->vq_split.ring.used->idx;
		rte_io_rmb();
	}
	return idx - vq->vq_used_cons_idx;
}

void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
			  uint16_t num);
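
/**
 * Publish the driver's cached avail index to the split ring, ordering it
 * after the descriptors and avail ring entries written before it.
 */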
static inline void
vq_update_avail_idx(struct virtqueue *vq)
{
	if (vq->hw->weak_barriers) {
	/* x86 prefers using rte_smp_wmb over __atomic_store_n as
	 * it reports slightly better performance, which comes from
	 * the branch saved by the compiler.
	 * The if and else branches are identical with the smp and
	 * io barriers both defined as compiler barriers on x86.
	 */
#ifdef RTE_ARCH_X86_64
		rte_smp_wmb();
		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
#else
		__atomic_store_n(&vq->vq_split.ring.avail->idx,
				 vq->vq_avail_idx, __ATOMIC_RELEASE);
#endif
	} else {
		rte_io_wmb();
		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
	}
}

static inline void
vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
{
	uint16_t avail_idx;
	/*
	 * Place the head of the descriptor chain into the next slot and make
	 * it usable to the host. The chain is made available now rather than
	 * deferring to virtqueue_notify() in the hope that if the host is
	 * currently running on another CPU, we can keep it processing the new
	 * descriptor.
	 */
	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
	if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
		vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
	vq->vq_avail_idx++;
}
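
/**
 * Return a non-zero value if the host must be notified after the avail
 * index update, i.e. when VRING_USED_F_NO_NOTIFY is not set (split ring).
 */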
static inline int
virtqueue_kick_prepare(struct virtqueue *vq)
{
	/*
	 * Ensure updated avail->idx is visible to vhost before reading
	 * the used->flags.
	 */
	virtio_mb(vq->hw->weak_barriers);
	return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
}
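
/**
 * Return a non-zero value if the host must be notified, i.e. when the
 * device has not disabled driver notifications (packed ring).
 */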
static inline int
virtqueue_kick_prepare_packed(struct virtqueue *vq)
{
	uint16_t flags;

	/*
	 * Ensure updated data is visible to vhost before reading the flags.
	 */
	virtio_mb(vq->hw->weak_barriers);
	flags = vq->vq_packed.ring.device->desc_event_flags;

	return flags != RING_EVENT_FLAGS_DISABLE;
}

/*
 * virtqueue_kick_prepare*() or virtio_wmb() should be called
 * before this function to be sure that all the data is visible to vhost.
 */
static inline void
virtqueue_notify(struct virtqueue *vq)
{
	VIRTIO_OPS(vq->hw)->notify_queue(vq->hw, vq);
}

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTQUEUE_DUMP(vq) do { \
	uint16_t used_idx, nused; \
	used_idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx, \
				   __ATOMIC_RELAXED); \
	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
	if (virtio_with_packed_queue((vq)->hw)) { \
		PMD_INIT_LOG(DEBUG, \
		"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
		" cached_flags=0x%x; used_wrap_counter=%d", \
		(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
		(vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
		(vq)->vq_packed.used_wrap_counter); \
		break; \
	} \
	PMD_INIT_LOG(DEBUG, \
	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
	  " avail.flags=0x%x; used.flags=0x%x", \
	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, (vq)->vq_desc_head_idx, \
	  (vq)->vq_split.ring.avail->idx, (vq)->vq_used_cons_idx, \
	  __atomic_load_n(&(vq)->vq_split.ring.used->idx, __ATOMIC_RELAXED), \
	  (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
} while (0)
#else
#define VIRTQUEUE_DUMP(vq) do { } while (0)
#endif

/* avoid the write operation when it is not needed, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {	\
	typeof(var) *const var_ = &(var);	\
	typeof(val)  const val_ = (val);	\
	if (*var_ != val_)			\
		*var_ = val_;			\
} while (0)

#define virtqueue_clear_net_hdr(hdr) do {		\
	typeof(hdr) hdr_ = (hdr);			\
	ASSIGN_UNLESS_EQUAL((hdr_)->csum_start, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->csum_offset, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->flags, 0);		\
	ASSIGN_UNLESS_EQUAL((hdr_)->gso_type, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->gso_size, 0);	\
	ASSIGN_UNLESS_EQUAL((hdr_)->hdr_len, 0);	\
} while (0)
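
/**
 * Translate the mbuf Tx offload flags into the virtio_net_hdr: checksum
 * start/offset for L4 checksum offload and the GSO fields for TSO.
 * When offload is disabled the header fields are left untouched.
 */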
static inline void
virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
			struct rte_mbuf *cookie,
			uint8_t offload)
{
	if (offload) {
		if (cookie->ol_flags & PKT_TX_TCP_SEG)
			cookie->ol_flags |= PKT_TX_TCP_CKSUM;

		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
		case PKT_TX_UDP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct rte_udp_hdr,
				dgram_cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		case PKT_TX_TCP_CKSUM:
			hdr->csum_start = cookie->l2_len + cookie->l3_len;
			hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			break;

		default:
			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
			break;
		}

		/* TCP Segmentation Offload */
		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
				VIRTIO_NET_HDR_GSO_TCPV6 :
				VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->gso_size = cookie->tso_segsz;
			hdr->hdr_len =
				cookie->l2_len +
				cookie->l3_len +
				cookie->l4_len;
		} else {
			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
		}
	}
}
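
/**
 * Enqueue a Tx mbuf chain on a packed virtqueue. Depending on the flags,
 * the virtio net header is either prepended into the mbuf headroom
 * (can_push), placed in the reserved region with an indirect descriptor
 * table (use_indirect), or given its own descriptor. The head descriptor
 * flags are written last, with the ordering required to make the whole
 * chain visible to the device.
 */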
static inline void
virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
			      uint16_t needed, int use_indirect, int can_push,
			      int in_order)
{
	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
	struct vq_desc_extra *dxp;
	struct virtqueue *vq = txvq->vq;
	struct vring_packed_desc *start_dp, *head_dp;
	uint16_t idx, id, head_idx, head_flags;
	int16_t head_size = vq->hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	uint16_t prev;
	bool prepend_header = false;
	uint16_t seg_num = cookie->nb_segs;

	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;

	dxp = &vq->vq_descx[id];
	dxp->ndescs = needed;
	dxp->cookie = cookie;

	head_idx = vq->vq_avail_idx;
	idx = head_idx;
	prev = head_idx;
	start_dp = vq->vq_packed.ring.desc;

	head_dp = &vq->vq_packed.ring.desc[idx];
	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
	head_flags |= vq->vq_packed.cached_flags;

	if (can_push) {
		/* prepend cannot fail, checked by caller */
		hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
					      -head_size);
		prepend_header = true;

		/* if offload disabled, it is not zeroed below, do it now */
		if (!vq->hw->has_tx_offload)
			virtqueue_clear_net_hdr(hdr);
	} else if (use_indirect) {
		/* setup tx ring slot to point to indirect
		 * descriptor list stored in reserved region.
		 *
		 * the first slot in indirect ring is already preset
		 * to point to the header in reserved region
		 */
		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
		start_dp[idx].len   = (seg_num + 1) *
			sizeof(struct vring_packed_desc);
		/* reset flags for indirect desc */
		head_flags = VRING_DESC_F_INDIRECT;
		head_flags |= vq->vq_packed.cached_flags;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

		/* loop below will fill in rest of the indirect elements */
		start_dp = txr[idx].tx_packed_indir;
		idx = 1;
	} else {
		/* setup first tx ring slot to point to header
		 * stored in reserved region.
		 */
		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
		idx++;
		if (idx >= vq->vq_nentries) {
			idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	}

	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);

	do {
		uint16_t flags;

		start_dp[idx].addr = rte_mbuf_data_iova(cookie);
		start_dp[idx].len  = cookie->data_len;
		if (prepend_header) {
			start_dp[idx].addr -= head_size;
			start_dp[idx].len += head_size;
			prepend_header = false;
		}

		if (likely(idx != head_idx)) {
			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
			flags |= vq->vq_packed.cached_flags;
			start_dp[idx].flags = flags;
		}
		prev = idx;
		idx++;
		if (idx >= vq->vq_nentries) {
			idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	} while ((cookie = cookie->next) != NULL);

	start_dp[prev].id = id;

	if (use_indirect) {
		idx = head_idx;
		if (++idx >= vq->vq_nentries) {
			idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
		}
	}

	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
	vq->vq_avail_idx = idx;

	if (!in_order) {
		vq->vq_desc_head_idx = dxp->next;
		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
	}

	virtqueue_store_flags_packed(head_dp, head_flags,
				     vq->hw->weak_barriers);
}
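
/**
 * Return the descriptors owned by "id" to the free count and append the
 * entry to the tail of the free chain.
 */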
static void
vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
{
	struct vq_desc_extra *dxp;

	dxp = &vq->vq_descx[id];
	vq->vq_free_cnt += dxp->ndescs;

	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
		vq->vq_desc_head_idx = id;
	else
		vq->vq_descx[vq->vq_desc_tail_idx].next = id;

	vq->vq_desc_tail_idx = id;
	dxp->next = VQ_RING_DESC_CHAIN_END;
}
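
/**
 * Reclaim used Tx descriptors of a packed ring in order: walk the used
 * descriptors up to the batch identified by the last used id, freeing
 * the transmitted mbufs and updating the wrap counter as needed.
 */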
static void
virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
{
	uint16_t used_idx, id, curr_id, free_cnt = 0;
	uint16_t size = vq->vq_nentries;
	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
	struct vq_desc_extra *dxp;

	used_idx = vq->vq_used_cons_idx;
	/* desc_is_used has a load-acquire or rte_io_rmb inside
	 * and waits for a used desc in the virtqueue.
	 */
	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
		id = desc[used_idx].id;
		do {
			curr_id = used_idx;
			dxp = &vq->vq_descx[used_idx];
			used_idx += dxp->ndescs;
			free_cnt += dxp->ndescs;
			num -= dxp->ndescs;
			if (used_idx >= size) {
				used_idx -= size;
				vq->vq_packed.used_wrap_counter ^= 1;
			}
			if (dxp->cookie != NULL) {
				rte_pktmbuf_free(dxp->cookie);
				dxp->cookie = NULL;
			}
		} while (curr_id != id);
	}
	vq->vq_used_cons_idx = used_idx;
	vq->vq_free_cnt += free_cnt;
}
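
/**
 * Reclaim used Tx descriptors of a packed ring in the generic (out of
 * order) case: free each completed entry by id and return its
 * descriptors to the free chain.
 */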
static void
virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
{
	uint16_t used_idx, id;
	uint16_t size = vq->vq_nentries;
	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
	struct vq_desc_extra *dxp;

	used_idx = vq->vq_used_cons_idx;
	/* desc_is_used has a load-acquire or rte_io_rmb inside
	 * and waits for a used desc in the virtqueue.
	 */
	while (num-- && desc_is_used(&desc[used_idx], vq)) {
		id = desc[used_idx].id;
		dxp = &vq->vq_descx[id];
		vq->vq_used_cons_idx += dxp->ndescs;
		if (vq->vq_used_cons_idx >= size) {
			vq->vq_used_cons_idx -= size;
			vq->vq_packed.used_wrap_counter ^= 1;
		}
		vq_ring_free_id_packed(vq, id);
		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
		used_idx = vq->vq_used_cons_idx;
	}
}

/* Cleanup from completed transmits. */
static inline void
virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
{
	if (in_order)
		virtio_xmit_cleanup_inorder_packed(vq, num);
	else
		virtio_xmit_cleanup_normal_packed(vq, num);
}
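
/* Cleanup from completed transmits, split ring variant. */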
static inline void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, used_idx, desc_idx;
	for (i = 0; i < num; i++) {
		struct vring_used_elem *uep;
		struct vq_desc_extra *dxp;

		used_idx = (uint16_t)(vq->vq_used_cons_idx &
				(vq->vq_nentries - 1));
		uep = &vq->vq_split.ring.used->ring[used_idx];

		desc_idx = (uint16_t)uep->id;
		dxp = &vq->vq_descx[desc_idx];
		vq->vq_used_cons_idx++;
		vq_ring_free_chain(vq, desc_idx);

		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}
}

/* Cleanup from completed inorder transmits. */
static __rte_always_inline void
virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
{
	uint16_t i, idx = vq->vq_used_cons_idx;
	int16_t free_cnt = 0;
	struct vq_desc_extra *dxp = NULL;

	if (unlikely(num == 0))
		return;

	for (i = 0; i < num; i++) {
		dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
		free_cnt += dxp->ndescs;
		if (dxp->cookie != NULL) {
			rte_pktmbuf_free(dxp->cookie);
			dxp->cookie = NULL;
		}
	}

	vq->vq_free_cnt += free_cnt;
	vq->vq_used_cons_idx = idx;
}
#endif /* _VIRTQUEUE_H_ */