net/virtio: implement Tx path for packed queues
drivers/net/virtio/virtio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <errno.h>
10
11 #include <rte_cycles.h>
12 #include <rte_memory.h>
13 #include <rte_branch_prediction.h>
14 #include <rte_mempool.h>
15 #include <rte_malloc.h>
16 #include <rte_mbuf.h>
17 #include <rte_ether.h>
18 #include <rte_ethdev_driver.h>
19 #include <rte_prefetch.h>
20 #include <rte_string_fns.h>
21 #include <rte_errno.h>
22 #include <rte_byteorder.h>
23 #include <rte_net.h>
24 #include <rte_ip.h>
25 #include <rte_udp.h>
26 #include <rte_tcp.h>
27
28 #include "virtio_logs.h"
29 #include "virtio_ethdev.h"
30 #include "virtio_pci.h"
31 #include "virtqueue.h"
32 #include "virtio_rxtx.h"
33 #include "virtio_rxtx_simple.h"
34
35 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
36 #define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
37 #else
38 #define  VIRTIO_DUMP_PACKET(m, len) do { } while (0)
39 #endif
40
41 int
42 virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
43 {
44         struct virtnet_rx *rxvq = rxq;
45         struct virtqueue *vq = rxvq->vq;
46
47         return VIRTQUEUE_NUSED(vq) >= offset;
48 }
49
50 void
51 vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx, uint16_t num)
52 {
53         vq->vq_free_cnt += num;
54         vq->vq_desc_tail_idx = desc_idx & (vq->vq_nentries - 1);
55 }
56
57 void
58 vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
59 {
60         struct vring_desc *dp, *dp_tail;
61         struct vq_desc_extra *dxp;
62         uint16_t desc_idx_last = desc_idx;
63
64         dp  = &vq->vq_ring.desc[desc_idx];
65         dxp = &vq->vq_descx[desc_idx];
66         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
67         if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
68                 while (dp->flags & VRING_DESC_F_NEXT) {
69                         desc_idx_last = dp->next;
70                         dp = &vq->vq_ring.desc[dp->next];
71                 }
72         }
73         dxp->ndescs = 0;
74
75         /*
76          * We must append the existing free chain, if any, to the end of
77          * newly freed chain. If the virtqueue was completely used, then
78          * head would be VQ_RING_DESC_CHAIN_END.
79          */
80         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
81                 vq->vq_desc_head_idx = desc_idx;
82         } else {
83                 dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
84                 dp_tail->next = desc_idx;
85         }
86
87         vq->vq_desc_tail_idx = desc_idx_last;
88         dp->next = VQ_RING_DESC_CHAIN_END;
89 }
90
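/* Return the descriptors of a completed packed ring element, identified by
 * its buffer id, to the free list tracked through vq_descx[].
 */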
91 static void
92 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
93 {
94         struct vq_desc_extra *dxp;
95
96         dxp = &vq->vq_descx[id];
97         vq->vq_free_cnt += dxp->ndescs;
98
99         if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
100                 vq->vq_desc_head_idx = id;
101         else
102                 vq->vq_descx[vq->vq_desc_tail_idx].next = id;
103
104         vq->vq_desc_tail_idx = id;
105         dxp->next = VQ_RING_DESC_CHAIN_END;
106 }
107
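/* Dequeue up to num used buffers from a split ring and hand the mbufs back
 * to the caller; returns the number of buffers actually dequeued.
 */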
108 static uint16_t
109 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
110                            uint32_t *len, uint16_t num)
111 {
112         struct vring_used_elem *uep;
113         struct rte_mbuf *cookie;
114         uint16_t used_idx, desc_idx;
115         uint16_t i;
116
117         /*  Caller does the check */
118         for (i = 0; i < num ; i++) {
119                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
120                 uep = &vq->vq_ring.used->ring[used_idx];
121                 desc_idx = (uint16_t) uep->id;
122                 len[i] = uep->len;
123                 cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;
124
125                 if (unlikely(cookie == NULL)) {
126                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
127                                 vq->vq_used_cons_idx);
128                         break;
129                 }
130
131                 rte_prefetch0(cookie);
132                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
133                 rx_pkts[i]  = cookie;
134                 vq->vq_used_cons_idx++;
135                 vq_ring_free_chain(vq, desc_idx);
136                 vq->vq_descx[desc_idx].cookie = NULL;
137         }
138
139         return i;
140 }
141
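/* In-order variant of the Rx dequeue: the descriptor index matches the used
 * index, so the consumed descriptors are released in one batch at the end.
 */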
142 static uint16_t
143 virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
144                         struct rte_mbuf **rx_pkts,
145                         uint32_t *len,
146                         uint16_t num)
147 {
148         struct vring_used_elem *uep;
149         struct rte_mbuf *cookie;
150         uint16_t used_idx = 0;
151         uint16_t i;
152
153         if (unlikely(num == 0))
154                 return 0;
155
156         for (i = 0; i < num; i++) {
157                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
158                 /* Desc idx same as used idx */
159                 uep = &vq->vq_ring.used->ring[used_idx];
160                 len[i] = uep->len;
161                 cookie = (struct rte_mbuf *)vq->vq_descx[used_idx].cookie;
162
163                 if (unlikely(cookie == NULL)) {
164                         PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
165                                 vq->vq_used_cons_idx);
166                         break;
167                 }
168
169                 rte_prefetch0(cookie);
170                 rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
171                 rx_pkts[i]  = cookie;
172                 vq->vq_used_cons_idx++;
173                 vq->vq_descx[used_idx].cookie = NULL;
174         }
175
176         vq_ring_free_inorder(vq, used_idx, i);
177         return i;
178 }
179
180 #ifndef DEFAULT_TX_FREE_THRESH
181 #define DEFAULT_TX_FREE_THRESH 32
182 #endif
183
184 /* Cleanup from completed transmits on the packed ring. */
185 static void
186 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num)
187 {
188         uint16_t used_idx, id;
189         uint16_t size = vq->vq_nentries;
190         struct vring_packed_desc *desc = vq->ring_packed.desc_packed;
191         struct vq_desc_extra *dxp;
192
193         used_idx = vq->vq_used_cons_idx;
194         while (num-- && desc_is_used(&desc[used_idx], vq)) {
195                 used_idx = vq->vq_used_cons_idx;
196                 id = desc[used_idx].id;
197                 dxp = &vq->vq_descx[id];
198                 vq->vq_used_cons_idx += dxp->ndescs;
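                /* Toggle the used wrap counter when the consumer index
                 * wraps past the end of the ring.
                 */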
199                 if (vq->vq_used_cons_idx >= size) {
200                         vq->vq_used_cons_idx -= size;
201                         vq->used_wrap_counter ^= 1;
202                 }
203                 vq_ring_free_id_packed(vq, id);
204                 if (dxp->cookie != NULL) {
205                         rte_pktmbuf_free(dxp->cookie);
206                         dxp->cookie = NULL;
207                 }
208                 used_idx = vq->vq_used_cons_idx;
209         }
210 }
211
212 static void
213 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
214 {
215         uint16_t i, used_idx, desc_idx;
216         for (i = 0; i < num; i++) {
217                 struct vring_used_elem *uep;
218                 struct vq_desc_extra *dxp;
219
220                 used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
221                 uep = &vq->vq_ring.used->ring[used_idx];
222
223                 desc_idx = (uint16_t) uep->id;
224                 dxp = &vq->vq_descx[desc_idx];
225                 vq->vq_used_cons_idx++;
226                 vq_ring_free_chain(vq, desc_idx);
227
228                 if (dxp->cookie != NULL) {
229                         rte_pktmbuf_free(dxp->cookie);
230                         dxp->cookie = NULL;
231                 }
232         }
233 }
234
235 /* Cleanup from completed inorder transmits. */
236 static void
237 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
238 {
239         uint16_t i, used_idx, desc_idx = 0, last_idx;
240         int16_t free_cnt = 0;
241         struct vq_desc_extra *dxp = NULL;
242
243         if (unlikely(num == 0))
244                 return;
245
246         for (i = 0; i < num; i++) {
247                 struct vring_used_elem *uep;
248
249                 used_idx = vq->vq_used_cons_idx & (vq->vq_nentries - 1);
250                 uep = &vq->vq_ring.used->ring[used_idx];
251                 desc_idx = (uint16_t)uep->id;
252
253                 dxp = &vq->vq_descx[desc_idx];
254                 vq->vq_used_cons_idx++;
255
256                 if (dxp->cookie != NULL) {
257                         rte_pktmbuf_free(dxp->cookie);
258                         dxp->cookie = NULL;
259                 }
260         }
261
262         last_idx = desc_idx + dxp->ndescs - 1;
263         free_cnt = last_idx - vq->vq_desc_tail_idx;
264         if (free_cnt <= 0)
265                 free_cnt += vq->vq_nentries;
266
267         vq_ring_free_inorder(vq, last_idx, free_cnt);
268 }
269
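/* Refill the receive ring in order: one descriptor per mbuf, filled
 * sequentially starting from the current descriptor head index.
 */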
270 static inline int
271 virtqueue_enqueue_refill_inorder(struct virtqueue *vq,
272                         struct rte_mbuf **cookies,
273                         uint16_t num)
274 {
275         struct vq_desc_extra *dxp;
276         struct virtio_hw *hw = vq->hw;
277         struct vring_desc *start_dp;
278         uint16_t head_idx, idx, i = 0;
279
280         if (unlikely(vq->vq_free_cnt == 0))
281                 return -ENOSPC;
282         if (unlikely(vq->vq_free_cnt < num))
283                 return -EMSGSIZE;
284
285         head_idx = vq->vq_desc_head_idx & (vq->vq_nentries - 1);
286         start_dp = vq->vq_ring.desc;
287
288         while (i < num) {
289                 idx = head_idx & (vq->vq_nentries - 1);
290                 dxp = &vq->vq_descx[idx];
291                 dxp->cookie = (void *)cookies[i];
292                 dxp->ndescs = 1;
293
294                 start_dp[idx].addr =
295                                 VIRTIO_MBUF_ADDR(cookies[i], vq) +
296                                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
297                 start_dp[idx].len =
298                                 cookies[i]->buf_len -
299                                 RTE_PKTMBUF_HEADROOM +
300                                 hw->vtnet_hdr_size;
301                 start_dp[idx].flags =  VRING_DESC_F_WRITE;
302
303                 vq_update_avail_ring(vq, idx);
304                 head_idx++;
305                 i++;
306         }
307
308         vq->vq_desc_head_idx += num;
309         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
310         return 0;
311 }
312
313 static inline int
314 virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
315 {
316         struct vq_desc_extra *dxp;
317         struct virtio_hw *hw = vq->hw;
318         struct vring_desc *start_dp;
319         uint16_t needed = 1;
320         uint16_t head_idx, idx;
321
322         if (unlikely(vq->vq_free_cnt == 0))
323                 return -ENOSPC;
324         if (unlikely(vq->vq_free_cnt < needed))
325                 return -EMSGSIZE;
326
327         head_idx = vq->vq_desc_head_idx;
328         if (unlikely(head_idx >= vq->vq_nentries))
329                 return -EFAULT;
330
331         idx = head_idx;
332         dxp = &vq->vq_descx[idx];
333         dxp->cookie = (void *)cookie;
334         dxp->ndescs = needed;
335
336         start_dp = vq->vq_ring.desc;
337         start_dp[idx].addr =
338                 VIRTIO_MBUF_ADDR(cookie, vq) +
339                 RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
340         start_dp[idx].len =
341                 cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
342         start_dp[idx].flags =  VRING_DESC_F_WRITE;
343         idx = start_dp[idx].next;
344         vq->vq_desc_head_idx = idx;
345         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
346                 vq->vq_desc_tail_idx = idx;
347         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
348         vq_update_avail_ring(vq, head_idx);
349
350         return 0;
351 }
352
353 /* When doing TSO, the IP payload length is not included in the pseudo
354  * header checksum of the packet given to the PMD, but virtio expects it
355  * to be, so it is added back into the TCP checksum here.
356  */
357 static void
358 virtio_tso_fix_cksum(struct rte_mbuf *m)
359 {
360         /* common case: header is not fragmented */
361         if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
362                         m->l4_len)) {
363                 struct ipv4_hdr *iph;
364                 struct ipv6_hdr *ip6h;
365                 struct tcp_hdr *th;
366                 uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
367                 uint32_t tmp;
368
369                 iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
370                 th = RTE_PTR_ADD(iph, m->l3_len);
371                 if ((iph->version_ihl >> 4) == 4) {
372                         iph->hdr_checksum = 0;
373                         iph->hdr_checksum = rte_ipv4_cksum(iph);
374                         ip_len = iph->total_length;
375                         ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
376                                 m->l3_len);
377                 } else {
378                         ip6h = (struct ipv6_hdr *)iph;
379                         ip_paylen = ip6h->payload_len;
380                 }
381
382                 /* calculate the new phdr checksum not including ip_paylen */
383                 prev_cksum = th->cksum;
384                 tmp = prev_cksum;
385                 tmp += ip_paylen;
386                 tmp = (tmp & 0xffff) + (tmp >> 16);
387                 new_cksum = tmp;
388
389                 /* replace it in the packet */
390                 th->cksum = new_cksum;
391         }
392 }
393
394
395 /* avoid unnecessary write operations, to lessen cache issues */
396 #define ASSIGN_UNLESS_EQUAL(var, val) do {      \
397         if ((var) != (val))                     \
398                 (var) = (val);                  \
399 } while (0)
400
401 static inline void
402 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
403                         struct rte_mbuf *cookie,
404                         bool offload)
405 {
406         if (offload) {
407                 if (cookie->ol_flags & PKT_TX_TCP_SEG)
408                         cookie->ol_flags |= PKT_TX_TCP_CKSUM;
409
410                 switch (cookie->ol_flags & PKT_TX_L4_MASK) {
411                 case PKT_TX_UDP_CKSUM:
412                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
413                         hdr->csum_offset = offsetof(struct udp_hdr,
414                                 dgram_cksum);
415                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
416                         break;
417
418                 case PKT_TX_TCP_CKSUM:
419                         hdr->csum_start = cookie->l2_len + cookie->l3_len;
420                         hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
421                         hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
422                         break;
423
424                 default:
425                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
426                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
427                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
428                         break;
429                 }
430
431                 /* TCP Segmentation Offload */
432                 if (cookie->ol_flags & PKT_TX_TCP_SEG) {
433                         virtio_tso_fix_cksum(cookie);
434                         hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
435                                 VIRTIO_NET_HDR_GSO_TCPV6 :
436                                 VIRTIO_NET_HDR_GSO_TCPV4;
437                         hdr->gso_size = cookie->tso_segsz;
438                         hdr->hdr_len =
439                                 cookie->l2_len +
440                                 cookie->l3_len +
441                                 cookie->l4_len;
442                 } else {
443                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
444                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
445                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
446                 }
447         }
448 }
449
450 static inline void
451 virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
452                         struct rte_mbuf **cookies,
453                         uint16_t num)
454 {
455         struct vq_desc_extra *dxp;
456         struct virtqueue *vq = txvq->vq;
457         struct vring_desc *start_dp;
458         struct virtio_net_hdr *hdr;
459         uint16_t idx;
460         uint16_t head_size = vq->hw->vtnet_hdr_size;
461         uint16_t i = 0;
462
463         idx = vq->vq_desc_head_idx;
464         start_dp = vq->vq_ring.desc;
465
466         while (i < num) {
467                 idx = idx & (vq->vq_nentries - 1);
468                 dxp = &vq->vq_descx[idx];
469                 dxp->cookie = (void *)cookies[i];
470                 dxp->ndescs = 1;
471
472                 hdr = (struct virtio_net_hdr *)
473                         rte_pktmbuf_prepend(cookies[i], head_size);
474                 cookies[i]->pkt_len -= head_size;
475
476                 /* if offload disabled, it is not zeroed below, do it now */
477                 if (!vq->hw->has_tx_offload) {
478                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
479                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
480                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
481                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
482                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
483                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
484                 }
485
486                 virtqueue_xmit_offload(hdr, cookies[i],
487                                 vq->hw->has_tx_offload);
488
489                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookies[i], vq);
490                 start_dp[idx].len   = cookies[i]->data_len;
491                 start_dp[idx].flags = 0;
492
493                 vq_update_avail_ring(vq, idx);
494
495                 idx++;
496                 i++;
497         }
498
499         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
500         vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
501 }
502
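/* Enqueue one packet (an mbuf chain) on a packed ring. The virtio-net header
 * is either pushed into the mbuf headroom (can_push) or taken from the
 * reserved header region; the head descriptor's flags are written last so the
 * device never sees a partially built chain.
 */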
503 static inline void
504 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
505                               uint16_t needed, int can_push)
506 {
507         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
508         struct vq_desc_extra *dxp;
509         struct virtqueue *vq = txvq->vq;
510         struct vring_packed_desc *start_dp, *head_dp;
511         uint16_t idx, id, head_idx, head_flags;
512         uint16_t head_size = vq->hw->vtnet_hdr_size;
513         struct virtio_net_hdr *hdr;
514         uint16_t prev;
515
516         id = vq->vq_desc_head_idx;
517
518         dxp = &vq->vq_descx[id];
519         dxp->ndescs = needed;
520         dxp->cookie = cookie;
521
522         head_idx = vq->vq_avail_idx;
523         idx = head_idx;
524         prev = head_idx;
525         start_dp = vq->ring_packed.desc_packed;
526
527         head_dp = &vq->ring_packed.desc_packed[idx];
528         head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
529         head_flags |= vq->avail_used_flags;
530
531         if (can_push) {
532                 /* prepend cannot fail, checked by caller */
533                 hdr = (struct virtio_net_hdr *)
534                         rte_pktmbuf_prepend(cookie, head_size);
535                 /* rte_pktmbuf_prepend() adds the header size to the packet
536                  * length; the subtraction below restores the correct size.
537                  */
538                 cookie->pkt_len -= head_size;
539
540                 /* if offload disabled, it is not zeroed below, do it now */
541                 if (!vq->hw->has_tx_offload) {
542                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
543                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
544                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
545                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
546                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
547                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
548                 }
549         } else {
550                 /* setup first tx ring slot to point to header
551                  * stored in reserved region.
552                  */
553                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
554                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
555                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
556                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
557                 idx++;
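                /* Wrap the descriptor index and flip the wrap counter;
                 * the avail/used flag template is refreshed to match.
                 */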
558                 if (idx >= vq->vq_nentries) {
559                         idx -= vq->vq_nentries;
560                         vq->avail_wrap_counter ^= 1;
561                         vq->avail_used_flags =
562                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
563                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
564                 }
565         }
566
567         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
568
569         do {
570                 uint16_t flags;
571
572                 start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
573                 start_dp[idx].len  = cookie->data_len;
574                 if (likely(idx != head_idx)) {
575                         flags = cookie->next ? VRING_DESC_F_NEXT : 0;
576                         flags |= vq->avail_used_flags;
577                         start_dp[idx].flags = flags;
578                 }
579                 prev = idx;
580                 idx++;
581                 if (idx >= vq->vq_nentries) {
582                         idx -= vq->vq_nentries;
583                         vq->avail_wrap_counter ^= 1;
584                         vq->avail_used_flags =
585                                 VRING_DESC_F_AVAIL(vq->avail_wrap_counter) |
586                                 VRING_DESC_F_USED(!vq->avail_wrap_counter);
587                 }
588         } while ((cookie = cookie->next) != NULL);
589
590         start_dp[prev].id = id;
591
592         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
593
594         vq->vq_desc_head_idx = dxp->next;
595         if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
596                 vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
597
598         vq->vq_avail_idx = idx;
599
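        /* Ensure the whole chain is written before the head descriptor's
         * flags make it available to the device.
         */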
600         rte_smp_wmb();
601         head_dp->flags = head_flags;
602 }
603
604 static inline void
605 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
606                         uint16_t needed, int use_indirect, int can_push,
607                         int in_order)
608 {
609         struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
610         struct vq_desc_extra *dxp;
611         struct virtqueue *vq = txvq->vq;
612         struct vring_desc *start_dp;
613         uint16_t seg_num = cookie->nb_segs;
614         uint16_t head_idx, idx;
615         uint16_t head_size = vq->hw->vtnet_hdr_size;
616         struct virtio_net_hdr *hdr;
617
618         head_idx = vq->vq_desc_head_idx;
619         idx = head_idx;
620         dxp = &vq->vq_descx[idx];
621         dxp->cookie = (void *)cookie;
622         dxp->ndescs = needed;
623
624         start_dp = vq->vq_ring.desc;
625
626         if (can_push) {
627                 /* prepend cannot fail, checked by caller */
628                 hdr = (struct virtio_net_hdr *)
629                         rte_pktmbuf_prepend(cookie, head_size);
630                 /* rte_pktmbuf_prepend() adds the header size to the packet
631                  * length; the subtraction below restores the correct size.
632                  */
633                 cookie->pkt_len -= head_size;
634
635                 /* if offload disabled, it is not zeroed below, do it now */
636                 if (!vq->hw->has_tx_offload) {
637                         ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
638                         ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
639                         ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
640                         ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
641                         ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
642                         ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
643                 }
644         } else if (use_indirect) {
645                 /* setup tx ring slot to point to indirect
646                  * descriptor list stored in reserved region.
647                  *
648                  * the first slot in indirect ring is already preset
649                  * to point to the header in reserved region
650                  */
651                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
652                         RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
653                 start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
654                 start_dp[idx].flags = VRING_DESC_F_INDIRECT;
655                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
656
657                 /* loop below will fill in rest of the indirect elements */
658                 start_dp = txr[idx].tx_indir;
659                 idx = 1;
660         } else {
661                 /* setup first tx ring slot to point to header
662                  * stored in reserved region.
663                  */
664                 start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
665                         RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
666                 start_dp[idx].len   = vq->hw->vtnet_hdr_size;
667                 start_dp[idx].flags = VRING_DESC_F_NEXT;
668                 hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
669
670                 idx = start_dp[idx].next;
671         }
672
673         virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
674
675         do {
676                 start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
677                 start_dp[idx].len   = cookie->data_len;
678                 start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
679                 idx = start_dp[idx].next;
680         } while ((cookie = cookie->next) != NULL);
681
682         if (use_indirect)
683                 idx = vq->vq_ring.desc[head_idx].next;
684
685         vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
686
687         vq->vq_desc_head_idx = idx;
688         vq_update_avail_ring(vq, head_idx);
689
690         if (!in_order) {
691                 if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
692                         vq->vq_desc_tail_idx = idx;
693         }
694 }
695
696 void
697 virtio_dev_cq_start(struct rte_eth_dev *dev)
698 {
699         struct virtio_hw *hw = dev->data->dev_private;
700
701         if (hw->cvq && hw->cvq->vq) {
702                 rte_spinlock_init(&hw->cvq->lock);
703                 VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
704         }
705 }
706
707 int
708 virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
709                         uint16_t queue_idx,
710                         uint16_t nb_desc,
711                         unsigned int socket_id __rte_unused,
712                         const struct rte_eth_rxconf *rx_conf __rte_unused,
713                         struct rte_mempool *mp)
714 {
715         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
716         struct virtio_hw *hw = dev->data->dev_private;
717         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
718         struct virtnet_rx *rxvq;
719
720         PMD_INIT_FUNC_TRACE();
721
722         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
723                 nb_desc = vq->vq_nentries;
724         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
725
726         rxvq = &vq->rxq;
727         rxvq->queue_id = queue_idx;
728         rxvq->mpool = mp;
729         if (rxvq->mpool == NULL) {
730                 rte_exit(EXIT_FAILURE,
731                         "Cannot allocate mbufs for rx virtqueue");
732         }
733
734         dev->data->rx_queues[queue_idx] = rxvq;
735
736         return 0;
737 }
738
739 int
740 virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
741 {
742         uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
743         struct virtio_hw *hw = dev->data->dev_private;
744         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
745         struct virtnet_rx *rxvq = &vq->rxq;
746         struct rte_mbuf *m;
747         uint16_t desc_idx;
748         int error, nbufs, i;
749
750         PMD_INIT_FUNC_TRACE();
751
752         /* Allocate blank mbufs for each rx descriptor */
753         nbufs = 0;
754
755         if (hw->use_simple_rx) {
756                 for (desc_idx = 0; desc_idx < vq->vq_nentries;
757                      desc_idx++) {
758                         vq->vq_ring.avail->ring[desc_idx] = desc_idx;
759                         vq->vq_ring.desc[desc_idx].flags =
760                                 VRING_DESC_F_WRITE;
761                 }
762
763                 virtio_rxq_vec_setup(rxvq);
764         }
765
766         memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
767         for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
768              desc_idx++) {
769                 vq->sw_ring[vq->vq_nentries + desc_idx] =
770                         &rxvq->fake_mbuf;
771         }
772
773         if (hw->use_simple_rx) {
774                 while (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) {
775                         virtio_rxq_rearm_vec(rxvq);
776                         nbufs += RTE_VIRTIO_VPMD_RX_REARM_THRESH;
777                 }
778         } else if (hw->use_inorder_rx) {
779                 if ((!virtqueue_full(vq))) {
780                         uint16_t free_cnt = vq->vq_free_cnt;
781                         struct rte_mbuf *pkts[free_cnt];
782
783                         if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, pkts,
784                                 free_cnt)) {
785                                 error = virtqueue_enqueue_refill_inorder(vq,
786                                                 pkts,
787                                                 free_cnt);
788                                 if (unlikely(error)) {
789                                         for (i = 0; i < free_cnt; i++)
790                                                 rte_pktmbuf_free(pkts[i]);
791                                 }
792                         }
793
794                         nbufs += free_cnt;
795                         vq_update_avail_idx(vq);
796                 }
797         } else {
798                 while (!virtqueue_full(vq)) {
799                         m = rte_mbuf_raw_alloc(rxvq->mpool);
800                         if (m == NULL)
801                                 break;
802
803                         /* Enqueue allocated buffers */
804                         error = virtqueue_enqueue_recv_refill(vq, m);
805                         if (error) {
806                                 rte_pktmbuf_free(m);
807                                 break;
808                         }
809                         nbufs++;
810                 }
811
812                 vq_update_avail_idx(vq);
813         }
814
815         PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);
816
817         VIRTQUEUE_DUMP(vq);
818
819         return 0;
820 }
821
822 /*
823  * struct rte_eth_dev *dev: Used to update dev
824  * uint16_t nb_desc: Defaults to values read from config space
825  * unsigned int socket_id: Used to allocate memzone
826  * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
827  * uint16_t queue_idx: Just used as an index in dev txq list
828  */
829 int
830 virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
831                         uint16_t queue_idx,
832                         uint16_t nb_desc,
833                         unsigned int socket_id __rte_unused,
834                         const struct rte_eth_txconf *tx_conf)
835 {
836         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
837         struct virtio_hw *hw = dev->data->dev_private;
838         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
839         struct virtnet_tx *txvq;
840         uint16_t tx_free_thresh;
841
842         PMD_INIT_FUNC_TRACE();
843
844         if (nb_desc == 0 || nb_desc > vq->vq_nentries)
845                 nb_desc = vq->vq_nentries;
846         vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);
847
848         txvq = &vq->txq;
849         txvq->queue_id = queue_idx;
850
851         tx_free_thresh = tx_conf->tx_free_thresh;
852         if (tx_free_thresh == 0)
853                 tx_free_thresh =
854                         RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
855
856         if (tx_free_thresh >= (vq->vq_nentries - 3)) {
857                 RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
858                         "number of TX entries minus 3 (%u)."
859                         " (tx_free_thresh=%u port=%u queue=%u)\n",
860                         vq->vq_nentries - 3,
861                         tx_free_thresh, dev->data->port_id, queue_idx);
862                 return -EINVAL;
863         }
864
865         vq->vq_free_thresh = tx_free_thresh;
866
867         dev->data->tx_queues[queue_idx] = txvq;
868         return 0;
869 }
870
871 int
872 virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
873                                 uint16_t queue_idx)
874 {
875         uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
876         struct virtio_hw *hw = dev->data->dev_private;
877         struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
878
879         PMD_INIT_FUNC_TRACE();
880
881         if (!vtpci_packed_queue(hw)) {
882                 if (hw->use_inorder_tx)
883                         vq->vq_ring.desc[vq->vq_nentries - 1].next = 0;
884         }
885
886         VIRTQUEUE_DUMP(vq);
887
888         return 0;
889 }
890
891 static void
892 virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
893 {
894         int error;
895         /*
896          * Requeue the discarded mbuf. This should always be
897          * successful since it was just dequeued.
898          */
899         error = virtqueue_enqueue_recv_refill(vq, m);
900
901         if (unlikely(error)) {
902                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
903                 rte_pktmbuf_free(m);
904         }
905 }
906
907 static void
908 virtio_discard_rxbuf_inorder(struct virtqueue *vq, struct rte_mbuf *m)
909 {
910         int error;
911
912         error = virtqueue_enqueue_refill_inorder(vq, &m, 1);
913         if (unlikely(error)) {
914                 RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf\n");
915                 rte_pktmbuf_free(m);
916         }
917 }
918
919 static void
920 virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
921 {
922         uint32_t s = mbuf->pkt_len;
923         struct ether_addr *ea;
924
925         stats->bytes += s;
926
927         if (s == 64) {
928                 stats->size_bins[1]++;
929         } else if (s > 64 && s < 1024) {
930                 uint32_t bin;
931
932                 /* count zeros, and offset into correct bin */
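                /* e.g. s = 200: 32 - __builtin_clz(200) - 5 = 3, i.e. the
                 * 128-255 byte bin.
                 */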
933                 bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
934                 stats->size_bins[bin]++;
935         } else {
936                 if (s < 64)
937                         stats->size_bins[0]++;
938                 else if (s < 1519)
939                         stats->size_bins[6]++;
940                 else if (s >= 1519)
941                         stats->size_bins[7]++;
942         }
943
944         ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
945         if (is_multicast_ether_addr(ea)) {
946                 if (is_broadcast_ether_addr(ea))
947                         stats->broadcast++;
948                 else
949                         stats->multicast++;
950         }
951 }
952
953 static inline void
954 virtio_rx_stats_updated(struct virtnet_rx *rxvq, struct rte_mbuf *m)
955 {
956         VIRTIO_DUMP_PACKET(m, m->data_len);
957
958         virtio_update_packet_stats(&rxvq->stats, m);
959 }
960
961 /* Optionally fill offload information in structure */
962 static int
963 virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
964 {
965         struct rte_net_hdr_lens hdr_lens;
966         uint32_t hdrlen, ptype;
967         int l4_supported = 0;
968
969         /* nothing to do */
970         if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
971                 return 0;
972
973         m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
974
975         ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
976         m->packet_type = ptype;
977         if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
978             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
979             (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
980                 l4_supported = 1;
981
982         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
983                 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
984                 if (hdr->csum_start <= hdrlen && l4_supported) {
985                         m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
986                 } else {
987                         /* Unknown proto or tunnel, do sw cksum. We can assume
988                          * the cksum field is in the first segment since the
989                          * buffers we provided to the host are large enough.
990                          * In case of SCTP, this will be wrong since it's a CRC
991                          * but there's nothing we can do.
992                          */
993                         uint16_t csum = 0, off;
994
995                         rte_raw_cksum_mbuf(m, hdr->csum_start,
996                                 rte_pktmbuf_pkt_len(m) - hdr->csum_start,
997                                 &csum);
998                         if (likely(csum != 0xffff))
999                                 csum = ~csum;
1000                         off = hdr->csum_offset + hdr->csum_start;
1001                         if (rte_pktmbuf_data_len(m) >= off + 1)
1002                                 *rte_pktmbuf_mtod_offset(m, uint16_t *,
1003                                         off) = csum;
1004                 }
1005         } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
1006                 m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1007         }
1008
1009         /* GSO request, save required information in mbuf */
1010         if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1011                 /* Check unsupported modes */
1012                 if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
1013                     (hdr->gso_size == 0)) {
1014                         return -EINVAL;
1015                 }
1016
1017                 /* Update MSS length in mbuf */
1018                 m->tso_segsz = hdr->gso_size;
1019                 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1020                         case VIRTIO_NET_HDR_GSO_TCPV4:
1021                         case VIRTIO_NET_HDR_GSO_TCPV6:
1022                                 m->ol_flags |= PKT_RX_LRO | \
1023                                         PKT_RX_L4_CKSUM_NONE;
1024                                 break;
1025                         default:
1026                                 return -EINVAL;
1027                 }
1028         }
1029
1030         return 0;
1031 }
1032
1033 #define VIRTIO_MBUF_BURST_SZ 64
1034 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
1035 uint16_t
1036 virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1037 {
1038         struct virtnet_rx *rxvq = rx_queue;
1039         struct virtqueue *vq = rxvq->vq;
1040         struct virtio_hw *hw = vq->hw;
1041         struct rte_mbuf *rxm, *new_mbuf;
1042         uint16_t nb_used, num, nb_rx;
1043         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1044         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1045         int error;
1046         uint32_t i, nb_enqueued;
1047         uint32_t hdr_size;
1048         struct virtio_net_hdr *hdr;
1049
1050         nb_rx = 0;
1051         if (unlikely(hw->started == 0))
1052                 return nb_rx;
1053
1054         nb_used = VIRTQUEUE_NUSED(vq);
1055
1056         virtio_rmb();
1057
1058         num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
1059         if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
1060                 num = VIRTIO_MBUF_BURST_SZ;
1061         if (likely(num > DESC_PER_CACHELINE))
1062                 num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);
1063
1064         num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
1065         PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);
1066
1067         nb_enqueued = 0;
1068         hdr_size = hw->vtnet_hdr_size;
1069
1070         for (i = 0; i < num ; i++) {
1071                 rxm = rcv_pkts[i];
1072
1073                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1074
1075                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1076                         PMD_RX_LOG(ERR, "Packet drop");
1077                         nb_enqueued++;
1078                         virtio_discard_rxbuf(vq, rxm);
1079                         rxvq->stats.errors++;
1080                         continue;
1081                 }
1082
1083                 rxm->port = rxvq->port_id;
1084                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1085                 rxm->ol_flags = 0;
1086                 rxm->vlan_tci = 0;
1087
1088                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1089                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1090
1091                 hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
1092                         RTE_PKTMBUF_HEADROOM - hdr_size);
1093
1094                 if (hw->vlan_strip)
1095                         rte_vlan_strip(rxm);
1096
1097                 if (hw->has_rx_offload && virtio_rx_offload(rxm, hdr) < 0) {
1098                         virtio_discard_rxbuf(vq, rxm);
1099                         rxvq->stats.errors++;
1100                         continue;
1101                 }
1102
1103                 virtio_rx_stats_updated(rxvq, rxm);
1104
1105                 rx_pkts[nb_rx++] = rxm;
1106         }
1107
1108         rxvq->stats.packets += nb_rx;
1109
1110         /* Allocate new mbuf for the used descriptor */
1111         while (likely(!virtqueue_full(vq))) {
1112                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1113                 if (unlikely(new_mbuf == NULL)) {
1114                         struct rte_eth_dev *dev
1115                                 = &rte_eth_devices[rxvq->port_id];
1116                         dev->data->rx_mbuf_alloc_failed++;
1117                         break;
1118                 }
1119                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1120                 if (unlikely(error)) {
1121                         rte_pktmbuf_free(new_mbuf);
1122                         break;
1123                 }
1124                 nb_enqueued++;
1125         }
1126
1127         if (likely(nb_enqueued)) {
1128                 vq_update_avail_idx(vq);
1129
1130                 if (unlikely(virtqueue_kick_prepare(vq))) {
1131                         virtqueue_notify(vq);
1132                         PMD_RX_LOG(DEBUG, "Notified");
1133                 }
1134         }
1135
1136         return nb_rx;
1137 }
1138
1139 uint16_t
1140 virtio_recv_mergeable_pkts_inorder(void *rx_queue,
1141                         struct rte_mbuf **rx_pkts,
1142                         uint16_t nb_pkts)
1143 {
1144         struct virtnet_rx *rxvq = rx_queue;
1145         struct virtqueue *vq = rxvq->vq;
1146         struct virtio_hw *hw = vq->hw;
1147         struct rte_mbuf *rxm;
1148         struct rte_mbuf *prev;
1149         uint16_t nb_used, num, nb_rx;
1150         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1151         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1152         int error;
1153         uint32_t nb_enqueued;
1154         uint32_t seg_num;
1155         uint32_t seg_res;
1156         uint32_t hdr_size;
1157         int32_t i;
1158
1159         nb_rx = 0;
1160         if (unlikely(hw->started == 0))
1161                 return nb_rx;
1162
1163         nb_used = VIRTQUEUE_NUSED(vq);
1164         nb_used = RTE_MIN(nb_used, nb_pkts);
1165         nb_used = RTE_MIN(nb_used, VIRTIO_MBUF_BURST_SZ);
1166
1167         virtio_rmb();
1168
1169         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1170
1171         nb_enqueued = 0;
1172         seg_num = 1;
1173         seg_res = 0;
1174         hdr_size = hw->vtnet_hdr_size;
1175
1176         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len, nb_used);
1177
1178         for (i = 0; i < num; i++) {
1179                 struct virtio_net_hdr_mrg_rxbuf *header;
1180
1181                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1182                 PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
1183
1184                 rxm = rcv_pkts[i];
1185
1186                 if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
1187                         PMD_RX_LOG(ERR, "Packet drop");
1188                         nb_enqueued++;
1189                         virtio_discard_rxbuf_inorder(vq, rxm);
1190                         rxvq->stats.errors++;
1191                         continue;
1192                 }
1193
1194                 header = (struct virtio_net_hdr_mrg_rxbuf *)
1195                          ((char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM
1196                          - hdr_size);
1197                 seg_num = header->num_buffers;
1198
1199                 if (seg_num == 0)
1200                         seg_num = 1;
1201
1202                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1203                 rxm->nb_segs = seg_num;
1204                 rxm->ol_flags = 0;
1205                 rxm->vlan_tci = 0;
1206                 rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
1207                 rxm->data_len = (uint16_t)(len[i] - hdr_size);
1208
1209                 rxm->port = rxvq->port_id;
1210
1211                 rx_pkts[nb_rx] = rxm;
1212                 prev = rxm;
1213
1214                 if (vq->hw->has_rx_offload &&
1215                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1216                         virtio_discard_rxbuf_inorder(vq, rxm);
1217                         rxvq->stats.errors++;
1218                         continue;
1219                 }
1220
1221                 if (hw->vlan_strip)
1222                         rte_vlan_strip(rx_pkts[nb_rx]);
1223
1224                 seg_res = seg_num - 1;
1225
1226                 /* Merge remaining segments */
1227                 while (seg_res != 0 && i < (num - 1)) {
1228                         i++;
1229
1230                         rxm = rcv_pkts[i];
1231                         rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1232                         rxm->pkt_len = (uint32_t)(len[i]);
1233                         rxm->data_len = (uint16_t)(len[i]);
1234
1235                         rx_pkts[nb_rx]->pkt_len += (uint32_t)(len[i]);
1236                         rx_pkts[nb_rx]->data_len += (uint16_t)(len[i]);
1237
1238                         if (prev)
1239                                 prev->next = rxm;
1240
1241                         prev = rxm;
1242                         seg_res -= 1;
1243                 }
1244
1245                 if (!seg_res) {
1246                         virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1247                         nb_rx++;
1248                 }
1249         }
1250
1251         /* The last packet may still need its remaining segments merged */
1252         while (seg_res != 0) {
1253                 uint16_t rcv_cnt = RTE_MIN((uint16_t)seg_res,
1254                                         VIRTIO_MBUF_BURST_SZ);
1255
1256                 prev = rcv_pkts[nb_rx];
1257                 if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1258                         num = virtqueue_dequeue_rx_inorder(vq, rcv_pkts, len,
1259                                                            rcv_cnt);
1260                         uint16_t extra_idx = 0;
1261
1262                         rcv_cnt = num;
1263                         while (extra_idx < rcv_cnt) {
1264                                 rxm = rcv_pkts[extra_idx];
1265                                 rxm->data_off =
1266                                         RTE_PKTMBUF_HEADROOM - hdr_size;
1267                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1268                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1269                                 prev->next = rxm;
1270                                 prev = rxm;
1271                                 rx_pkts[nb_rx]->pkt_len += len[extra_idx];
1272                                 rx_pkts[nb_rx]->data_len += len[extra_idx];
1273                                 extra_idx += 1;
1274                         }
1275                         seg_res -= rcv_cnt;
1276
1277                         if (!seg_res) {
1278                                 virtio_rx_stats_updated(rxvq, rx_pkts[nb_rx]);
1279                                 nb_rx++;
1280                         }
1281                 } else {
1282                         PMD_RX_LOG(ERR,
1283                                         "Not enough segments for packet.");
1284                         virtio_discard_rxbuf_inorder(vq, prev);
1285                         rxvq->stats.errors++;
1286                         break;
1287                 }
1288         }
1289
1290         rxvq->stats.packets += nb_rx;
1291
1292         /* Allocate new mbuf for the used descriptor */
1293
1294         if (likely(!virtqueue_full(vq))) {
1295                 /* free_cnt may include mrg descs */
1296                 uint16_t free_cnt = vq->vq_free_cnt;
1297                 struct rte_mbuf *new_pkts[free_cnt];
1298
1299                 if (!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts, free_cnt)) {
1300                         error = virtqueue_enqueue_refill_inorder(vq, new_pkts,
1301                                         free_cnt);
1302                         if (unlikely(error)) {
1303                                 for (i = 0; i < free_cnt; i++)
1304                                         rte_pktmbuf_free(new_pkts[i]);
1305                         }
1306                         nb_enqueued += free_cnt;
1307                 } else {
1308                         struct rte_eth_dev *dev =
1309                                 &rte_eth_devices[rxvq->port_id];
1310                         dev->data->rx_mbuf_alloc_failed += free_cnt;
1311                 }
1312         }
1313
1314         if (likely(nb_enqueued)) {
1315                 vq_update_avail_idx(vq);
1316
1317                 if (unlikely(virtqueue_kick_prepare(vq))) {
1318                         virtqueue_notify(vq);
1319                         PMD_RX_LOG(DEBUG, "Notified");
1320                 }
1321         }
1322
1323         return nb_rx;
1324 }
1325
1326 uint16_t
1327 virtio_recv_mergeable_pkts(void *rx_queue,
1328                         struct rte_mbuf **rx_pkts,
1329                         uint16_t nb_pkts)
1330 {
1331         struct virtnet_rx *rxvq = rx_queue;
1332         struct virtqueue *vq = rxvq->vq;
1333         struct virtio_hw *hw = vq->hw;
1334         struct rte_mbuf *rxm, *new_mbuf;
1335         uint16_t nb_used, num, nb_rx;
1336         uint32_t len[VIRTIO_MBUF_BURST_SZ];
1337         struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
1338         struct rte_mbuf *prev;
1339         int error;
1340         uint32_t i, nb_enqueued;
1341         uint32_t seg_num;
1342         uint16_t extra_idx;
1343         uint32_t seg_res;
1344         uint32_t hdr_size;
1345
1346         nb_rx = 0;
1347         if (unlikely(hw->started == 0))
1348                 return nb_rx;
1349
1350         nb_used = VIRTQUEUE_NUSED(vq);
1351
1352         virtio_rmb();
1353
1354         PMD_RX_LOG(DEBUG, "used:%d", nb_used);
1355
1356         i = 0;
1357         nb_enqueued = 0;
1358         seg_num = 0;
1359         extra_idx = 0;
1360         seg_res = 0;
1361         hdr_size = hw->vtnet_hdr_size;
1362
1363         while (i < nb_used) {
1364                 struct virtio_net_hdr_mrg_rxbuf *header;
1365
1366                 if (nb_rx == nb_pkts)
1367                         break;
1368
1369                 num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
1370                 if (num != 1)
1371                         continue;
1372
1373                 i++;
1374
1375                 PMD_RX_LOG(DEBUG, "dequeue:%d", num);
1376                 PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);
1377
1378                 rxm = rcv_pkts[0];
1379
1380                 if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
1381                         PMD_RX_LOG(ERR, "Packet drop");
1382                         nb_enqueued++;
1383                         virtio_discard_rxbuf(vq, rxm);
1384                         rxvq->stats.errors++;
1385                         continue;
1386                 }
1387
1388                 header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
1389                         RTE_PKTMBUF_HEADROOM - hdr_size);
1390                 seg_num = header->num_buffers;
1391
1392                 if (seg_num == 0)
1393                         seg_num = 1;
1394
1395                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1396                 rxm->nb_segs = seg_num;
1397                 rxm->ol_flags = 0;
1398                 rxm->vlan_tci = 0;
1399                 rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
1400                 rxm->data_len = (uint16_t)(len[0] - hdr_size);
1401
1402                 rxm->port = rxvq->port_id;
1403                 rx_pkts[nb_rx] = rxm;
1404                 prev = rxm;
1405
1406                 if (hw->has_rx_offload &&
1407                                 virtio_rx_offload(rxm, &header->hdr) < 0) {
1408                         virtio_discard_rxbuf(vq, rxm);
1409                         rxvq->stats.errors++;
1410                         continue;
1411                 }
1412
1413                 seg_res = seg_num - 1;
1414
1415                 while (seg_res != 0) {
1416                         /*
1417                          * Get extra segments for current uncompleted packet.
1418                          */
1419                         uint16_t  rcv_cnt =
1420                                 RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
1421                         if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
1422                                 uint32_t rx_num =
1423                                         virtqueue_dequeue_burst_rx(vq,
1424                                         rcv_pkts, len, rcv_cnt);
1425                                 i += rx_num;
1426                                 rcv_cnt = rx_num;
1427                         } else {
1428                                 PMD_RX_LOG(ERR,
1429                                            "Not enough segments for packet.");
1430                                 nb_enqueued++;
1431                                 virtio_discard_rxbuf(vq, rxm);
1432                                 rxvq->stats.errors++;
1433                                 break;
1434                         }
1435
1436                         extra_idx = 0;
1437
1438                         while (extra_idx < rcv_cnt) {
1439                                 rxm = rcv_pkts[extra_idx];
1440
1441                                 rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
1442                                 rxm->pkt_len = (uint32_t)(len[extra_idx]);
1443                                 rxm->data_len = (uint16_t)(len[extra_idx]);
1444
1445                                 if (prev)
1446                                         prev->next = rxm;
1447
1448                                 prev = rxm;
1449                                 rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
1450                                 extra_idx++;
1451                         }
1452                         seg_res -= rcv_cnt;
1453                 }
1454
1455                 if (hw->vlan_strip)
1456                         rte_vlan_strip(rx_pkts[nb_rx]);
1457
1458                 VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
1459                         rx_pkts[nb_rx]->data_len);
1460
1461                 virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
1462                 nb_rx++;
1463         }
1464
1465         rxvq->stats.packets += nb_rx;
1466
1467         /* Allocate new mbuf for the used descriptor */
1468         while (likely(!virtqueue_full(vq))) {
1469                 new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
1470                 if (unlikely(new_mbuf == NULL)) {
1471                         struct rte_eth_dev *dev
1472                                 = &rte_eth_devices[rxvq->port_id];
1473                         dev->data->rx_mbuf_alloc_failed++;
1474                         break;
1475                 }
1476                 error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
1477                 if (unlikely(error)) {
1478                         rte_pktmbuf_free(new_mbuf);
1479                         break;
1480                 }
1481                 nb_enqueued++;
1482         }
1483
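             /*
              * If any buffers were refilled, publish them through the avail
              * index and notify the device only when a kick is required.
              */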
1484         if (likely(nb_enqueued)) {
1485                 vq_update_avail_idx(vq);
1486
1487                 if (unlikely(virtqueue_kick_prepare(vq))) {
1488                         virtqueue_notify(vq);
1489                         PMD_RX_LOG(DEBUG, "Notified");
1490                 }
1491         }
1492
1493         return nb_rx;
1494 }
1495
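     /*
      * Tx burst function for packed virtqueues.  Used descriptors are
      * reclaimed when the ring runs short on free slots, the virtio-net
      * header is pushed into the mbuf headroom whenever the layout allows
      * it, and the device is notified after the burst if a kick is needed.
      */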
1496 uint16_t
1497 virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
1498                         uint16_t nb_pkts)
1499 {
1500         struct virtnet_tx *txvq = tx_queue;
1501         struct virtqueue *vq = txvq->vq;
1502         struct virtio_hw *hw = vq->hw;
1503         uint16_t hdr_size = hw->vtnet_hdr_size;
1504         uint16_t nb_tx = 0;
1505         int error;
1506
1507         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1508                 return nb_tx;
1509
1510         if (unlikely(nb_pkts < 1))
1511                 return nb_pkts;
1512
1513         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1514
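             /*
              * Try to reclaim enough used slots up front so that the whole
              * burst can fit in the ring.
              */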
1515         if (nb_pkts > vq->vq_free_cnt)
1516                 virtio_xmit_cleanup_packed(vq, nb_pkts - vq->vq_free_cnt);
1517
1518         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1519                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1520                 int can_push = 0, slots, need;
1521
1522                 /* Do VLAN tag insertion */
1523                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1524                         error = rte_vlan_insert(&txm);
1525                         if (unlikely(error)) {
1526                                 rte_pktmbuf_free(txm);
1527                                 continue;
1528                         }
1529                 }
1530
1531                 /* optimize ring usage */
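                     /*
                      * The virtio-net header can be prepended in the mbuf headroom
                      * (can_push) only when the device accepts any descriptor
                      * layout and the mbuf is unshared, direct, single-segment,
                      * with enough headroom and suitable alignment for the header.
                      */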
1532                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1533                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1534                     rte_mbuf_refcnt_read(txm) == 1 &&
1535                     RTE_MBUF_DIRECT(txm) &&
1536                     txm->nb_segs == 1 &&
1537                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1538                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1539                            __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1540                         can_push = 1;
1541
1542                 /* How many main ring entries are needed for this Tx?
1543                  * any_layout => number of segments
1544                  * default    => number of segments + 1
1545                  */
1546                 slots = txm->nb_segs + !can_push;
1547                 need = slots - vq->vq_free_cnt;
1548
1549                 /* A positive value means more free vring descriptors are needed */
1550                 if (unlikely(need > 0)) {
1551                         virtio_rmb();
1552                         need = RTE_MIN(need, (int)nb_pkts);
1553                         virtio_xmit_cleanup_packed(vq, need);
1554                         need = slots - vq->vq_free_cnt;
1555                         if (unlikely(need > 0)) {
1556                                 PMD_TX_LOG(ERR,
1557                                            "No free tx descriptors to transmit");
1558                                 break;
1559                         }
1560                 }
1561
1562                 /* Enqueue Packet buffers */
1563                 virtqueue_enqueue_xmit_packed(txvq, txm, slots, can_push);
1564
1566                 virtio_update_packet_stats(&txvq->stats, txm);
1567         }
1568
1569         txvq->stats.packets += nb_tx;
1570
1571         if (likely(nb_tx)) {
1572                 if (unlikely(virtqueue_kick_prepare_packed(vq))) {
1573                         virtqueue_notify(vq);
1574                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1575                 }
1576         }
1577
1578         return nb_tx;
1579 }
1580
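     /*
      * Tx burst function for split virtqueues.  Multi-segment mbufs are sent
      * through an indirect descriptor table when VIRTIO_RING_F_INDIRECT_DESC
      * has been negotiated; otherwise the header occupies an extra slot
      * unless it can be pushed into the mbuf headroom.
      */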
1581 uint16_t
1582 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1583 {
1584         struct virtnet_tx *txvq = tx_queue;
1585         struct virtqueue *vq = txvq->vq;
1586         struct virtio_hw *hw = vq->hw;
1587         uint16_t hdr_size = hw->vtnet_hdr_size;
1588         uint16_t nb_used, nb_tx = 0;
1589         int error;
1590
1591         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1592                 return nb_tx;
1593
1594         if (unlikely(nb_pkts < 1))
1595                 return nb_pkts;
1596
1597         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1598         nb_used = VIRTQUEUE_NUSED(vq);
1599
1600         virtio_rmb();
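             /*
              * Reclaim completed descriptors when the number of used entries
              * exceeds the ring size minus the free threshold.
              */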
1601         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1602                 virtio_xmit_cleanup(vq, nb_used);
1603
1604         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1605                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1606                 int can_push = 0, use_indirect = 0, slots, need;
1607
1608                 /* Do VLAN tag insertion */
1609                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1610                         error = rte_vlan_insert(&txm);
1611                         if (unlikely(error)) {
1612                                 rte_pktmbuf_free(txm);
1613                                 continue;
1614                         }
1615                 }
1616
1617                 /* optimize ring usage */
1618                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1619                       vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1620                     rte_mbuf_refcnt_read(txm) == 1 &&
1621                     RTE_MBUF_DIRECT(txm) &&
1622                     txm->nb_segs == 1 &&
1623                     rte_pktmbuf_headroom(txm) >= hdr_size &&
1624                     rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1625                                    __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
1626                         can_push = 1;
1627                 else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
1628                          txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
1629                         use_indirect = 1;
1630
1631                 /* How many main ring entries are needed for this Tx?
1632                  * any_layout => number of segments
1633                  * indirect   => 1
1634                  * default    => number of segments + 1
1635                  */
1636                 slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
1637                 need = slots - vq->vq_free_cnt;
1638
1639                 /* A positive value means more free vring descriptors are needed */
1640                 if (unlikely(need > 0)) {
1641                         nb_used = VIRTQUEUE_NUSED(vq);
1642                         virtio_rmb();
1643                         need = RTE_MIN(need, (int)nb_used);
1644
1645                         virtio_xmit_cleanup(vq, need);
1646                         need = slots - vq->vq_free_cnt;
1647                         if (unlikely(need > 0)) {
1648                                 PMD_TX_LOG(ERR,
1649                                            "No free tx descriptors to transmit");
1650                                 break;
1651                         }
1652                 }
1653
1654                 /* Enqueue Packet buffers */
1655                 virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
1656                         can_push, 0);
1657
1658                 virtio_update_packet_stats(&txvq->stats, txm);
1659         }
1660
1661         txvq->stats.packets += nb_tx;
1662
1663         if (likely(nb_tx)) {
1664                 vq_update_avail_idx(vq);
1665
1666                 if (unlikely(virtqueue_kick_prepare(vq))) {
1667                         virtqueue_notify(vq);
1668                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1669                 }
1670         }
1671
1672         return nb_tx;
1673 }
1674
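     /*
      * In-order Tx burst function.  Packets that can carry the virtio-net
      * header in their own headroom are batched and submitted together via
      * virtqueue_enqueue_xmit_inorder(); any other packet first flushes the
      * batch and is then enqueued through the regular path, which uses a
      * separate descriptor for the header.
      */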
1675 uint16_t
1676 virtio_xmit_pkts_inorder(void *tx_queue,
1677                         struct rte_mbuf **tx_pkts,
1678                         uint16_t nb_pkts)
1679 {
1680         struct virtnet_tx *txvq = tx_queue;
1681         struct virtqueue *vq = txvq->vq;
1682         struct virtio_hw *hw = vq->hw;
1683         uint16_t hdr_size = hw->vtnet_hdr_size;
1684         uint16_t nb_used, nb_avail, nb_tx = 0, nb_inorder_pkts = 0;
1685         struct rte_mbuf *inorder_pkts[nb_pkts];
1686         int error;
1687
1688         if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
1689                 return nb_tx;
1690
1691         if (unlikely(nb_pkts < 1))
1692                 return nb_pkts;
1693
1694         VIRTQUEUE_DUMP(vq);
1695         PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
1696         nb_used = VIRTQUEUE_NUSED(vq);
1697
1698         virtio_rmb();
1699         if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
1700                 virtio_xmit_cleanup_inorder(vq, nb_used);
1701
1702         if (unlikely(!vq->vq_free_cnt))
1703                 virtio_xmit_cleanup_inorder(vq, nb_used);
1704
1705         nb_avail = RTE_MIN(vq->vq_free_cnt, nb_pkts);
1706
1707         for (nb_tx = 0; nb_tx < nb_avail; nb_tx++) {
1708                 struct rte_mbuf *txm = tx_pkts[nb_tx];
1709                 int slots, need;
1710
1711                 /* Do VLAN tag insertion */
1712                 if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
1713                         error = rte_vlan_insert(&txm);
1714                         if (unlikely(error)) {
1715                                 rte_pktmbuf_free(txm);
1716                                 continue;
1717                         }
1718                 }
1719
1720                 /* optimize ring usage */
1721                 if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
1722                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
1723                      rte_mbuf_refcnt_read(txm) == 1 &&
1724                      RTE_MBUF_DIRECT(txm) &&
1725                      txm->nb_segs == 1 &&
1726                      rte_pktmbuf_headroom(txm) >= hdr_size &&
1727                      rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
1728                                 __alignof__(struct virtio_net_hdr_mrg_rxbuf))) {
1729                         inorder_pkts[nb_inorder_pkts] = txm;
1730                         nb_inorder_pkts++;
1731
1732                         virtio_update_packet_stats(&txvq->stats, txm);
1733                         continue;
1734                 }
1735
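                     /*
                      * This packet cannot carry the header in its headroom, so
                      * flush the batch collected so far before enqueueing it on
                      * its own to preserve ordering.
                      */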
1736                 if (nb_inorder_pkts) {
1737                         virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1738                                                         nb_inorder_pkts);
1739                         nb_inorder_pkts = 0;
1740                 }
1741
1742                 slots = txm->nb_segs + 1;
1743                 need = slots - vq->vq_free_cnt;
1744                 if (unlikely(need > 0)) {
1745                         nb_used = VIRTQUEUE_NUSED(vq);
1746                         virtio_rmb();
1747                         need = RTE_MIN(need, (int)nb_used);
1748
1749                         virtio_xmit_cleanup_inorder(vq, need);
1750
1751                         need = slots - vq->vq_free_cnt;
1752
1753                         if (unlikely(need > 0)) {
1754                                 PMD_TX_LOG(ERR,
1755                                         "No free tx descriptors to transmit");
1756                                 break;
1757                         }
1758                 }
1759                 /* Enqueue Packet buffers */
1760                 virtqueue_enqueue_xmit(txvq, txm, slots, 0, 0, 1);
1761
1762                 virtio_update_packet_stats(&txvq->stats, txm);
1763         }
1764
1765         /* Transmit all inorder packets */
1766         if (nb_inorder_pkts)
1767                 virtqueue_enqueue_xmit_inorder(txvq, inorder_pkts,
1768                                                 nb_inorder_pkts);
1769
1770         txvq->stats.packets += nb_tx;
1771
1772         if (likely(nb_tx)) {
1773                 vq_update_avail_idx(vq);
1774
1775                 if (unlikely(virtqueue_kick_prepare(vq))) {
1776                         virtqueue_notify(vq);
1777                         PMD_TX_LOG(DEBUG, "Notified backend after xmit");
1778                 }
1779         }
1780
1781         VIRTQUEUE_DUMP(vq);
1782
1783         return nb_tx;
1784 }