/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_prefetch.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_byteorder.h>
#include <rte_net.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio_pci.h"
#include "virtqueue.h"
#include "virtio_rxtx.h"

#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len)
#else
#define VIRTIO_DUMP_PACKET(m, len) do { } while (0)
#endif


#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
        ETH_TXQ_FLAGS_NOOFFLOADS)

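/*
 * Report whether the Rx ring has at least @offset used entries pending,
 * i.e. whether a receive burst would make progress.
 */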
int
virtio_dev_rx_queue_done(void *rxq, uint16_t offset)
{
        struct virtnet_rx *rxvq = rxq;
        struct virtqueue *vq = rxvq->vq;

        return VIRTQUEUE_NUSED(vq) >= offset;
}

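/*
 * Return the descriptor chain starting at @desc_idx to the free list:
 * the freed chain is linked after the current free-list tail and the
 * free-descriptor count is replenished from the per-descriptor state.
 */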
void
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
{
        struct vring_desc *dp, *dp_tail;
        struct vq_desc_extra *dxp;
        uint16_t desc_idx_last = desc_idx;

        dp  = &vq->vq_ring.desc[desc_idx];
        dxp = &vq->vq_descx[desc_idx];
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
        if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
                while (dp->flags & VRING_DESC_F_NEXT) {
                        desc_idx_last = dp->next;
                        dp = &vq->vq_ring.desc[dp->next];
                }
        }
        dxp->ndescs = 0;

        /*
         * We must append the newly freed chain to the end of the existing
         * free chain, if any. If the virtqueue was completely used, the
         * tail would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
         */
        if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
                vq->vq_desc_head_idx = desc_idx;
        } else {
                dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx];
                dp_tail->next = desc_idx;
        }

        vq->vq_desc_tail_idx = desc_idx_last;
        dp->next = VQ_RING_DESC_CHAIN_END;
}

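/*
 * Dequeue up to @num received buffers from the used ring. The mbufs
 * saved as descriptor cookies at refill time are returned in @rx_pkts,
 * their lengths in @len, and the descriptors go back on the free list.
 */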
static uint16_t
virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
                           uint32_t *len, uint16_t num)
{
        struct vring_used_elem *uep;
        struct rte_mbuf *cookie;
        uint16_t used_idx, desc_idx;
        uint16_t i;

        /* Caller does the check */
        for (i = 0; i < num; i++) {
                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
                desc_idx = (uint16_t) uep->id;
                len[i] = uep->len;
                cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie;

                if (unlikely(cookie == NULL)) {
                        PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
                                vq->vq_used_cons_idx);
                        break;
                }

                rte_prefetch0(cookie);
                rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));
                rx_pkts[i] = cookie;
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);
                vq->vq_descx[desc_idx].cookie = NULL;
        }

        return i;
}

#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Cleanup from completed transmits. */
static void
virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
{
        uint16_t i, used_idx, desc_idx;
        for (i = 0; i < num; i++) {
                struct vring_used_elem *uep;
                struct vq_desc_extra *dxp;

                used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];

                desc_idx = (uint16_t) uep->id;
                dxp = &vq->vq_descx[desc_idx];
                vq->vq_used_cons_idx++;
                vq_ring_free_chain(vq, desc_idx);

                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
                        dxp->cookie = NULL;
                }
        }
}

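/*
 * Post a single mbuf to the Rx ring. The descriptor covers the mbuf
 * data room, including the space for the virtio-net header placed just
 * before the packet data, and is marked device-writable.
 */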
static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
        struct vq_desc_extra *dxp;
        struct virtio_hw *hw = vq->hw;
        struct vring_desc *start_dp;
        uint16_t needed = 1;
        uint16_t head_idx, idx;

        if (unlikely(vq->vq_free_cnt == 0))
                return -ENOSPC;
        if (unlikely(vq->vq_free_cnt < needed))
                return -EMSGSIZE;

        head_idx = vq->vq_desc_head_idx;
        if (unlikely(head_idx >= vq->vq_nentries))
                return -EFAULT;

        idx = head_idx;
        dxp = &vq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = vq->vq_ring.desc;
        start_dp[idx].addr =
                VIRTIO_MBUF_ADDR(cookie, vq) +
                RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
        start_dp[idx].len =
                cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
        start_dp[idx].flags = VRING_DESC_F_WRITE;
        idx = start_dp[idx].next;
        vq->vq_desc_head_idx = idx;
        if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                vq->vq_desc_tail_idx = idx;
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
        vq_update_avail_ring(vq, head_idx);

        return 0;
}

/* When doing TSO, the IP length is not included in the pseudo-header
 * checksum of the packet given to the PMD, but virtio expects it to be.
 */
static void
virtio_tso_fix_cksum(struct rte_mbuf *m)
{
        /* common case: header is not fragmented */
        if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len +
                        m->l4_len)) {
                struct ipv4_hdr *iph;
                struct ipv6_hdr *ip6h;
                struct tcp_hdr *th;
                uint16_t prev_cksum, new_cksum, ip_len, ip_paylen;
                uint32_t tmp;

                iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
                th = RTE_PTR_ADD(iph, m->l3_len);
                if ((iph->version_ihl >> 4) == 4) {
                        iph->hdr_checksum = 0;
                        iph->hdr_checksum = rte_ipv4_cksum(iph);
                        ip_len = iph->total_length;
                        ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) -
                                m->l3_len);
                } else {
                        ip6h = (struct ipv6_hdr *)iph;
                        ip_paylen = ip6h->payload_len;
                }

                /* calculate the new phdr checksum not including ip_paylen */
                prev_cksum = th->cksum;
                tmp = prev_cksum;
                tmp += ip_paylen;
                tmp = (tmp & 0xffff) + (tmp >> 16);
                new_cksum = tmp;

                /* replace it in the packet */
                th->cksum = new_cksum;
        }
}

static inline int
tx_offload_enabled(struct virtio_hw *hw)
{
        return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) ||
                vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) ||
                vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6);
}

/* avoid the write operation when it is unnecessary, to lessen cache issues */
#define ASSIGN_UNLESS_EQUAL(var, val) do {      \
        if ((var) != (val))                     \
                (var) = (val);                  \
} while (0)

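/*
 * Enqueue one (possibly multi-segment) packet for transmission.
 * Depending on the caller's flags, the virtio-net header is either
 * pushed into the mbuf headroom (can_push), placed in the reserved
 * header area and referenced through an indirect descriptor list
 * (use_indirect), or given its own descriptor ahead of the data
 * segments.
 */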
static inline void
virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
                       uint16_t needed, int use_indirect, int can_push)
{
        struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
        struct vq_desc_extra *dxp;
        struct virtqueue *vq = txvq->vq;
        struct vring_desc *start_dp;
        uint16_t seg_num = cookie->nb_segs;
        uint16_t head_idx, idx;
        uint16_t head_size = vq->hw->vtnet_hdr_size;
        struct virtio_net_hdr *hdr;
        int offload;

        offload = tx_offload_enabled(vq->hw);
        head_idx = vq->vq_desc_head_idx;
        idx = head_idx;
        dxp = &vq->vq_descx[idx];
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

        start_dp = vq->vq_ring.desc;

        if (can_push) {
                /* prepend cannot fail, checked by caller */
                hdr = (struct virtio_net_hdr *)
                        rte_pktmbuf_prepend(cookie, head_size);
                /* rte_pktmbuf_prepend() adds the header size to the pkt
                 * length, which is not wanted here. The subtraction below
                 * restores the correct packet size.
                 */
                cookie->pkt_len -= head_size;
                /* if offload disabled, it is not zeroed below, do it now */
                if (offload == 0) {
                        ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
                }
        } else if (use_indirect) {
                /* setup the tx ring slot to point to the indirect
                 * descriptor list stored in the reserved region.
                 *
                 * the first slot in the indirect ring is already preset
                 * to point to the header in the reserved region
                 */
                start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
                        RTE_PTR_DIFF(&txr[idx].tx_indir, txr);
                start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc);
                start_dp[idx].flags = VRING_DESC_F_INDIRECT;
                hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

                /* the loop below will fill in the rest of the indirect descriptors */
                start_dp = txr[idx].tx_indir;
                idx = 1;
        } else {
                /* setup the first tx ring slot to point to the header
                 * stored in the reserved region.
                 */
                start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
                        RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
                start_dp[idx].len   = vq->hw->vtnet_hdr_size;
                start_dp[idx].flags = VRING_DESC_F_NEXT;
                hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;

                idx = start_dp[idx].next;
        }

        /* Checksum Offload / TSO */
        if (offload) {
                if (cookie->ol_flags & PKT_TX_TCP_SEG)
                        cookie->ol_flags |= PKT_TX_TCP_CKSUM;

                switch (cookie->ol_flags & PKT_TX_L4_MASK) {
                case PKT_TX_UDP_CKSUM:
                        hdr->csum_start = cookie->l2_len + cookie->l3_len;
                        hdr->csum_offset = offsetof(struct udp_hdr,
                                dgram_cksum);
                        hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                        break;

                case PKT_TX_TCP_CKSUM:
                        hdr->csum_start = cookie->l2_len + cookie->l3_len;
                        hdr->csum_offset = offsetof(struct tcp_hdr, cksum);
                        hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
                        break;

                default:
                        ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
                        break;
                }

                /* TCP Segmentation Offload */
                if (cookie->ol_flags & PKT_TX_TCP_SEG) {
                        virtio_tso_fix_cksum(cookie);
                        hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
                                VIRTIO_NET_HDR_GSO_TCPV6 :
                                VIRTIO_NET_HDR_GSO_TCPV4;
                        hdr->gso_size = cookie->tso_segsz;
                        hdr->hdr_len =
                                cookie->l2_len +
                                cookie->l3_len +
                                cookie->l4_len;
                } else {
                        ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
                        ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
                }
        }

        do {
                start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
                start_dp[idx].len   = cookie->data_len;
                start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0;
                idx = start_dp[idx].next;
        } while ((cookie = cookie->next) != NULL);

        if (use_indirect)
                idx = vq->vq_ring.desc[head_idx].next;

        vq->vq_desc_head_idx = idx;
        if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
                vq->vq_desc_tail_idx = idx;
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
        vq_update_avail_ring(vq, head_idx);
}

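/*
 * Control-queue start hook: nothing to program here, only dump the
 * control virtqueue state when dump support is compiled in.
 */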
void
virtio_dev_cq_start(struct rte_eth_dev *dev)
{
        struct virtio_hw *hw = dev->data->dev_private;

        if (hw->cvq && hw->cvq->vq) {
                VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq);
        }
}

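/*
 * Ethdev Rx queue setup: record the ring size, queue index and mempool;
 * the descriptors are actually filled later, in
 * virtio_dev_rx_queue_setup_finish().
 */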
int
virtio_dev_rx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id __rte_unused,
                        __rte_unused const struct rte_eth_rxconf *rx_conf,
                        struct rte_mempool *mp)
{
        uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_rx *rxvq;

        PMD_INIT_FUNC_TRACE();

        if (nb_desc == 0 || nb_desc > vq->vq_nentries)
                nb_desc = vq->vq_nentries;
        vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

        rxvq = &vq->rxq;
        rxvq->queue_id = queue_idx;
        rxvq->mpool = mp;
        if (rxvq->mpool == NULL) {
                rte_exit(EXIT_FAILURE,
                        "Cannot allocate mbufs for rx virtqueue");
        }
        dev->data->rx_queues[queue_idx] = rxvq;

        return 0;
}

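/*
 * Second stage of Rx queue setup: prepare the ring for the simple
 * (vectorized) path if it is in use, install the fake mbuf entries in
 * the software ring, and fill the virtqueue with buffers from the
 * queue's mempool.
 */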
int
virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx)
{
        uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_rx *rxvq = &vq->rxq;
        struct rte_mbuf *m;
        uint16_t desc_idx;
        int error, nbufs;

        PMD_INIT_FUNC_TRACE();

        /* Allocate blank mbufs for each rx descriptor */
        nbufs = 0;

        if (hw->use_simple_rx) {
                for (desc_idx = 0; desc_idx < vq->vq_nentries;
                     desc_idx++) {
                        vq->vq_ring.avail->ring[desc_idx] = desc_idx;
                        vq->vq_ring.desc[desc_idx].flags =
                                VRING_DESC_F_WRITE;
                }
        }

        memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf));
        for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST;
             desc_idx++) {
                vq->sw_ring[vq->vq_nentries + desc_idx] =
                        &rxvq->fake_mbuf;
        }

        while (!virtqueue_full(vq)) {
                m = rte_mbuf_raw_alloc(rxvq->mpool);
                if (m == NULL)
                        break;

                /* Enqueue allocated buffers */
                if (hw->use_simple_rx)
                        error = virtqueue_enqueue_recv_refill_simple(vq, m);
                else
                        error = virtqueue_enqueue_recv_refill(vq, m);

                if (error) {
                        rte_pktmbuf_free(m);
                        break;
                }
                nbufs++;
        }

        vq_update_avail_idx(vq);

        PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs);

        virtio_rxq_vec_setup(rxvq);

        VIRTQUEUE_DUMP(vq);

        return 0;
}

/*
 * struct rte_eth_dev *dev: Used to update dev
 * uint16_t nb_desc: Defaults to values read from config space
 * unsigned int socket_id: Used to allocate memzone
 * const struct rte_eth_txconf *tx_conf: Used to setup tx engine
 * uint16_t queue_idx: Just used as an index in dev txq list
 */
int
virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                        uint16_t queue_idx,
                        uint16_t nb_desc,
                        unsigned int socket_id __rte_unused,
                        const struct rte_eth_txconf *tx_conf)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        struct virtnet_tx *txvq;
        uint16_t tx_free_thresh;

        PMD_INIT_FUNC_TRACE();

        /* cannot use the simple rx/tx functions with multi-segment mbufs or offloads */
        if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) != VIRTIO_SIMPLE_FLAGS)
                hw->use_simple_tx = 0;

        if (nb_desc == 0 || nb_desc > vq->vq_nentries)
                nb_desc = vq->vq_nentries;
        vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc);

        txvq = &vq->txq;
        txvq->queue_id = queue_idx;

        tx_free_thresh = tx_conf->tx_free_thresh;
        if (tx_free_thresh == 0)
                tx_free_thresh =
                        RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);

        if (tx_free_thresh >= (vq->vq_nentries - 3)) {
                RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
                        "number of TX entries minus 3 (%u)."
                        " (tx_free_thresh=%u port=%u queue=%u)\n",
                        vq->vq_nentries - 3,
                        tx_free_thresh, dev->data->port_id, queue_idx);
                return -EINVAL;
        }

        vq->vq_free_thresh = tx_free_thresh;

        dev->data->tx_queues[queue_idx] = txvq;
        return 0;
}

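/*
 * Second stage of Tx queue setup: for the simple Tx path, pre-link the
 * upper half of the ring as header descriptors pointing at the reserved
 * virtio-net headers, chained to the data descriptors in the lower half.
 */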
int
virtio_dev_tx_queue_setup_finish(struct rte_eth_dev *dev,
                                uint16_t queue_idx)
{
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtio_hw *hw = dev->data->dev_private;
        struct virtqueue *vq = hw->vqs[vtpci_queue_idx];
        uint16_t mid_idx = vq->vq_nentries >> 1;
        struct virtnet_tx *txvq = &vq->txq;
        uint16_t desc_idx;

        PMD_INIT_FUNC_TRACE();

        if (hw->use_simple_tx) {
                for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) {
                        vq->vq_ring.avail->ring[desc_idx] =
                                desc_idx + mid_idx;
                        vq->vq_ring.desc[desc_idx + mid_idx].next =
                                desc_idx;
                        vq->vq_ring.desc[desc_idx + mid_idx].addr =
                                txvq->virtio_net_hdr_mem +
                                offsetof(struct virtio_tx_region, tx_hdr);
                        vq->vq_ring.desc[desc_idx + mid_idx].len =
                                vq->hw->vtnet_hdr_size;
                        vq->vq_ring.desc[desc_idx + mid_idx].flags =
                                VRING_DESC_F_NEXT;
                        vq->vq_ring.desc[desc_idx].flags = 0;
                }
                for (desc_idx = mid_idx; desc_idx < vq->vq_nentries;
                     desc_idx++)
                        vq->vq_ring.avail->ring[desc_idx] = desc_idx;
        }

        VIRTQUEUE_DUMP(vq);

        return 0;
}

static void
virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
{
        int error;
        /*
         * Requeue the discarded mbuf. This should always be
         * successful since it was just dequeued.
         */
        error = virtqueue_enqueue_recv_refill(vq, m);
        if (unlikely(error)) {
                RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
                rte_pktmbuf_free(m);
        }
}

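/*
 * Update per-queue statistics for one packet: packet-size histogram
 * bins plus multicast/broadcast counters based on the destination
 * MAC address.
 */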
static void
virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf)
{
        uint32_t s = mbuf->pkt_len;
        struct ether_addr *ea;

        if (s == 64) {
                stats->size_bins[1]++;
        } else if (s > 64 && s < 1024) {
                uint32_t bin;

                /* count leading zeros to index the correct size bin */
                bin = (sizeof(s) * 8) - __builtin_clz(s) - 5;
                stats->size_bins[bin]++;
        } else {
                if (s < 64)
                        stats->size_bins[0]++;
                else if (s < 1519)
                        stats->size_bins[6]++;
                else if (s >= 1519)
                        stats->size_bins[7]++;
        }

        ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *);
        if (is_multicast_ether_addr(ea)) {
                if (is_broadcast_ether_addr(ea))
                        stats->broadcast++;
                else
                        stats->multicast++;
        }
}

/* Optionally fill offload information into the mbuf from the virtio-net header */
static int
virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
        struct rte_net_hdr_lens hdr_lens;
        uint32_t hdrlen, ptype;
        int l4_supported = 0;

        /* nothing to do */
        if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
                return 0;

        m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;

        ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
        m->packet_type = ptype;
        if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
            (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
                l4_supported = 1;

        if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
                hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
                if (hdr->csum_start <= hdrlen && l4_supported) {
                        m->ol_flags |= PKT_RX_L4_CKSUM_NONE;
                } else {
                        /* Unknown proto or tunnel, do sw cksum. We can assume
                         * the cksum field is in the first segment since the
                         * buffers we provided to the host are large enough.
                         * In case of SCTP, this will be wrong since it's a CRC
                         * but there's nothing we can do.
                         */
                        uint16_t csum = 0, off;

                        rte_raw_cksum_mbuf(m, hdr->csum_start,
                                rte_pktmbuf_pkt_len(m) - hdr->csum_start,
                                &csum);
                        if (likely(csum != 0xffff))
                                csum = ~csum;
                        off = hdr->csum_offset + hdr->csum_start;
                        if (rte_pktmbuf_data_len(m) >= off + 1)
                                *rte_pktmbuf_mtod_offset(m, uint16_t *,
                                        off) = csum;
                }
        } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
                m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
        }

        /* GSO request, save required information in mbuf */
        if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                /* Check unsupported modes */
                if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) ||
                    (hdr->gso_size == 0)) {
                        return -EINVAL;
                }

                /* Update MSS length in the mbuf */
                m->tso_segsz = hdr->gso_size;
                switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
                case VIRTIO_NET_HDR_GSO_TCPV4:
                case VIRTIO_NET_HDR_GSO_TCPV6:
                        m->ol_flags |= PKT_RX_LRO | PKT_RX_L4_CKSUM_NONE;
                        break;
                default:
                        return -EINVAL;
                }
        }

        return 0;
}

static inline int
rx_offload_enabled(struct virtio_hw *hw)
{
        return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) ||
                vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) ||
                vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6);
}

#define VIRTIO_MBUF_BURST_SZ 64
#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
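
/*
 * Receive burst for the non-mergeable path, where each used ring entry
 * holds exactly one packet. Dequeued mbufs are stripped of the
 * virtio-net header and optionally VLAN-stripped, Rx offload flags are
 * set from the header, then the ring is refilled and the host notified
 * if needed.
 */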
uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct virtnet_rx *rxvq = rx_queue;
        struct virtqueue *vq = rxvq->vq;
        struct virtio_hw *hw = vq->hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        int error;
        uint32_t i, nb_enqueued;
        uint32_t hdr_size;
        int offload;
        struct virtio_net_hdr *hdr;

        nb_rx = 0;
        if (unlikely(hw->started == 0))
                return nb_rx;

        nb_used = VIRTQUEUE_NUSED(vq);

        virtio_rmb();

        num = likely(nb_used <= nb_pkts) ? nb_used : nb_pkts;
        if (unlikely(num > VIRTIO_MBUF_BURST_SZ))
                num = VIRTIO_MBUF_BURST_SZ;
        if (likely(num > DESC_PER_CACHELINE))
                num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE);

        num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num);
        PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num);

        nb_enqueued = 0;
        hdr_size = hw->vtnet_hdr_size;
        offload = rx_offload_enabled(hw);

        for (i = 0; i < num; i++) {
                rxm = rcv_pkts[i];

                PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);

                if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(vq, rxm);
                        rxvq->stats.errors++;
                        continue;
                }

                rxm->port = rxvq->port_id;
                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;

                rxm->pkt_len = (uint32_t)(len[i] - hdr_size);
                rxm->data_len = (uint16_t)(len[i] - hdr_size);

                hdr = (struct virtio_net_hdr *)((char *)rxm->buf_addr +
                        RTE_PKTMBUF_HEADROOM - hdr_size);

                if (hw->vlan_strip)
                        rte_vlan_strip(rxm);

                if (offload && virtio_rx_offload(rxm, hdr) < 0) {
                        virtio_discard_rxbuf(vq, rxm);
                        rxvq->stats.errors++;
                        continue;
                }

                VIRTIO_DUMP_PACKET(rxm, rxm->data_len);

                rx_pkts[nb_rx++] = rxm;

                rxvq->stats.bytes += rxm->pkt_len;
                virtio_update_packet_stats(&rxvq->stats, rxm);
        }

        rxvq->stats.packets += nb_rx;

        /* Allocate new mbufs for the used descriptors */
        error = ENOSPC;
        while (likely(!virtqueue_full(vq))) {
                new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(vq);

                if (unlikely(virtqueue_kick_prepare(vq))) {
                        virtqueue_notify(vq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

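/*
 * Receive burst for the mergeable-buffer path: a packet may span
 * several descriptors, so the extra buffers announced by num_buffers in
 * the virtio-net header are dequeued and chained onto the first mbuf
 * before the packet is returned.
 */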
uint16_t
virtio_recv_mergeable_pkts(void *rx_queue,
                        struct rte_mbuf **rx_pkts,
                        uint16_t nb_pkts)
{
        struct virtnet_rx *rxvq = rx_queue;
        struct virtqueue *vq = rxvq->vq;
        struct virtio_hw *hw = vq->hw;
        struct rte_mbuf *rxm, *new_mbuf;
        uint16_t nb_used, num, nb_rx;
        uint32_t len[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
        struct rte_mbuf *prev;
        int error;
        uint32_t i, nb_enqueued;
        uint32_t seg_num;
        uint16_t extra_idx;
        uint32_t seg_res;
        uint32_t hdr_size;
        int offload;

        nb_rx = 0;
        if (unlikely(hw->started == 0))
                return nb_rx;

        nb_used = VIRTQUEUE_NUSED(vq);

        virtio_rmb();

        PMD_RX_LOG(DEBUG, "used:%d", nb_used);

        i = 0;
        nb_enqueued = 0;
        seg_num = 0;
        extra_idx = 0;
        seg_res = 0;
        hdr_size = hw->vtnet_hdr_size;
        offload = rx_offload_enabled(hw);

        while (i < nb_used) {
                struct virtio_net_hdr_mrg_rxbuf *header;

                if (nb_rx == nb_pkts)
                        break;

                num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1);
                if (num != 1)
                        continue;

                i++;

                PMD_RX_LOG(DEBUG, "dequeue:%d", num);
                PMD_RX_LOG(DEBUG, "packet len:%d", len[0]);

                rxm = rcv_pkts[0];

                if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
                        PMD_RX_LOG(ERR, "Packet drop");
                        nb_enqueued++;
                        virtio_discard_rxbuf(vq, rxm);
                        rxvq->stats.errors++;
                        continue;
                }

                header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
                        RTE_PKTMBUF_HEADROOM - hdr_size);
                seg_num = header->num_buffers;

                if (seg_num == 0)
                        seg_num = 1;

                rxm->data_off = RTE_PKTMBUF_HEADROOM;
                rxm->nb_segs = seg_num;
                rxm->ol_flags = 0;
                rxm->vlan_tci = 0;
                rxm->pkt_len = (uint32_t)(len[0] - hdr_size);
                rxm->data_len = (uint16_t)(len[0] - hdr_size);

                rxm->port = rxvq->port_id;
                rx_pkts[nb_rx] = rxm;
                prev = rxm;

                if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) {
                        virtio_discard_rxbuf(vq, rxm);
                        rxvq->stats.errors++;
                        continue;
                }

                seg_res = seg_num - 1;

                while (seg_res != 0) {
                        /*
                         * Get extra segments for the current incomplete packet.
                         */
                        uint16_t rcv_cnt =
                                RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
                        if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) {
                                uint32_t rx_num =
                                        virtqueue_dequeue_burst_rx(vq,
                                        rcv_pkts, len, rcv_cnt);
                                i += rx_num;
                                rcv_cnt = rx_num;
                        } else {
                                PMD_RX_LOG(ERR,
                                           "Not enough segments for packet.");
                                nb_enqueued++;
                                virtio_discard_rxbuf(vq, rxm);
                                rxvq->stats.errors++;
                                break;
                        }

                        extra_idx = 0;

                        while (extra_idx < rcv_cnt) {
                                rxm = rcv_pkts[extra_idx];

                                rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size;
                                rxm->pkt_len = (uint32_t)(len[extra_idx]);
                                rxm->data_len = (uint16_t)(len[extra_idx]);

                                if (prev)
                                        prev->next = rxm;

                                prev = rxm;
                                rx_pkts[nb_rx]->pkt_len += rxm->pkt_len;
                                extra_idx++;
                        }
                        seg_res -= rcv_cnt;
                }

                if (hw->vlan_strip)
                        rte_vlan_strip(rx_pkts[nb_rx]);

                VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
                        rx_pkts[nb_rx]->data_len);

                rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len;
                virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]);
                nb_rx++;
        }

        rxvq->stats.packets += nb_rx;

        /* Allocate new mbufs for the used descriptors */
        error = ENOSPC;
        while (likely(!virtqueue_full(vq))) {
                new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
                if (unlikely(new_mbuf == NULL)) {
                        struct rte_eth_dev *dev
                                = &rte_eth_devices[rxvq->port_id];
                        dev->data->rx_mbuf_alloc_failed++;
                        break;
                }
                error = virtqueue_enqueue_recv_refill(vq, new_mbuf);
                if (unlikely(error)) {
                        rte_pktmbuf_free(new_mbuf);
                        break;
                }
                nb_enqueued++;
        }

        if (likely(nb_enqueued)) {
                vq_update_avail_idx(vq);

                if (unlikely(virtqueue_kick_prepare(vq))) {
                        virtqueue_notify(vq);
                        PMD_RX_LOG(DEBUG, "Notified");
                }
        }

        return nb_rx;
}

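/*
 * Transmit burst: reclaim completed descriptors when the ring runs low,
 * then enqueue each packet using the cheapest available layout (header
 * pushed into the mbuf headroom, indirect descriptors, or a separate
 * header descriptor) and notify the host once at the end if required.
 */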
uint16_t
virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        struct virtnet_tx *txvq = tx_queue;
        struct virtqueue *vq = txvq->vq;
        struct virtio_hw *hw = vq->hw;
        uint16_t hdr_size = hw->vtnet_hdr_size;
        uint16_t nb_used, nb_tx = 0;
        int error;

        if (unlikely(hw->started == 0))
                return nb_tx;

        if (unlikely(nb_pkts < 1))
                return nb_pkts;

        PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
        nb_used = VIRTQUEUE_NUSED(vq);

        virtio_rmb();
        if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh))
                virtio_xmit_cleanup(vq, nb_used);

        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                struct rte_mbuf *txm = tx_pkts[nb_tx];
                int can_push = 0, use_indirect = 0, slots, need;

                /* Do VLAN tag insertion */
                if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
                        error = rte_vlan_insert(&txm);
                        if (unlikely(error)) {
                                rte_pktmbuf_free(txm);
                                continue;
                        }
                }

                /* optimize ring usage */
                if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
                      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
                    rte_mbuf_refcnt_read(txm) == 1 &&
                    RTE_MBUF_DIRECT(txm) &&
                    txm->nb_segs == 1 &&
                    rte_pktmbuf_headroom(txm) >= hdr_size &&
                    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
                                   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
                        can_push = 1;
                else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
                         txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
                        use_indirect = 1;

                /* How many main ring entries are needed for this Tx?
                 * any_layout => number of segments
                 * indirect   => 1
                 * default    => number of segments + 1
                 */
                slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
                need = slots - vq->vq_free_cnt;

                /* A positive value indicates that free vring descriptors are needed */
                if (unlikely(need > 0)) {
                        nb_used = VIRTQUEUE_NUSED(vq);
                        virtio_rmb();
                        need = RTE_MIN(need, (int)nb_used);

                        virtio_xmit_cleanup(vq, need);
                        need = slots - vq->vq_free_cnt;
                        if (unlikely(need > 0)) {
                                PMD_TX_LOG(ERR,
                                           "No free tx descriptors to transmit");
                                break;
                        }
                }

                /* Enqueue Packet buffers */
                virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push);

                txvq->stats.bytes += txm->pkt_len;
                virtio_update_packet_stats(&txvq->stats, txm);
        }

        txvq->stats.packets += nb_tx;

        if (likely(nb_tx)) {
                vq_update_avail_idx(vq);

                if (unlikely(virtqueue_kick_prepare(vq))) {
                        virtqueue_notify(vq);
                        PMD_TX_LOG(DEBUG, "Notified backend after xmit");
                }
        }

        return nb_tx;
}