net/iavf/base: move to drivers common directory
[dpdk.git] / drivers / net / iavf / iavf_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <stdint.h>
10 #include <stdarg.h>
11 #include <unistd.h>
12 #include <inttypes.h>
13 #include <sys/queue.h>
14
15 #include <rte_string_fns.h>
16 #include <rte_memzone.h>
17 #include <rte_mbuf.h>
18 #include <rte_malloc.h>
19 #include <rte_ether.h>
20 #include <rte_ethdev_driver.h>
21 #include <rte_tcp.h>
22 #include <rte_sctp.h>
23 #include <rte_udp.h>
24 #include <rte_ip.h>
25 #include <rte_net.h>
26
27 #include "iavf.h"
28 #include "iavf_rxtx.h"
29
30 static inline int
31 check_rx_thresh(uint16_t nb_desc, uint16_t thresh)
32 {
33         /* The following constraints must be satisfied:
34          *   thresh < rxq->nb_rx_desc
35          */
36         if (thresh >= nb_desc) {
37                 PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be less than %u",
38                              thresh, nb_desc);
39                 return -EINVAL;
40         }
41         return 0;
42 }
43
44 static inline int
45 check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh,
46                 uint16_t tx_free_thresh)
47 {
48         /* TX descriptors will have their RS bit set after tx_rs_thresh
49          * descriptors have been used. The TX descriptor ring will be cleaned
50          * after tx_free_thresh descriptors are used or if the number of
51          * descriptors required to transmit a packet is greater than the
52          * number of free TX descriptors.
53          *
54          * The following constraints must be satisfied:
55          *  - tx_rs_thresh must be less than the size of the ring minus 2.
56          *  - tx_free_thresh must be less than the size of the ring minus 3.
57          *  - tx_rs_thresh must be less than or equal to tx_free_thresh.
58          *  - tx_rs_thresh must be a divisor of the ring size.
59          *
60          * One descriptor in the TX ring is used as a sentinel to avoid a H/W
61          * race condition, hence the maximum threshold constraints. When set
62          * to zero use default values.
63          */
64         if (tx_rs_thresh >= (nb_desc - 2)) {
65                 PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be less than the "
66                              "number of TX descriptors (%u) minus 2",
67                              tx_rs_thresh, nb_desc);
68                 return -EINVAL;
69         }
70         if (tx_free_thresh >= (nb_desc - 3)) {
71                 PMD_INIT_LOG(ERR, "tx_free_thresh (%u) must be less than the "
72                              "number of TX descriptors (%u) minus 3.",
73                              tx_free_thresh, nb_desc);
74                 return -EINVAL;
75         }
76         if (tx_rs_thresh > tx_free_thresh) {
77                 PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be less than or "
78                              "equal to tx_free_thresh (%u).",
79                              tx_rs_thresh, tx_free_thresh);
80                 return -EINVAL;
81         }
82         if ((nb_desc % tx_rs_thresh) != 0) {
83                 PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be a divisor of the "
84                              "number of TX descriptors (%u).",
85                              tx_rs_thresh, nb_desc);
86                 return -EINVAL;
87         }
88
89         return 0;
90 }
91
92 static inline bool
93 check_rx_vec_allow(struct iavf_rx_queue *rxq)
94 {
95         if (rxq->rx_free_thresh >= IAVF_VPMD_RX_MAX_BURST &&
96             rxq->nb_rx_desc % rxq->rx_free_thresh == 0) {
97                 PMD_INIT_LOG(DEBUG, "Vector Rx can be enabled on this rxq.");
98                 return TRUE;
99         }
100
101         PMD_INIT_LOG(DEBUG, "Vector Rx cannot be enabled on this rxq.");
102         return FALSE;
103 }
104
105 static inline bool
106 check_tx_vec_allow(struct iavf_tx_queue *txq)
107 {
108         if (!(txq->offloads & IAVF_NO_VECTOR_FLAGS) &&
109             txq->rs_thresh >= IAVF_VPMD_TX_MAX_BURST &&
110             txq->rs_thresh <= IAVF_VPMD_TX_MAX_FREE_BUF) {
111                 PMD_INIT_LOG(DEBUG, "Vector tx can be enabled on this txq.");
112                 return TRUE;
113         }
114         PMD_INIT_LOG(DEBUG, "Vector Tx cannot be enabled on this txq.");
115         return FALSE;
116 }
117
118 static inline bool
119 check_rx_bulk_allow(struct iavf_rx_queue *rxq)
120 {
121         int ret = TRUE;
122
123         if (!(rxq->rx_free_thresh >= IAVF_RX_MAX_BURST)) {
124                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
125                              "rxq->rx_free_thresh=%d, "
126                              "IAVF_RX_MAX_BURST=%d",
127                              rxq->rx_free_thresh, IAVF_RX_MAX_BURST);
128                 ret = FALSE;
129         } else if (rxq->nb_rx_desc % rxq->rx_free_thresh != 0) {
130                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
131                              "rxq->nb_rx_desc=%d, "
132                              "rxq->rx_free_thresh=%d",
133                              rxq->nb_rx_desc, rxq->rx_free_thresh);
134                 ret = FALSE;
135         }
136         return ret;
137 }
138
139 static inline void
140 reset_rx_queue(struct iavf_rx_queue *rxq)
141 {
142         uint16_t len;
143         uint32_t i;
144
145         if (!rxq)
146                 return;
147
148         len = rxq->nb_rx_desc + IAVF_RX_MAX_BURST;
149
150         for (i = 0; i < len * sizeof(union iavf_rx_desc); i++)
151                 ((volatile char *)rxq->rx_ring)[i] = 0;
152
153         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
154
155         for (i = 0; i < IAVF_RX_MAX_BURST; i++)
156                 rxq->sw_ring[rxq->nb_rx_desc + i] = &rxq->fake_mbuf;
157
158         /* for rx bulk */
159         rxq->rx_nb_avail = 0;
160         rxq->rx_next_avail = 0;
161         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
162
163         rxq->rx_tail = 0;
164         rxq->nb_rx_hold = 0;
165         rxq->pkt_first_seg = NULL;
166         rxq->pkt_last_seg = NULL;
167 }
168
169 static inline void
170 reset_tx_queue(struct iavf_tx_queue *txq)
171 {
172         struct iavf_tx_entry *txe;
173         uint32_t i, size;
174         uint16_t prev;
175
176         if (!txq) {
177                 PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
178                 return;
179         }
180
181         txe = txq->sw_ring;
182         size = sizeof(struct iavf_tx_desc) * txq->nb_tx_desc;
183         for (i = 0; i < size; i++)
184                 ((volatile char *)txq->tx_ring)[i] = 0;
185
186         prev = (uint16_t)(txq->nb_tx_desc - 1);
187         for (i = 0; i < txq->nb_tx_desc; i++) {
188                 txq->tx_ring[i].cmd_type_offset_bsz =
189                         rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE);
190                 txe[i].mbuf =  NULL;
191                 txe[i].last_id = i;
192                 txe[prev].next_id = i;
193                 prev = i;
194         }
195
196         txq->tx_tail = 0;
197         txq->nb_used = 0;
198
199         txq->last_desc_cleaned = txq->nb_tx_desc - 1;
200         txq->nb_free = txq->nb_tx_desc - 1;
201
202         txq->next_dd = txq->rs_thresh - 1;
203         txq->next_rs = txq->rs_thresh - 1;
204 }
205
206 static int
207 alloc_rxq_mbufs(struct iavf_rx_queue *rxq)
208 {
209         volatile union iavf_rx_desc *rxd;
210         struct rte_mbuf *mbuf = NULL;
211         uint64_t dma_addr;
212         uint16_t i;
213
214         for (i = 0; i < rxq->nb_rx_desc; i++) {
215                 mbuf = rte_mbuf_raw_alloc(rxq->mp);
216                 if (unlikely(!mbuf)) {
217                         PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
218                         return -ENOMEM;
219                 }
220
221                 rte_mbuf_refcnt_set(mbuf, 1);
222                 mbuf->next = NULL;
223                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
224                 mbuf->nb_segs = 1;
225                 mbuf->port = rxq->port_id;
226
227                 dma_addr =
228                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
229
230                 rxd = &rxq->rx_ring[i];
231                 rxd->read.pkt_addr = dma_addr;
232                 rxd->read.hdr_addr = 0;
233 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
234                 rxd->read.rsvd1 = 0;
235                 rxd->read.rsvd2 = 0;
236 #endif
237
238                 rxq->sw_ring[i] = mbuf;
239         }
240
241         return 0;
242 }
243
244 static inline void
245 release_rxq_mbufs(struct iavf_rx_queue *rxq)
246 {
247         uint16_t i;
248
249         if (!rxq->sw_ring)
250                 return;
251
252         for (i = 0; i < rxq->nb_rx_desc; i++) {
253                 if (rxq->sw_ring[i]) {
254                         rte_pktmbuf_free_seg(rxq->sw_ring[i]);
255                         rxq->sw_ring[i] = NULL;
256                 }
257         }
258
259         /* for rx bulk */
260         if (rxq->rx_nb_avail == 0)
261                 return;
262         for (i = 0; i < rxq->rx_nb_avail; i++) {
263                 struct rte_mbuf *mbuf;
264
265                 mbuf = rxq->rx_stage[rxq->rx_next_avail + i];
266                 rte_pktmbuf_free_seg(mbuf);
267         }
268         rxq->rx_nb_avail = 0;
269 }
270
271 static inline void
272 release_txq_mbufs(struct iavf_tx_queue *txq)
273 {
274         uint16_t i;
275
276         if (!txq || !txq->sw_ring) {
277                 PMD_DRV_LOG(DEBUG, "Pointer to rxq or sw_ring is NULL");
278                 return;
279         }
280
281         for (i = 0; i < txq->nb_tx_desc; i++) {
282                 if (txq->sw_ring[i].mbuf) {
283                         rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
284                         txq->sw_ring[i].mbuf = NULL;
285                 }
286         }
287 }
288
/* Rx queue operations installed by iavf_dev_rx_queue_setup():
 * mbufs are released with release_rxq_mbufs().
 */
static const struct iavf_rxq_ops def_rxq_ops = {
	.release_mbufs = release_rxq_mbufs,
};
292
/* Tx queue operations installed by iavf_dev_tx_queue_setup():
 * mbufs are released with release_txq_mbufs().
 */
static const struct iavf_txq_ops def_txq_ops = {
	.release_mbufs = release_txq_mbufs,
};
296
297 int
298 iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
299                        uint16_t nb_desc, unsigned int socket_id,
300                        const struct rte_eth_rxconf *rx_conf,
301                        struct rte_mempool *mp)
302 {
303         struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
304         struct iavf_adapter *ad =
305                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
306         struct iavf_rx_queue *rxq;
307         const struct rte_memzone *mz;
308         uint32_t ring_size;
309         uint16_t len;
310         uint16_t rx_free_thresh;
311
312         PMD_INIT_FUNC_TRACE();
313
314         if (nb_desc % IAVF_ALIGN_RING_DESC != 0 ||
315             nb_desc > IAVF_MAX_RING_DESC ||
316             nb_desc < IAVF_MIN_RING_DESC) {
317                 PMD_INIT_LOG(ERR, "Number (%u) of receive descriptors is "
318                              "invalid", nb_desc);
319                 return -EINVAL;
320         }
321
322         /* Check free threshold */
323         rx_free_thresh = (rx_conf->rx_free_thresh == 0) ?
324                          IAVF_DEFAULT_RX_FREE_THRESH :
325                          rx_conf->rx_free_thresh;
326         if (check_rx_thresh(nb_desc, rx_free_thresh) != 0)
327                 return -EINVAL;
328
329         /* Free memory if needed */
330         if (dev->data->rx_queues[queue_idx]) {
331                 iavf_dev_rx_queue_release(dev->data->rx_queues[queue_idx]);
332                 dev->data->rx_queues[queue_idx] = NULL;
333         }
334
335         /* Allocate the rx queue data structure */
336         rxq = rte_zmalloc_socket("iavf rxq",
337                                  sizeof(struct iavf_rx_queue),
338                                  RTE_CACHE_LINE_SIZE,
339                                  socket_id);
340         if (!rxq) {
341                 PMD_INIT_LOG(ERR, "Failed to allocate memory for "
342                              "rx queue data structure");
343                 return -ENOMEM;
344         }
345
346         rxq->mp = mp;
347         rxq->nb_rx_desc = nb_desc;
348         rxq->rx_free_thresh = rx_free_thresh;
349         rxq->queue_id = queue_idx;
350         rxq->port_id = dev->data->port_id;
351         rxq->crc_len = 0; /* crc stripping by default */
352         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
353         rxq->rx_hdr_len = 0;
354
355         len = rte_pktmbuf_data_room_size(rxq->mp) - RTE_PKTMBUF_HEADROOM;
356         rxq->rx_buf_len = RTE_ALIGN(len, (1 << IAVF_RXQ_CTX_DBUFF_SHIFT));
357
358         /* Allocate the software ring. */
359         len = nb_desc + IAVF_RX_MAX_BURST;
360         rxq->sw_ring =
361                 rte_zmalloc_socket("iavf rx sw ring",
362                                    sizeof(struct rte_mbuf *) * len,
363                                    RTE_CACHE_LINE_SIZE,
364                                    socket_id);
365         if (!rxq->sw_ring) {
366                 PMD_INIT_LOG(ERR, "Failed to allocate memory for SW ring");
367                 rte_free(rxq);
368                 return -ENOMEM;
369         }
370
371         /* Allocate the maximun number of RX ring hardware descriptor with
372          * a liitle more to support bulk allocate.
373          */
374         len = IAVF_MAX_RING_DESC + IAVF_RX_MAX_BURST;
375         ring_size = RTE_ALIGN(len * sizeof(union iavf_rx_desc),
376                               IAVF_DMA_MEM_ALIGN);
377         mz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
378                                       ring_size, IAVF_RING_BASE_ALIGN,
379                                       socket_id);
380         if (!mz) {
381                 PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for RX");
382                 rte_free(rxq->sw_ring);
383                 rte_free(rxq);
384                 return -ENOMEM;
385         }
386         /* Zero all the descriptors in the ring. */
387         memset(mz->addr, 0, ring_size);
388         rxq->rx_ring_phys_addr = mz->iova;
389         rxq->rx_ring = (union iavf_rx_desc *)mz->addr;
390
391         rxq->mz = mz;
392         reset_rx_queue(rxq);
393         rxq->q_set = TRUE;
394         dev->data->rx_queues[queue_idx] = rxq;
395         rxq->qrx_tail = hw->hw_addr + IAVF_QRX_TAIL1(rxq->queue_id);
396         rxq->ops = &def_rxq_ops;
397
398         if (check_rx_bulk_allow(rxq) == TRUE) {
399                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
400                              "satisfied. Rx Burst Bulk Alloc function will be "
401                              "used on port=%d, queue=%d.",
402                              rxq->port_id, rxq->queue_id);
403         } else {
404                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
405                              "not satisfied, Scattered Rx is requested "
406                              "on port=%d, queue=%d.",
407                              rxq->port_id, rxq->queue_id);
408                 ad->rx_bulk_alloc_allowed = false;
409         }
410
411         if (check_rx_vec_allow(rxq) == FALSE)
412                 ad->rx_vec_allowed = false;
413
414         return 0;
415 }
416
417 int
418 iavf_dev_tx_queue_setup(struct rte_eth_dev *dev,
419                        uint16_t queue_idx,
420                        uint16_t nb_desc,
421                        unsigned int socket_id,
422                        const struct rte_eth_txconf *tx_conf)
423 {
424         struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
425         struct iavf_tx_queue *txq;
426         const struct rte_memzone *mz;
427         uint32_t ring_size;
428         uint16_t tx_rs_thresh, tx_free_thresh;
429         uint64_t offloads;
430
431         PMD_INIT_FUNC_TRACE();
432
433         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
434
435         if (nb_desc % IAVF_ALIGN_RING_DESC != 0 ||
436             nb_desc > IAVF_MAX_RING_DESC ||
437             nb_desc < IAVF_MIN_RING_DESC) {
438                 PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is "
439                             "invalid", nb_desc);
440                 return -EINVAL;
441         }
442
443         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
444                 tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
445         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
446                 tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
447         check_tx_thresh(nb_desc, tx_rs_thresh, tx_rs_thresh);
448
449         /* Free memory if needed. */
450         if (dev->data->tx_queues[queue_idx]) {
451                 iavf_dev_tx_queue_release(dev->data->tx_queues[queue_idx]);
452                 dev->data->tx_queues[queue_idx] = NULL;
453         }
454
455         /* Allocate the TX queue data structure. */
456         txq = rte_zmalloc_socket("iavf txq",
457                                  sizeof(struct iavf_tx_queue),
458                                  RTE_CACHE_LINE_SIZE,
459                                  socket_id);
460         if (!txq) {
461                 PMD_INIT_LOG(ERR, "Failed to allocate memory for "
462                              "tx queue structure");
463                 return -ENOMEM;
464         }
465
466         txq->nb_tx_desc = nb_desc;
467         txq->rs_thresh = tx_rs_thresh;
468         txq->free_thresh = tx_free_thresh;
469         txq->queue_id = queue_idx;
470         txq->port_id = dev->data->port_id;
471         txq->offloads = offloads;
472         txq->tx_deferred_start = tx_conf->tx_deferred_start;
473
474         /* Allocate software ring */
475         txq->sw_ring =
476                 rte_zmalloc_socket("iavf tx sw ring",
477                                    sizeof(struct iavf_tx_entry) * nb_desc,
478                                    RTE_CACHE_LINE_SIZE,
479                                    socket_id);
480         if (!txq->sw_ring) {
481                 PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
482                 rte_free(txq);
483                 return -ENOMEM;
484         }
485
486         /* Allocate TX hardware ring descriptors. */
487         ring_size = sizeof(struct iavf_tx_desc) * IAVF_MAX_RING_DESC;
488         ring_size = RTE_ALIGN(ring_size, IAVF_DMA_MEM_ALIGN);
489         mz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
490                                       ring_size, IAVF_RING_BASE_ALIGN,
491                                       socket_id);
492         if (!mz) {
493                 PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
494                 rte_free(txq->sw_ring);
495                 rte_free(txq);
496                 return -ENOMEM;
497         }
498         txq->tx_ring_phys_addr = mz->iova;
499         txq->tx_ring = (struct iavf_tx_desc *)mz->addr;
500
501         txq->mz = mz;
502         reset_tx_queue(txq);
503         txq->q_set = TRUE;
504         dev->data->tx_queues[queue_idx] = txq;
505         txq->qtx_tail = hw->hw_addr + IAVF_QTX_TAIL1(queue_idx);
506         txq->ops = &def_txq_ops;
507
508         if (check_tx_vec_allow(txq) == FALSE) {
509                 struct iavf_adapter *ad =
510                         IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
511                 ad->tx_vec_allowed = false;
512         }
513
514         return 0;
515 }
516
517 int
518 iavf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
519 {
520         struct iavf_adapter *adapter =
521                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
522         struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
523         struct iavf_rx_queue *rxq;
524         int err = 0;
525
526         PMD_DRV_FUNC_TRACE();
527
528         if (rx_queue_id >= dev->data->nb_rx_queues)
529                 return -EINVAL;
530
531         rxq = dev->data->rx_queues[rx_queue_id];
532
533         err = alloc_rxq_mbufs(rxq);
534         if (err) {
535                 PMD_DRV_LOG(ERR, "Failed to allocate RX queue mbuf");
536                 return err;
537         }
538
539         rte_wmb();
540
541         /* Init the RX tail register. */
542         IAVF_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
543         IAVF_WRITE_FLUSH(hw);
544
545         /* Ready to switch the queue on */
546         err = iavf_switch_queue(adapter, rx_queue_id, TRUE, TRUE);
547         if (err)
548                 PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
549                             rx_queue_id);
550         else
551                 dev->data->rx_queue_state[rx_queue_id] =
552                         RTE_ETH_QUEUE_STATE_STARTED;
553
554         return err;
555 }
556
557 int
558 iavf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
559 {
560         struct iavf_adapter *adapter =
561                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
562         struct iavf_hw *hw = IAVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
563         struct iavf_tx_queue *txq;
564         int err = 0;
565
566         PMD_DRV_FUNC_TRACE();
567
568         if (tx_queue_id >= dev->data->nb_tx_queues)
569                 return -EINVAL;
570
571         txq = dev->data->tx_queues[tx_queue_id];
572
573         /* Init the RX tail register. */
574         IAVF_PCI_REG_WRITE(txq->qtx_tail, 0);
575         IAVF_WRITE_FLUSH(hw);
576
577         /* Ready to switch the queue on */
578         err = iavf_switch_queue(adapter, tx_queue_id, FALSE, TRUE);
579
580         if (err)
581                 PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
582                             tx_queue_id);
583         else
584                 dev->data->tx_queue_state[tx_queue_id] =
585                         RTE_ETH_QUEUE_STATE_STARTED;
586
587         return err;
588 }
589
590 int
591 iavf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
592 {
593         struct iavf_adapter *adapter =
594                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
595         struct iavf_rx_queue *rxq;
596         int err;
597
598         PMD_DRV_FUNC_TRACE();
599
600         if (rx_queue_id >= dev->data->nb_rx_queues)
601                 return -EINVAL;
602
603         err = iavf_switch_queue(adapter, rx_queue_id, TRUE, FALSE);
604         if (err) {
605                 PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
606                             rx_queue_id);
607                 return err;
608         }
609
610         rxq = dev->data->rx_queues[rx_queue_id];
611         rxq->ops->release_mbufs(rxq);
612         reset_rx_queue(rxq);
613         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
614
615         return 0;
616 }
617
618 int
619 iavf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
620 {
621         struct iavf_adapter *adapter =
622                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
623         struct iavf_tx_queue *txq;
624         int err;
625
626         PMD_DRV_FUNC_TRACE();
627
628         if (tx_queue_id >= dev->data->nb_tx_queues)
629                 return -EINVAL;
630
631         err = iavf_switch_queue(adapter, tx_queue_id, FALSE, FALSE);
632         if (err) {
633                 PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off",
634                             tx_queue_id);
635                 return err;
636         }
637
638         txq = dev->data->tx_queues[tx_queue_id];
639         txq->ops->release_mbufs(txq);
640         reset_tx_queue(txq);
641         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
642
643         return 0;
644 }
645
646 void
647 iavf_dev_rx_queue_release(void *rxq)
648 {
649         struct iavf_rx_queue *q = (struct iavf_rx_queue *)rxq;
650
651         if (!q)
652                 return;
653
654         q->ops->release_mbufs(q);
655         rte_free(q->sw_ring);
656         rte_memzone_free(q->mz);
657         rte_free(q);
658 }
659
660 void
661 iavf_dev_tx_queue_release(void *txq)
662 {
663         struct iavf_tx_queue *q = (struct iavf_tx_queue *)txq;
664
665         if (!q)
666                 return;
667
668         q->ops->release_mbufs(q);
669         rte_free(q->sw_ring);
670         rte_memzone_free(q->mz);
671         rte_free(q);
672 }
673
674 void
675 iavf_stop_queues(struct rte_eth_dev *dev)
676 {
677         struct iavf_adapter *adapter =
678                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
679         struct iavf_rx_queue *rxq;
680         struct iavf_tx_queue *txq;
681         int ret, i;
682
683         /* Stop All queues */
684         ret = iavf_disable_queues(adapter);
685         if (ret)
686                 PMD_DRV_LOG(WARNING, "Fail to stop queues");
687
688         for (i = 0; i < dev->data->nb_tx_queues; i++) {
689                 txq = dev->data->tx_queues[i];
690                 if (!txq)
691                         continue;
692                 txq->ops->release_mbufs(txq);
693                 reset_tx_queue(txq);
694                 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
695         }
696         for (i = 0; i < dev->data->nb_rx_queues; i++) {
697                 rxq = dev->data->rx_queues[i];
698                 if (!rxq)
699                         continue;
700                 rxq->ops->release_mbufs(rxq);
701                 reset_rx_queue(rxq);
702                 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
703         }
704 }
705
706 static inline void
707 iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp)
708 {
709         if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
710                 (1 << IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
711                 mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
712                 mb->vlan_tci =
713                         rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
714         } else {
715                 mb->vlan_tci = 0;
716         }
717 }
718
719 /* Translate the rx descriptor status and error fields to pkt flags */
720 static inline uint64_t
721 iavf_rxd_to_pkt_flags(uint64_t qword)
722 {
723         uint64_t flags;
724         uint64_t error_bits = (qword >> IAVF_RXD_QW1_ERROR_SHIFT);
725
726 #define IAVF_RX_ERR_BITS 0x3f
727
728         /* Check if RSS_HASH */
729         flags = (((qword >> IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT) &
730                                         IAVF_RX_DESC_FLTSTAT_RSS_HASH) ==
731                         IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
732
733         if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) {
734                 flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
735                 return flags;
736         }
737
738         if (unlikely(error_bits & (1 << IAVF_RX_DESC_ERROR_IPE_SHIFT)))
739                 flags |= PKT_RX_IP_CKSUM_BAD;
740         else
741                 flags |= PKT_RX_IP_CKSUM_GOOD;
742
743         if (unlikely(error_bits & (1 << IAVF_RX_DESC_ERROR_L4E_SHIFT)))
744                 flags |= PKT_RX_L4_CKSUM_BAD;
745         else
746                 flags |= PKT_RX_L4_CKSUM_GOOD;
747
748         /* TODO: Oversize error bit is not processed here */
749
750         return flags;
751 }
752
753 /* implement recv_pkts */
754 uint16_t
755 iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
756 {
757         volatile union iavf_rx_desc *rx_ring;
758         volatile union iavf_rx_desc *rxdp;
759         struct iavf_rx_queue *rxq;
760         union iavf_rx_desc rxd;
761         struct rte_mbuf *rxe;
762         struct rte_eth_dev *dev;
763         struct rte_mbuf *rxm;
764         struct rte_mbuf *nmb;
765         uint16_t nb_rx;
766         uint32_t rx_status;
767         uint64_t qword1;
768         uint16_t rx_packet_len;
769         uint16_t rx_id, nb_hold;
770         uint64_t dma_addr;
771         uint64_t pkt_flags;
772         static const uint32_t ptype_tbl[UINT8_MAX + 1] __rte_cache_aligned = {
773                 /* [0] reserved */
774                 [1] = RTE_PTYPE_L2_ETHER,
775                 /* [2] - [21] reserved */
776                 [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
777                         RTE_PTYPE_L4_FRAG,
778                 [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
779                         RTE_PTYPE_L4_NONFRAG,
780                 [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
781                         RTE_PTYPE_L4_UDP,
782                 /* [25] reserved */
783                 [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
784                         RTE_PTYPE_L4_TCP,
785                 [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
786                         RTE_PTYPE_L4_SCTP,
787                 [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
788                         RTE_PTYPE_L4_ICMP,
789                 /* All others reserved */
790         };
791
792         nb_rx = 0;
793         nb_hold = 0;
794         rxq = rx_queue;
795         rx_id = rxq->rx_tail;
796         rx_ring = rxq->rx_ring;
797
798         while (nb_rx < nb_pkts) {
799                 rxdp = &rx_ring[rx_id];
800                 qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
801                 rx_status = (qword1 & IAVF_RXD_QW1_STATUS_MASK) >>
802                             IAVF_RXD_QW1_STATUS_SHIFT;
803
804                 /* Check the DD bit first */
805                 if (!(rx_status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)))
806                         break;
807                 IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
808
809                 nmb = rte_mbuf_raw_alloc(rxq->mp);
810                 if (unlikely(!nmb)) {
811                         dev = &rte_eth_devices[rxq->port_id];
812                         dev->data->rx_mbuf_alloc_failed++;
813                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
814                                    "queue_id=%u", rxq->port_id, rxq->queue_id);
815                         break;
816                 }
817
818                 rxd = *rxdp;
819                 nb_hold++;
820                 rxe = rxq->sw_ring[rx_id];
821                 rx_id++;
822                 if (unlikely(rx_id == rxq->nb_rx_desc))
823                         rx_id = 0;
824
825                 /* Prefetch next mbuf */
826                 rte_prefetch0(rxq->sw_ring[rx_id]);
827
828                 /* When next RX descriptor is on a cache line boundary,
829                  * prefetch the next 4 RX descriptors and next 8 pointers
830                  * to mbufs.
831                  */
832                 if ((rx_id & 0x3) == 0) {
833                         rte_prefetch0(&rx_ring[rx_id]);
834                         rte_prefetch0(rxq->sw_ring[rx_id]);
835                 }
836                 rxm = rxe;
837                 rxe = nmb;
838                 dma_addr =
839                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
840                 rxdp->read.hdr_addr = 0;
841                 rxdp->read.pkt_addr = dma_addr;
842
843                 rx_packet_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
844                                 IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
845
846                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
847                 rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
848                 rxm->nb_segs = 1;
849                 rxm->next = NULL;
850                 rxm->pkt_len = rx_packet_len;
851                 rxm->data_len = rx_packet_len;
852                 rxm->port = rxq->port_id;
853                 rxm->ol_flags = 0;
854                 iavf_rxd_to_vlan_tci(rxm, &rxd);
855                 pkt_flags = iavf_rxd_to_pkt_flags(qword1);
856                 rxm->packet_type =
857                         ptype_tbl[(uint8_t)((qword1 &
858                         IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT)];
859
860                 if (pkt_flags & PKT_RX_RSS_HASH)
861                         rxm->hash.rss =
862                                 rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
863
864                 rxm->ol_flags |= pkt_flags;
865
866                 rx_pkts[nb_rx++] = rxm;
867         }
868         rxq->rx_tail = rx_id;
869
870         /* If the number of free RX descriptors is greater than the RX free
871          * threshold of the queue, advance the receive tail register of queue.
872          * Update that register with the value of the last processed RX
873          * descriptor minus 1.
874          */
875         nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
876         if (nb_hold > rxq->rx_free_thresh) {
877                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
878                            "nb_hold=%u nb_rx=%u",
879                            rxq->port_id, rxq->queue_id,
880                            rx_id, nb_hold, nb_rx);
881                 rx_id = (uint16_t)((rx_id == 0) ?
882                         (rxq->nb_rx_desc - 1) : (rx_id - 1));
883                 IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
884                 nb_hold = 0;
885         }
886         rxq->nb_rx_hold = nb_hold;
887
888         return nb_rx;
889 }
890
891 /* implement recv_scattered_pkts  */
892 uint16_t
893 iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
894                         uint16_t nb_pkts)
895 {
896         struct iavf_rx_queue *rxq = rx_queue;
897         union iavf_rx_desc rxd;
898         struct rte_mbuf *rxe;
899         struct rte_mbuf *first_seg = rxq->pkt_first_seg;
900         struct rte_mbuf *last_seg = rxq->pkt_last_seg;
901         struct rte_mbuf *nmb, *rxm;
902         uint16_t rx_id = rxq->rx_tail;
903         uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
904         struct rte_eth_dev *dev;
905         uint32_t rx_status;
906         uint64_t qword1;
907         uint64_t dma_addr;
908         uint64_t pkt_flags;
909
910         volatile union iavf_rx_desc *rx_ring = rxq->rx_ring;
911         volatile union iavf_rx_desc *rxdp;
912         static const uint32_t ptype_tbl[UINT8_MAX + 1] __rte_cache_aligned = {
913                 /* [0] reserved */
914                 [1] = RTE_PTYPE_L2_ETHER,
915                 /* [2] - [21] reserved */
916                 [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
917                         RTE_PTYPE_L4_FRAG,
918                 [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
919                         RTE_PTYPE_L4_NONFRAG,
920                 [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
921                         RTE_PTYPE_L4_UDP,
922                 /* [25] reserved */
923                 [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
924                         RTE_PTYPE_L4_TCP,
925                 [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
926                         RTE_PTYPE_L4_SCTP,
927                 [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
928                         RTE_PTYPE_L4_ICMP,
929                 /* All others reserved */
930         };
931
932         while (nb_rx < nb_pkts) {
933                 rxdp = &rx_ring[rx_id];
934                 qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
935                 rx_status = (qword1 & IAVF_RXD_QW1_STATUS_MASK) >>
936                             IAVF_RXD_QW1_STATUS_SHIFT;
937
938                 /* Check the DD bit */
939                 if (!(rx_status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)))
940                         break;
941                 IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id);
942
943                 nmb = rte_mbuf_raw_alloc(rxq->mp);
944                 if (unlikely(!nmb)) {
945                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
946                                    "queue_id=%u", rxq->port_id, rxq->queue_id);
947                         dev = &rte_eth_devices[rxq->port_id];
948                         dev->data->rx_mbuf_alloc_failed++;
949                         break;
950                 }
951
952                 rxd = *rxdp;
953                 nb_hold++;
954                 rxe = rxq->sw_ring[rx_id];
955                 rx_id++;
956                 if (rx_id == rxq->nb_rx_desc)
957                         rx_id = 0;
958
959                 /* Prefetch next mbuf */
960                 rte_prefetch0(rxq->sw_ring[rx_id]);
961
962                 /* When next RX descriptor is on a cache line boundary,
963                  * prefetch the next 4 RX descriptors and next 8 pointers
964                  * to mbufs.
965                  */
966                 if ((rx_id & 0x3) == 0) {
967                         rte_prefetch0(&rx_ring[rx_id]);
968                         rte_prefetch0(rxq->sw_ring[rx_id]);
969                 }
970
971                 rxm = rxe;
972                 rxe = nmb;
973                 dma_addr =
974                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
975
976                 /* Set data buffer address and data length of the mbuf */
977                 rxdp->read.hdr_addr = 0;
978                 rxdp->read.pkt_addr = dma_addr;
979                 rx_packet_len = (qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
980                                  IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;
981                 rxm->data_len = rx_packet_len;
982                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
983
984                 /* If this is the first buffer of the received packet, set the
985                  * pointer to the first mbuf of the packet and initialize its
986                  * context. Otherwise, update the total length and the number
987                  * of segments of the current scattered packet, and update the
988                  * pointer to the last mbuf of the current packet.
989                  */
990                 if (!first_seg) {
991                         first_seg = rxm;
992                         first_seg->nb_segs = 1;
993                         first_seg->pkt_len = rx_packet_len;
994                 } else {
995                         first_seg->pkt_len =
996                                 (uint16_t)(first_seg->pkt_len +
997                                                 rx_packet_len);
998                         first_seg->nb_segs++;
999                         last_seg->next = rxm;
1000                 }
1001
1002                 /* If this is not the last buffer of the received packet,
1003                  * update the pointer to the last mbuf of the current scattered
1004                  * packet and continue to parse the RX ring.
1005                  */
1006                 if (!(rx_status & (1 << IAVF_RX_DESC_STATUS_EOF_SHIFT))) {
1007                         last_seg = rxm;
1008                         continue;
1009                 }
1010
1011                 /* This is the last buffer of the received packet. If the CRC
1012                  * is not stripped by the hardware:
1013                  *  - Subtract the CRC length from the total packet length.
1014                  *  - If the last buffer only contains the whole CRC or a part
1015                  *  of it, free the mbuf associated to the last buffer. If part
1016                  *  of the CRC is also contained in the previous mbuf, subtract
1017                  *  the length of that CRC part from the data length of the
1018                  *  previous mbuf.
1019                  */
1020                 rxm->next = NULL;
1021                 if (unlikely(rxq->crc_len > 0)) {
1022                         first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1023                         if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
1024                                 rte_pktmbuf_free_seg(rxm);
1025                                 first_seg->nb_segs--;
1026                                 last_seg->data_len =
1027                                         (uint16_t)(last_seg->data_len -
1028                                         (RTE_ETHER_CRC_LEN - rx_packet_len));
1029                                 last_seg->next = NULL;
1030                         } else
1031                                 rxm->data_len = (uint16_t)(rx_packet_len -
1032                                                         RTE_ETHER_CRC_LEN);
1033                 }
1034
1035                 first_seg->port = rxq->port_id;
1036                 first_seg->ol_flags = 0;
1037                 iavf_rxd_to_vlan_tci(first_seg, &rxd);
1038                 pkt_flags = iavf_rxd_to_pkt_flags(qword1);
1039                 first_seg->packet_type =
1040                         ptype_tbl[(uint8_t)((qword1 &
1041                         IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT)];
1042
1043                 if (pkt_flags & PKT_RX_RSS_HASH)
1044                         first_seg->hash.rss =
1045                                 rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
1046
1047                 first_seg->ol_flags |= pkt_flags;
1048
1049                 /* Prefetch data of first segment, if configured to do so. */
1050                 rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
1051                                           first_seg->data_off));
1052                 rx_pkts[nb_rx++] = first_seg;
1053                 first_seg = NULL;
1054         }
1055
1056         /* Record index of the next RX descriptor to probe. */
1057         rxq->rx_tail = rx_id;
1058         rxq->pkt_first_seg = first_seg;
1059         rxq->pkt_last_seg = last_seg;
1060
1061         /* If the number of free RX descriptors is greater than the RX free
1062          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1063          * register. Update the RDT with the value of the last processed RX
1064          * descriptor minus 1, to guarantee that the RDT register is never
1065          * equal to the RDH register, which creates a "full" ring situtation
1066          * from the hardware point of view.
1067          */
1068         nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
1069         if (nb_hold > rxq->rx_free_thresh) {
1070                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1071                            "nb_hold=%u nb_rx=%u",
1072                            rxq->port_id, rxq->queue_id,
1073                            rx_id, nb_hold, nb_rx);
1074                 rx_id = (uint16_t)(rx_id == 0 ?
1075                         (rxq->nb_rx_desc - 1) : (rx_id - 1));
1076                 IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
1077                 nb_hold = 0;
1078         }
1079         rxq->nb_rx_hold = nb_hold;
1080
1081         return nb_rx;
1082 }
1083
1084 #define IAVF_LOOK_AHEAD 8
1085 static inline int
1086 iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq)
1087 {
1088         volatile union iavf_rx_desc *rxdp;
1089         struct rte_mbuf **rxep;
1090         struct rte_mbuf *mb;
1091         uint16_t pkt_len;
1092         uint64_t qword1;
1093         uint32_t rx_status;
1094         int32_t s[IAVF_LOOK_AHEAD], nb_dd;
1095         int32_t i, j, nb_rx = 0;
1096         uint64_t pkt_flags;
1097         static const uint32_t ptype_tbl[UINT8_MAX + 1] __rte_cache_aligned = {
1098                 /* [0] reserved */
1099                 [1] = RTE_PTYPE_L2_ETHER,
1100                 /* [2] - [21] reserved */
1101                 [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1102                         RTE_PTYPE_L4_FRAG,
1103                 [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1104                         RTE_PTYPE_L4_NONFRAG,
1105                 [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1106                         RTE_PTYPE_L4_UDP,
1107                 /* [25] reserved */
1108                 [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1109                         RTE_PTYPE_L4_TCP,
1110                 [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1111                         RTE_PTYPE_L4_SCTP,
1112                 [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1113                         RTE_PTYPE_L4_ICMP,
1114                 /* All others reserved */
1115         };
1116
1117         rxdp = &rxq->rx_ring[rxq->rx_tail];
1118         rxep = &rxq->sw_ring[rxq->rx_tail];
1119
1120         qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
1121         rx_status = (qword1 & IAVF_RXD_QW1_STATUS_MASK) >>
1122                     IAVF_RXD_QW1_STATUS_SHIFT;
1123
1124         /* Make sure there is at least 1 packet to receive */
1125         if (!(rx_status & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)))
1126                 return 0;
1127
1128         /* Scan LOOK_AHEAD descriptors at a time to determine which
1129          * descriptors reference packets that are ready to be received.
1130          */
1131         for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD,
1132              rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) {
1133                 /* Read desc statuses backwards to avoid race condition */
1134                 for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--) {
1135                         qword1 = rte_le_to_cpu_64(
1136                                 rxdp[j].wb.qword1.status_error_len);
1137                         s[j] = (qword1 & IAVF_RXD_QW1_STATUS_MASK) >>
1138                                IAVF_RXD_QW1_STATUS_SHIFT;
1139                 }
1140
1141                 rte_smp_rmb();
1142
1143                 /* Compute how many status bits were set */
1144                 for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++)
1145                         nb_dd += s[j] & (1 << IAVF_RX_DESC_STATUS_DD_SHIFT);
1146
1147                 nb_rx += nb_dd;
1148
1149                 /* Translate descriptor info to mbuf parameters */
1150                 for (j = 0; j < nb_dd; j++) {
1151                         IAVF_DUMP_RX_DESC(rxq, &rxdp[j],
1152                                          rxq->rx_tail + i * IAVF_LOOK_AHEAD + j);
1153
1154                         mb = rxep[j];
1155                         qword1 = rte_le_to_cpu_64
1156                                         (rxdp[j].wb.qword1.status_error_len);
1157                         pkt_len = ((qword1 & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
1158                                   IAVF_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
1159                         mb->data_len = pkt_len;
1160                         mb->pkt_len = pkt_len;
1161                         mb->ol_flags = 0;
1162                         iavf_rxd_to_vlan_tci(mb, &rxdp[j]);
1163                         pkt_flags = iavf_rxd_to_pkt_flags(qword1);
1164                         mb->packet_type =
1165                                 ptype_tbl[(uint8_t)((qword1 &
1166                                 IAVF_RXD_QW1_PTYPE_MASK) >>
1167                                 IAVF_RXD_QW1_PTYPE_SHIFT)];
1168
1169                         if (pkt_flags & PKT_RX_RSS_HASH)
1170                                 mb->hash.rss = rte_le_to_cpu_32(
1171                                         rxdp[j].wb.qword0.hi_dword.rss);
1172
1173                         mb->ol_flags |= pkt_flags;
1174                 }
1175
1176                 for (j = 0; j < IAVF_LOOK_AHEAD; j++)
1177                         rxq->rx_stage[i + j] = rxep[j];
1178
1179                 if (nb_dd != IAVF_LOOK_AHEAD)
1180                         break;
1181         }
1182
1183         /* Clear software ring entries */
1184         for (i = 0; i < nb_rx; i++)
1185                 rxq->sw_ring[rxq->rx_tail + i] = NULL;
1186
1187         return nb_rx;
1188 }
1189
1190 static inline uint16_t
1191 iavf_rx_fill_from_stage(struct iavf_rx_queue *rxq,
1192                        struct rte_mbuf **rx_pkts,
1193                        uint16_t nb_pkts)
1194 {
1195         uint16_t i;
1196         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1197
1198         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1199
1200         for (i = 0; i < nb_pkts; i++)
1201                 rx_pkts[i] = stage[i];
1202
1203         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1204         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1205
1206         return nb_pkts;
1207 }
1208
1209 static inline int
1210 iavf_rx_alloc_bufs(struct iavf_rx_queue *rxq)
1211 {
1212         volatile union iavf_rx_desc *rxdp;
1213         struct rte_mbuf **rxep;
1214         struct rte_mbuf *mb;
1215         uint16_t alloc_idx, i;
1216         uint64_t dma_addr;
1217         int diag;
1218
1219         /* Allocate buffers in bulk */
1220         alloc_idx = (uint16_t)(rxq->rx_free_trigger -
1221                                 (rxq->rx_free_thresh - 1));
1222         rxep = &rxq->sw_ring[alloc_idx];
1223         diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
1224                                     rxq->rx_free_thresh);
1225         if (unlikely(diag != 0)) {
1226                 PMD_RX_LOG(ERR, "Failed to get mbufs in bulk");
1227                 return -ENOMEM;
1228         }
1229
1230         rxdp = &rxq->rx_ring[alloc_idx];
1231         for (i = 0; i < rxq->rx_free_thresh; i++) {
1232                 if (likely(i < (rxq->rx_free_thresh - 1)))
1233                         /* Prefetch next mbuf */
1234                         rte_prefetch0(rxep[i + 1]);
1235
1236                 mb = rxep[i];
1237                 rte_mbuf_refcnt_set(mb, 1);
1238                 mb->next = NULL;
1239                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1240                 mb->nb_segs = 1;
1241                 mb->port = rxq->port_id;
1242                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1243                 rxdp[i].read.hdr_addr = 0;
1244                 rxdp[i].read.pkt_addr = dma_addr;
1245         }
1246
1247         /* Update rx tail register */
1248         rte_wmb();
1249         IAVF_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger);
1250
1251         rxq->rx_free_trigger =
1252                 (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh);
1253         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1254                 rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
1255
1256         return 0;
1257 }
1258
1259 static inline uint16_t
1260 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
1261 {
1262         struct iavf_rx_queue *rxq = (struct iavf_rx_queue *)rx_queue;
1263         uint16_t nb_rx = 0;
1264
1265         if (!nb_pkts)
1266                 return 0;
1267
1268         if (rxq->rx_nb_avail)
1269                 return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1270
1271         nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq);
1272         rxq->rx_next_avail = 0;
1273         rxq->rx_nb_avail = nb_rx;
1274         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1275
1276         if (rxq->rx_tail > rxq->rx_free_trigger) {
1277                 if (iavf_rx_alloc_bufs(rxq) != 0) {
1278                         uint16_t i, j;
1279
1280                         /* TODO: count rx_mbuf_alloc_failed here */
1281
1282                         rxq->rx_nb_avail = 0;
1283                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1284                         for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++)
1285                                 rxq->sw_ring[j] = rxq->rx_stage[i];
1286
1287                         return 0;
1288                 }
1289         }
1290
1291         if (rxq->rx_tail >= rxq->nb_rx_desc)
1292                 rxq->rx_tail = 0;
1293
1294         PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u, nb_rx=%u",
1295                    rxq->port_id, rxq->queue_id,
1296                    rxq->rx_tail, nb_rx);
1297
1298         if (rxq->rx_nb_avail)
1299                 return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1300
1301         return 0;
1302 }
1303
1304 static uint16_t
1305 iavf_recv_pkts_bulk_alloc(void *rx_queue,
1306                          struct rte_mbuf **rx_pkts,
1307                          uint16_t nb_pkts)
1308 {
1309         uint16_t nb_rx = 0, n, count;
1310
1311         if (unlikely(nb_pkts == 0))
1312                 return 0;
1313
1314         if (likely(nb_pkts <= IAVF_RX_MAX_BURST))
1315                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1316
1317         while (nb_pkts) {
1318                 n = RTE_MIN(nb_pkts, IAVF_RX_MAX_BURST);
1319                 count = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1320                 nb_rx = (uint16_t)(nb_rx + count);
1321                 nb_pkts = (uint16_t)(nb_pkts - count);
1322                 if (count < n)
1323                         break;
1324         }
1325
1326         return nb_rx;
1327 }
1328
1329 static inline int
1330 iavf_xmit_cleanup(struct iavf_tx_queue *txq)
1331 {
1332         struct iavf_tx_entry *sw_ring = txq->sw_ring;
1333         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
1334         uint16_t nb_tx_desc = txq->nb_tx_desc;
1335         uint16_t desc_to_clean_to;
1336         uint16_t nb_tx_to_clean;
1337
1338         volatile struct iavf_tx_desc *txd = txq->tx_ring;
1339
1340         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->rs_thresh);
1341         if (desc_to_clean_to >= nb_tx_desc)
1342                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
1343
1344         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
1345         if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
1346                         rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
1347                         rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE)) {
1348                 PMD_TX_FREE_LOG(DEBUG, "TX descriptor %4u is not done "
1349                                 "(port=%d queue=%d)", desc_to_clean_to,
1350                                 txq->port_id, txq->queue_id);
1351                 return -1;
1352         }
1353
1354         if (last_desc_cleaned > desc_to_clean_to)
1355                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
1356                                                         desc_to_clean_to);
1357         else
1358                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
1359                                         last_desc_cleaned);
1360
1361         txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
1362
1363         txq->last_desc_cleaned = desc_to_clean_to;
1364         txq->nb_free = (uint16_t)(txq->nb_free + nb_tx_to_clean);
1365
1366         return 0;
1367 }
1368
1369 /* Check if the context descriptor is needed for TX offloading */
1370 static inline uint16_t
1371 iavf_calc_context_desc(uint64_t flags)
1372 {
1373         static uint64_t mask = PKT_TX_TCP_SEG;
1374
1375         return (flags & mask) ? 1 : 0;
1376 }
1377
1378 static inline void
1379 iavf_txd_enable_checksum(uint64_t ol_flags,
1380                         uint32_t *td_cmd,
1381                         uint32_t *td_offset,
1382                         union iavf_tx_offload tx_offload)
1383 {
1384         /* Set MACLEN */
1385         *td_offset |= (tx_offload.l2_len >> 1) <<
1386                       IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
1387
1388         /* Enable L3 checksum offloads */
1389         if (ol_flags & PKT_TX_IP_CKSUM) {
1390                 *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
1391                 *td_offset |= (tx_offload.l3_len >> 2) <<
1392                               IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
1393         } else if (ol_flags & PKT_TX_IPV4) {
1394                 *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
1395                 *td_offset |= (tx_offload.l3_len >> 2) <<
1396                               IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
1397         } else if (ol_flags & PKT_TX_IPV6) {
1398                 *td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
1399                 *td_offset |= (tx_offload.l3_len >> 2) <<
1400                               IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
1401         }
1402
1403         if (ol_flags & PKT_TX_TCP_SEG) {
1404                 *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
1405                 *td_offset |= (tx_offload.l4_len >> 2) <<
1406                               IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1407                 return;
1408         }
1409
1410         /* Enable L4 checksum offloads */
1411         switch (ol_flags & PKT_TX_L4_MASK) {
1412         case PKT_TX_TCP_CKSUM:
1413                 *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
1414                 *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
1415                               IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1416                 break;
1417         case PKT_TX_SCTP_CKSUM:
1418                 *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
1419                 *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
1420                               IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1421                 break;
1422         case PKT_TX_UDP_CKSUM:
1423                 *td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
1424                 *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
1425                               IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1426                 break;
1427         default:
1428                 break;
1429         }
1430 }
1431
1432 /* set TSO context descriptor
1433  * support IP -> L4 and IP -> IP -> L4
1434  */
1435 static inline uint64_t
1436 iavf_set_tso_ctx(struct rte_mbuf *mbuf, union iavf_tx_offload tx_offload)
1437 {
1438         uint64_t ctx_desc = 0;
1439         uint32_t cd_cmd, hdr_len, cd_tso_len;
1440
1441         if (!tx_offload.l4_len) {
1442                 PMD_TX_LOG(DEBUG, "L4 length set to 0");
1443                 return ctx_desc;
1444         }
1445
1446         hdr_len = tx_offload.l2_len +
1447                   tx_offload.l3_len +
1448                   tx_offload.l4_len;
1449
1450         cd_cmd = IAVF_TX_CTX_DESC_TSO;
1451         cd_tso_len = mbuf->pkt_len - hdr_len;
1452         ctx_desc |= ((uint64_t)cd_cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) |
1453                      ((uint64_t)cd_tso_len << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
1454                      ((uint64_t)mbuf->tso_segsz << IAVF_TXD_CTX_QW1_MSS_SHIFT);
1455
1456         return ctx_desc;
1457 }
1458
1459 /* Construct the tx flags */
1460 static inline uint64_t
1461 iavf_build_ctob(uint32_t td_cmd, uint32_t td_offset, unsigned int size,
1462                uint32_t td_tag)
1463 {
1464         return rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DATA |
1465                                 ((uint64_t)td_cmd  << IAVF_TXD_QW1_CMD_SHIFT) |
1466                                 ((uint64_t)td_offset <<
1467                                  IAVF_TXD_QW1_OFFSET_SHIFT) |
1468                                 ((uint64_t)size  <<
1469                                  IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) |
1470                                 ((uint64_t)td_tag  <<
1471                                  IAVF_TXD_QW1_L2TAG1_SHIFT));
1472 }
1473
1474 /* TX function */
1475 uint16_t
1476 iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1477 {
1478         volatile struct iavf_tx_desc *txd;
1479         volatile struct iavf_tx_desc *txr;
1480         struct iavf_tx_queue *txq;
1481         struct iavf_tx_entry *sw_ring;
1482         struct iavf_tx_entry *txe, *txn;
1483         struct rte_mbuf *tx_pkt;
1484         struct rte_mbuf *m_seg;
1485         uint16_t tx_id;
1486         uint16_t nb_tx;
1487         uint32_t td_cmd;
1488         uint32_t td_offset;
1489         uint32_t td_tag;
1490         uint64_t ol_flags;
1491         uint16_t nb_used;
1492         uint16_t nb_ctx;
1493         uint16_t tx_last;
1494         uint16_t slen;
1495         uint64_t buf_dma_addr;
1496         union iavf_tx_offload tx_offload = {0};
1497
1498         txq = tx_queue;
1499         sw_ring = txq->sw_ring;
1500         txr = txq->tx_ring;
1501         tx_id = txq->tx_tail;
1502         txe = &sw_ring[tx_id];
1503
1504         /* Check if the descriptor ring needs to be cleaned. */
1505         if (txq->nb_free < txq->free_thresh)
1506                 iavf_xmit_cleanup(txq);
1507
1508         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1509                 td_cmd = 0;
1510                 td_tag = 0;
1511                 td_offset = 0;
1512
1513                 tx_pkt = *tx_pkts++;
1514                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1515
1516                 ol_flags = tx_pkt->ol_flags;
1517                 tx_offload.l2_len = tx_pkt->l2_len;
1518                 tx_offload.l3_len = tx_pkt->l3_len;
1519                 tx_offload.l4_len = tx_pkt->l4_len;
1520                 tx_offload.tso_segsz = tx_pkt->tso_segsz;
1521
1522                 /* Calculate the number of context descriptors needed. */
1523                 nb_ctx = iavf_calc_context_desc(ol_flags);
1524
1525                 /* The number of descriptors that must be allocated for
1526                  * a packet equals to the number of the segments of that
1527                  * packet plus 1 context descriptor if needed.
1528                  */
1529                 nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
1530                 tx_last = (uint16_t)(tx_id + nb_used - 1);
1531
1532                 /* Circular ring */
1533                 if (tx_last >= txq->nb_tx_desc)
1534                         tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1535
1536                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u"
1537                            " tx_first=%u tx_last=%u",
1538                            txq->port_id, txq->queue_id, tx_id, tx_last);
1539
1540                 if (nb_used > txq->nb_free) {
1541                         if (iavf_xmit_cleanup(txq)) {
1542                                 if (nb_tx == 0)
1543                                         return 0;
1544                                 goto end_of_tx;
1545                         }
1546                         if (unlikely(nb_used > txq->rs_thresh)) {
1547                                 while (nb_used > txq->nb_free) {
1548                                         if (iavf_xmit_cleanup(txq)) {
1549                                                 if (nb_tx == 0)
1550                                                         return 0;
1551                                                 goto end_of_tx;
1552                                         }
1553                                 }
1554                         }
1555                 }
1556
1557                 /* Descriptor based VLAN insertion */
1558                 if (ol_flags & PKT_TX_VLAN_PKT) {
1559                         td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
1560                         td_tag = tx_pkt->vlan_tci;
1561                 }
1562
1563                 /* According to datasheet, the bit2 is reserved and must be
1564                  * set to 1.
1565                  */
1566                 td_cmd |= 0x04;
1567
1568                 /* Enable checksum offloading */
1569                 if (ol_flags & IAVF_TX_CKSUM_OFFLOAD_MASK)
1570                         iavf_txd_enable_checksum(ol_flags, &td_cmd,
1571                                                 &td_offset, tx_offload);
1572
1573                 if (nb_ctx) {
1574                         /* Setup TX context descriptor if required */
1575                         uint64_t cd_type_cmd_tso_mss =
1576                                 IAVF_TX_DESC_DTYPE_CONTEXT;
1577                         volatile struct iavf_tx_context_desc *ctx_txd =
1578                                 (volatile struct iavf_tx_context_desc *)
1579                                                         &txr[tx_id];
1580
1581                         txn = &sw_ring[txe->next_id];
1582                         RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1583                         if (txe->mbuf) {
1584                                 rte_pktmbuf_free_seg(txe->mbuf);
1585                                 txe->mbuf = NULL;
1586                         }
1587
1588                         /* TSO enabled */
1589                         if (ol_flags & PKT_TX_TCP_SEG)
1590                                 cd_type_cmd_tso_mss |=
1591                                         iavf_set_tso_ctx(tx_pkt, tx_offload);
1592
1593                         ctx_txd->type_cmd_tso_mss =
1594                                 rte_cpu_to_le_64(cd_type_cmd_tso_mss);
1595
1596                         IAVF_DUMP_TX_DESC(txq, &txr[tx_id], tx_id);
1597                         txe->last_id = tx_last;
1598                         tx_id = txe->next_id;
1599                         txe = txn;
1600                 }
1601
1602                 m_seg = tx_pkt;
1603                 do {
1604                         txd = &txr[tx_id];
1605                         txn = &sw_ring[txe->next_id];
1606
1607                         if (txe->mbuf)
1608                                 rte_pktmbuf_free_seg(txe->mbuf);
1609                         txe->mbuf = m_seg;
1610
1611                         /* Setup TX Descriptor */
1612                         slen = m_seg->data_len;
1613                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
1614                         txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
1615                         txd->cmd_type_offset_bsz = iavf_build_ctob(td_cmd,
1616                                                                   td_offset,
1617                                                                   slen,
1618                                                                   td_tag);
1619
1620                         IAVF_DUMP_TX_DESC(txq, txd, tx_id);
1621                         txe->last_id = tx_last;
1622                         tx_id = txe->next_id;
1623                         txe = txn;
1624                         m_seg = m_seg->next;
1625                 } while (m_seg);
1626
1627                 /* The last packet data descriptor needs End Of Packet (EOP) */
1628                 td_cmd |= IAVF_TX_DESC_CMD_EOP;
1629                 txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
1630                 txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
1631
1632                 if (txq->nb_used >= txq->rs_thresh) {
1633                         PMD_TX_LOG(DEBUG, "Setting RS bit on TXD id="
1634                                    "%4u (port=%d queue=%d)",
1635                                    tx_last, txq->port_id, txq->queue_id);
1636
1637                         td_cmd |= IAVF_TX_DESC_CMD_RS;
1638
1639                         /* Update txq RS bit counters */
1640                         txq->nb_used = 0;
1641                 }
1642
1643                 txd->cmd_type_offset_bsz |=
1644                         rte_cpu_to_le_64(((uint64_t)td_cmd) <<
1645                                          IAVF_TXD_QW1_CMD_SHIFT);
1646                 IAVF_DUMP_TX_DESC(txq, txd, tx_id);
1647         }
1648
1649 end_of_tx:
1650         rte_wmb();
1651
1652         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1653                    txq->port_id, txq->queue_id, tx_id, nb_tx);
1654
1655         IAVF_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
1656         txq->tx_tail = tx_id;
1657
1658         return nb_tx;
1659 }
1660
1661 /* TX prep functions */
1662 uint16_t
1663 iavf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1664               uint16_t nb_pkts)
1665 {
1666         int i, ret;
1667         uint64_t ol_flags;
1668         struct rte_mbuf *m;
1669
1670         for (i = 0; i < nb_pkts; i++) {
1671                 m = tx_pkts[i];
1672                 ol_flags = m->ol_flags;
1673
1674                 /* Check condition for nb_segs > IAVF_TX_MAX_MTU_SEG. */
1675                 if (!(ol_flags & PKT_TX_TCP_SEG)) {
1676                         if (m->nb_segs > IAVF_TX_MAX_MTU_SEG) {
1677                                 rte_errno = EINVAL;
1678                                 return i;
1679                         }
1680                 } else if ((m->tso_segsz < IAVF_MIN_TSO_MSS) ||
1681                            (m->tso_segsz > IAVF_MAX_TSO_MSS)) {
1682                         /* MSS outside the range are considered malicious */
1683                         rte_errno = EINVAL;
1684                         return i;
1685                 }
1686
1687                 if (ol_flags & IAVF_TX_OFFLOAD_NOTSUP_MASK) {
1688                         rte_errno = ENOTSUP;
1689                         return i;
1690                 }
1691
1692 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1693                 ret = rte_validate_tx_offload(m);
1694                 if (ret != 0) {
1695                         rte_errno = -ret;
1696                         return i;
1697                 }
1698 #endif
1699                 ret = rte_net_intel_cksum_prepare(m);
1700                 if (ret != 0) {
1701                         rte_errno = -ret;
1702                         return i;
1703                 }
1704         }
1705
1706         return i;
1707 }
1708
1709 /* choose rx function*/
1710 void
1711 iavf_set_rx_function(struct rte_eth_dev *dev)
1712 {
1713         struct iavf_adapter *adapter =
1714                 IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
1715 #ifdef RTE_ARCH_X86
1716         struct iavf_rx_queue *rxq;
1717         int i;
1718         bool use_avx2 = false;
1719
1720         if (!iavf_rx_vec_dev_check(dev)) {
1721                 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1722                         rxq = dev->data->rx_queues[i];
1723                         (void)iavf_rxq_vec_setup(rxq);
1724                 }
1725
1726                 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
1727                     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
1728                         use_avx2 = true;
1729
1730                 if (dev->data->scattered_rx) {
1731                         PMD_DRV_LOG(DEBUG,
1732                                     "Using %sVector Scattered Rx (port %d).",
1733                                     use_avx2 ? "avx2 " : "",
1734                                     dev->data->port_id);
1735                         dev->rx_pkt_burst = use_avx2 ?
1736                                             iavf_recv_scattered_pkts_vec_avx2 :
1737                                             iavf_recv_scattered_pkts_vec;
1738                 } else {
1739                         PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
1740                                     use_avx2 ? "avx2 " : "",
1741                                     dev->data->port_id);
1742                         dev->rx_pkt_burst = use_avx2 ?
1743                                             iavf_recv_pkts_vec_avx2 :
1744                                             iavf_recv_pkts_vec;
1745                 }
1746
1747                 return;
1748         }
1749 #endif
1750
1751         if (dev->data->scattered_rx) {
1752                 PMD_DRV_LOG(DEBUG, "Using a Scattered Rx callback (port=%d).",
1753                             dev->data->port_id);
1754                 dev->rx_pkt_burst = iavf_recv_scattered_pkts;
1755         } else if (adapter->rx_bulk_alloc_allowed) {
1756                 PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).",
1757                             dev->data->port_id);
1758                 dev->rx_pkt_burst = iavf_recv_pkts_bulk_alloc;
1759         } else {
1760                 PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).",
1761                             dev->data->port_id);
1762                 dev->rx_pkt_burst = iavf_recv_pkts;
1763         }
1764 }
1765
1766 /* choose tx function*/
1767 void
1768 iavf_set_tx_function(struct rte_eth_dev *dev)
1769 {
1770 #ifdef RTE_ARCH_X86
1771         struct iavf_tx_queue *txq;
1772         int i;
1773         bool use_avx2 = false;
1774
1775         if (!iavf_tx_vec_dev_check(dev)) {
1776                 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1777                         txq = dev->data->tx_queues[i];
1778                         if (!txq)
1779                                 continue;
1780                         iavf_txq_vec_setup(txq);
1781                 }
1782
1783                 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
1784                     rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
1785                         use_avx2 = true;
1786
1787                 PMD_DRV_LOG(DEBUG, "Using %sVector Tx (port %d).",
1788                             use_avx2 ? "avx2 " : "",
1789                             dev->data->port_id);
1790                 dev->tx_pkt_burst = use_avx2 ?
1791                                     iavf_xmit_pkts_vec_avx2 :
1792                                     iavf_xmit_pkts_vec;
1793                 dev->tx_pkt_prepare = NULL;
1794
1795                 return;
1796         }
1797 #endif
1798
1799         PMD_DRV_LOG(DEBUG, "Using Basic Tx callback (port=%d).",
1800                     dev->data->port_id);
1801         dev->tx_pkt_burst = iavf_xmit_pkts;
1802         dev->tx_pkt_prepare = iavf_prep_pkts;
1803 }
1804
1805 void
1806 iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
1807                      struct rte_eth_rxq_info *qinfo)
1808 {
1809         struct iavf_rx_queue *rxq;
1810
1811         rxq = dev->data->rx_queues[queue_id];
1812
1813         qinfo->mp = rxq->mp;
1814         qinfo->scattered_rx = dev->data->scattered_rx;
1815         qinfo->nb_desc = rxq->nb_rx_desc;
1816
1817         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
1818         qinfo->conf.rx_drop_en = TRUE;
1819         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
1820 }
1821
1822 void
1823 iavf_dev_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
1824                      struct rte_eth_txq_info *qinfo)
1825 {
1826         struct iavf_tx_queue *txq;
1827
1828         txq = dev->data->tx_queues[queue_id];
1829
1830         qinfo->nb_desc = txq->nb_tx_desc;
1831
1832         qinfo->conf.tx_free_thresh = txq->free_thresh;
1833         qinfo->conf.tx_rs_thresh = txq->rs_thresh;
1834         qinfo->conf.offloads = txq->offloads;
1835         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
1836 }
1837
1838 /* Get the number of used descriptors of a rx queue */
1839 uint32_t
1840 iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id)
1841 {
1842 #define IAVF_RXQ_SCAN_INTERVAL 4
1843         volatile union iavf_rx_desc *rxdp;
1844         struct iavf_rx_queue *rxq;
1845         uint16_t desc = 0;
1846
1847         rxq = dev->data->rx_queues[queue_id];
1848         rxdp = &rxq->rx_ring[rxq->rx_tail];
1849         while ((desc < rxq->nb_rx_desc) &&
1850                ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
1851                  IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) &
1852                (1 << IAVF_RX_DESC_STATUS_DD_SHIFT)) {
1853                 /* Check the DD bit of a rx descriptor of each 4 in a group,
1854                  * to avoid checking too frequently and downgrading performance
1855                  * too much.
1856                  */
1857                 desc += IAVF_RXQ_SCAN_INTERVAL;
1858                 rxdp += IAVF_RXQ_SCAN_INTERVAL;
1859                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1860                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
1861                                         desc - rxq->nb_rx_desc]);
1862         }
1863
1864         return desc;
1865 }
1866
1867 int
1868 iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset)
1869 {
1870         struct iavf_rx_queue *rxq = rx_queue;
1871         volatile uint64_t *status;
1872         uint64_t mask;
1873         uint32_t desc;
1874
1875         if (unlikely(offset >= rxq->nb_rx_desc))
1876                 return -EINVAL;
1877
1878         if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1879                 return RTE_ETH_RX_DESC_UNAVAIL;
1880
1881         desc = rxq->rx_tail + offset;
1882         if (desc >= rxq->nb_rx_desc)
1883                 desc -= rxq->nb_rx_desc;
1884
1885         status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
1886         mask = rte_le_to_cpu_64((1ULL << IAVF_RX_DESC_STATUS_DD_SHIFT)
1887                 << IAVF_RXD_QW1_STATUS_SHIFT);
1888         if (*status & mask)
1889                 return RTE_ETH_RX_DESC_DONE;
1890
1891         return RTE_ETH_RX_DESC_AVAIL;
1892 }
1893
1894 int
1895 iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset)
1896 {
1897         struct iavf_tx_queue *txq = tx_queue;
1898         volatile uint64_t *status;
1899         uint64_t mask, expect;
1900         uint32_t desc;
1901
1902         if (unlikely(offset >= txq->nb_tx_desc))
1903                 return -EINVAL;
1904
1905         desc = txq->tx_tail + offset;
1906         /* go to next desc that has the RS bit */
1907         desc = ((desc + txq->rs_thresh - 1) / txq->rs_thresh) *
1908                 txq->rs_thresh;
1909         if (desc >= txq->nb_tx_desc) {
1910                 desc -= txq->nb_tx_desc;
1911                 if (desc >= txq->nb_tx_desc)
1912                         desc -= txq->nb_tx_desc;
1913         }
1914
1915         status = &txq->tx_ring[desc].cmd_type_offset_bsz;
1916         mask = rte_le_to_cpu_64(IAVF_TXD_QW1_DTYPE_MASK);
1917         expect = rte_cpu_to_le_64(
1918                  IAVF_TX_DESC_DTYPE_DESC_DONE << IAVF_TXD_QW1_DTYPE_SHIFT);
1919         if ((*status & mask) == expect)
1920                 return RTE_ETH_TX_DESC_DONE;
1921
1922         return RTE_ETH_TX_DESC_FULL;
1923 }