net/qede: support scatter gather
[dpdk.git] drivers/net/qede/qede_rxtx.c
1 /*
2  * Copyright (c) 2016 QLogic Corporation.
3  * All rights reserved.
4  * www.qlogic.com
5  *
6  * See LICENSE.qede_pmd for copyright and licensing details.
7  */
8
9 #include "qede_rxtx.h"
10
11 static bool gro_disable = true; /* mod_param */
12
13 #define QEDE_FASTPATH_TX        (1 << 0)
14 #define QEDE_FASTPATH_RX        (1 << 1)
15
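/*
 * Allocate one mbuf from the Rx mempool, park it in the software ring at the
 * current software producer index and program its DMA address into the next
 * Rx BD. Only the software producer is advanced here; the hardware producer
 * is published later through qede_update_rx_prod().
 */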
16 static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
17 {
18         struct rte_mbuf *new_mb = NULL;
19         struct eth_rx_bd *rx_bd;
20         dma_addr_t mapping;
21         uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
22
23         new_mb = rte_mbuf_raw_alloc(rxq->mb_pool);
24         if (unlikely(!new_mb)) {
25                 PMD_RX_LOG(ERR, rxq,
26                            "Failed to allocate rx buffer "
27                            "sw_rx_prod %u sw_rx_cons %u mp avail %u in_use %u",
28                            idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
29                            rte_mempool_avail_count(rxq->mb_pool),
30                            rte_mempool_in_use_count(rxq->mb_pool));
31                 return -ENOMEM;
32         }
33         rxq->sw_rx_ring[idx].mbuf = new_mb;
34         rxq->sw_rx_ring[idx].page_offset = 0;
35         mapping = rte_mbuf_data_dma_addr_default(new_mb);
36         /* Advance PROD and get BD pointer */
37         rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
38         rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
39         rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
40         rxq->sw_rx_prod++;
41         return 0;
42 }
43
44 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
45 {
46         uint16_t i;
47
48         if (rxq->sw_rx_ring != NULL) {
49                 for (i = 0; i < rxq->nb_rx_desc; i++) {
50                         if (rxq->sw_rx_ring[i].mbuf != NULL) {
51                                 rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf);
52                                 rxq->sw_rx_ring[i].mbuf = NULL;
53                         }
54                 }
55         }
56 }
57
58 void qede_rx_queue_release(void *rx_queue)
59 {
60         struct qede_rx_queue *rxq = rx_queue;
61
62         if (rxq != NULL) {
63                 qede_rx_queue_release_mbufs(rxq);
64                 rte_free(rxq->sw_rx_ring);
65                 rxq->sw_rx_ring = NULL;
66                 rte_free(rxq);
67                 rxq = NULL;
68         }
69 }
70
71 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
72 {
73         unsigned int i;
74
75         PMD_TX_LOG(DEBUG, txq, "releasing %u mbufs\n", txq->nb_tx_desc);
76
77         if (txq->sw_tx_ring) {
78                 for (i = 0; i < txq->nb_tx_desc; i++) {
79                         if (txq->sw_tx_ring[i].mbuf) {
80                                 rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf);
81                                 txq->sw_tx_ring[i].mbuf = NULL;
82                         }
83                 }
84         }
85 }
86
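/*
 * Rx queue setup: the ring size must be a power of 2; the Rx buffer size is
 * the mbuf data room (minus headroom) when scattered Rx is enabled, otherwise
 * max_rx_pkt_len plus QEDE_ETH_OVERHEAD. The function allocates the software
 * ring, the FW Rx BD ring (next-ptr mode) and the FW completion ring (PBL
 * mode), then pre-fills every Rx BD with an mbuf.
 */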
87 int
88 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
89                     uint16_t nb_desc, unsigned int socket_id,
90                     const struct rte_eth_rxconf *rx_conf,
91                     struct rte_mempool *mp)
92 {
93         struct qede_dev *qdev = dev->data->dev_private;
94         struct ecore_dev *edev = &qdev->edev;
95         struct rte_eth_dev_data *eth_data = dev->data;
96         struct qede_rx_queue *rxq;
97         uint16_t pkt_len = (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len;
98         size_t size;
99         uint16_t data_size;
100         int rc;
101         int i;
102
103         PMD_INIT_FUNC_TRACE(edev);
104
105         /* Note: Ring size/align is controlled by struct rte_eth_desc_lim */
106         if (!rte_is_power_of_2(nb_desc)) {
107                 DP_ERR(edev, "Ring size %u is not power of 2\n",
108                           nb_desc);
109                 return -EINVAL;
110         }
111
112         /* Free memory prior to re-allocation if needed... */
113         if (dev->data->rx_queues[queue_idx] != NULL) {
114                 qede_rx_queue_release(dev->data->rx_queues[queue_idx]);
115                 dev->data->rx_queues[queue_idx] = NULL;
116         }
117
118         /* First allocate the rx queue data structure */
119         rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue),
120                                  RTE_CACHE_LINE_SIZE, socket_id);
121
122         if (!rxq) {
123                 DP_ERR(edev, "Unable to allocate memory for rxq on socket %u",
124                           socket_id);
125                 return -ENOMEM;
126         }
127
128         rxq->qdev = qdev;
129         rxq->mb_pool = mp;
130         rxq->nb_rx_desc = nb_desc;
131         rxq->queue_id = queue_idx;
132         rxq->port_id = dev->data->port_id;
133
134         /* Sanity check */
135         data_size = (uint16_t)rte_pktmbuf_data_room_size(mp) -
136                                 RTE_PKTMBUF_HEADROOM;
137
138         if (pkt_len > data_size && !dev->data->scattered_rx) {
139                 DP_ERR(edev, "max_rx_pkt_len %u exceeds mbuf data room %u\n",
140                        pkt_len, data_size);
141                 rte_free(rxq);
142                 return -EINVAL;
143         }
144
145         if (dev->data->scattered_rx)
146                 rxq->rx_buf_size = data_size;
147         else
148                 rxq->rx_buf_size = pkt_len + QEDE_ETH_OVERHEAD;
149
150         qdev->mtu = pkt_len;
151
152         DP_INFO(edev, "MTU = %u ; RX buffer = %u\n",
153                 qdev->mtu, rxq->rx_buf_size);
154
155         if (pkt_len > ETHER_MAX_LEN) {
156                 dev->data->dev_conf.rxmode.jumbo_frame = 1;
157                 DP_NOTICE(edev, false, "jumbo frame enabled\n");
158         } else {
159                 dev->data->dev_conf.rxmode.jumbo_frame = 0;
160         }
161
162         /* Allocate the parallel driver ring for Rx buffers */
163         size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
164         rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size,
165                                              RTE_CACHE_LINE_SIZE, socket_id);
166         if (!rxq->sw_rx_ring) {
167                 DP_NOTICE(edev, false,
168                           "Unable to alloc memory for sw_rx_ring on socket %u\n",
169                           socket_id);
170                 rte_free(rxq);
171                 rxq = NULL;
172                 return -ENOMEM;
173         }
174
175         /* Allocate FW Rx ring  */
176         rc = qdev->ops->common->chain_alloc(edev,
177                                             ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
178                                             ECORE_CHAIN_MODE_NEXT_PTR,
179                                             ECORE_CHAIN_CNT_TYPE_U16,
180                                             rxq->nb_rx_desc,
181                                             sizeof(struct eth_rx_bd),
182                                             &rxq->rx_bd_ring);
183
184         if (rc != ECORE_SUCCESS) {
185                 DP_NOTICE(edev, false,
186                           "Unable to alloc memory for rxbd ring on socket %u\n",
187                           socket_id);
188                 rte_free(rxq->sw_rx_ring);
189                 rxq->sw_rx_ring = NULL;
190                 rte_free(rxq);
191                 rxq = NULL;
192                 return -ENOMEM;
193         }
194
195         /* Allocate FW completion ring */
196         rc = qdev->ops->common->chain_alloc(edev,
197                                             ECORE_CHAIN_USE_TO_CONSUME,
198                                             ECORE_CHAIN_MODE_PBL,
199                                             ECORE_CHAIN_CNT_TYPE_U16,
200                                             rxq->nb_rx_desc,
201                                             sizeof(union eth_rx_cqe),
202                                             &rxq->rx_comp_ring);
203
204         if (rc != ECORE_SUCCESS) {
205                 DP_NOTICE(edev, false,
206                           "Unable to alloc memory for cqe ring on socket %u\n",
207                           socket_id);
208                 /* TBD: Freeing RX BD ring */
209                 rte_free(rxq->sw_rx_ring);
210                 rxq->sw_rx_ring = NULL;
211                 rte_free(rxq);
212                 return -ENOMEM;
213         }
214
215         /* Allocate buffers for the Rx ring */
216         for (i = 0; i < rxq->nb_rx_desc; i++) {
217                 rc = qede_alloc_rx_buffer(rxq);
218                 if (rc) {
219                         DP_NOTICE(edev, false,
220                                   "RX buffer allocation failed at idx=%d\n", i);
221                         goto err4;
222                 }
223         }
224
225         dev->data->rx_queues[queue_idx] = rxq;
226
227         DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n",
228                   queue_idx, nb_desc, rxq->rx_buf_size, socket_id);
229
230         return 0;
231 err4:
232         qede_rx_queue_release(rxq);
233         return -ENOMEM;
234 }
235
236 void qede_tx_queue_release(void *tx_queue)
237 {
238         struct qede_tx_queue *txq = tx_queue;
239
240         if (txq != NULL) {
241                 qede_tx_queue_release_mbufs(txq);
242                 if (txq->sw_tx_ring) {
243                         rte_free(txq->sw_tx_ring);
244                         txq->sw_tx_ring = NULL;
245                 }
246                 rte_free(txq);
247         }
248         txq = NULL;
249 }
250
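/*
 * Tx queue setup: allocates the FW Tx BD ring in PBL mode and a parallel
 * software ring that remembers the mbuf behind each descriptor. If the
 * application does not supply tx_free_thresh, it defaults to
 * nb_desc - QEDE_DEFAULT_TX_FREE_THRESH.
 */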
251 int
252 qede_tx_queue_setup(struct rte_eth_dev *dev,
253                     uint16_t queue_idx,
254                     uint16_t nb_desc,
255                     unsigned int socket_id,
256                     const struct rte_eth_txconf *tx_conf)
257 {
258         struct qede_dev *qdev = dev->data->dev_private;
259         struct ecore_dev *edev = &qdev->edev;
260         struct qede_tx_queue *txq;
261         int rc;
262
263         PMD_INIT_FUNC_TRACE(edev);
264
265         if (!rte_is_power_of_2(nb_desc)) {
266                 DP_ERR(edev, "Ring size %u is not power of 2\n",
267                        nb_desc);
268                 return -EINVAL;
269         }
270
271         /* Free memory prior to re-allocation if needed... */
272         if (dev->data->tx_queues[queue_idx] != NULL) {
273                 qede_tx_queue_release(dev->data->tx_queues[queue_idx]);
274                 dev->data->tx_queues[queue_idx] = NULL;
275         }
276
277         txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue),
278                                  RTE_CACHE_LINE_SIZE, socket_id);
279
280         if (txq == NULL) {
281                 DP_ERR(edev,
282                        "Unable to allocate memory for txq on socket %u",
283                        socket_id);
284                 return -ENOMEM;
285         }
286
287         txq->nb_tx_desc = nb_desc;
288         txq->qdev = qdev;
289         txq->port_id = dev->data->port_id;
290
291         rc = qdev->ops->common->chain_alloc(edev,
292                                             ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
293                                             ECORE_CHAIN_MODE_PBL,
294                                             ECORE_CHAIN_CNT_TYPE_U16,
295                                             txq->nb_tx_desc,
296                                             sizeof(union eth_tx_bd_types),
297                                             &txq->tx_pbl);
298         if (rc != ECORE_SUCCESS) {
299                 DP_ERR(edev,
300                        "Unable to allocate memory for txbd ring on socket %u",
301                        socket_id);
302                 qede_tx_queue_release(txq);
303                 return -ENOMEM;
304         }
305
306         /* Allocate software ring */
307         txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring",
308                                              (sizeof(struct qede_tx_entry) *
309                                               txq->nb_tx_desc),
310                                              RTE_CACHE_LINE_SIZE, socket_id);
311
312         if (!txq->sw_tx_ring) {
313                 DP_ERR(edev,
314                        "Unable to allocate memory for sw_tx_ring on socket %u",
315                        socket_id);
316                 qede_tx_queue_release(txq);
317                 return -ENOMEM;
318         }
319
320         txq->queue_id = queue_idx;
321
322         txq->nb_tx_avail = txq->nb_tx_desc;
323
324         txq->tx_free_thresh =
325             tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
326             (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH);
327
328         dev->data->tx_queues[queue_idx] = txq;
329
330         DP_INFO(edev,
331                   "txq %u num_desc %u tx_free_thresh %u socket %u\n",
332                   queue_idx, nb_desc, txq->tx_free_thresh, socket_id);
333
334         return 0;
335 }
336
337 /* This function inits fp content and resets the SB, RXQ and TXQ arrays */
338 static void qede_init_fp(struct qede_dev *qdev)
339 {
340         struct qede_fastpath *fp;
341         uint8_t i, rss_id, tc;
342         int fp_rx = qdev->fp_num_rx, rxq = 0, txq = 0;
343
344         memset((void *)qdev->fp_array, 0, (QEDE_QUEUE_CNT(qdev) *
345                                            sizeof(*qdev->fp_array)));
346         memset((void *)qdev->sb_array, 0, (QEDE_QUEUE_CNT(qdev) *
347                                            sizeof(*qdev->sb_array)));
348         for_each_queue(i) {
349                 fp = &qdev->fp_array[i];
350                 if (fp_rx) {
351                         fp->type = QEDE_FASTPATH_RX;
352                         fp_rx--;
353                 } else {
354                         fp->type = QEDE_FASTPATH_TX;
355                 }
356                 fp->qdev = qdev;
357                 fp->id = i;
358                 fp->sb_info = &qdev->sb_array[i];
359                 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", "qdev", i);
360         }
361
362         qdev->gro_disable = gro_disable;
363 }
364
365 void qede_free_fp_arrays(struct qede_dev *qdev)
366 {
367         /* It assumes qede_free_mem_load() is called before */
368         if (qdev->fp_array != NULL) {
369                 rte_free(qdev->fp_array);
370                 qdev->fp_array = NULL;
371         }
372
373         if (qdev->sb_array != NULL) {
374                 rte_free(qdev->sb_array);
375                 qdev->sb_array = NULL;
376         }
377 }
378
379 int qede_alloc_fp_array(struct qede_dev *qdev)
380 {
381         struct qede_fastpath *fp;
382         struct ecore_dev *edev = &qdev->edev;
383         int i;
384
385         qdev->fp_array = rte_calloc("fp", QEDE_QUEUE_CNT(qdev),
386                                     sizeof(*qdev->fp_array),
387                                     RTE_CACHE_LINE_SIZE);
388
389         if (!qdev->fp_array) {
390                 DP_ERR(edev, "fp array allocation failed\n");
391                 return -ENOMEM;
392         }
393
394         qdev->sb_array = rte_calloc("sb", QEDE_QUEUE_CNT(qdev),
395                                     sizeof(*qdev->sb_array),
396                                     RTE_CACHE_LINE_SIZE);
397
398         if (!qdev->sb_array) {
399                 DP_ERR(edev, "sb array allocation failed\n");
400                 rte_free(qdev->fp_array);
401                 return -ENOMEM;
402         }
403
404         return 0;
405 }
406
407 /* This function allocates fast-path status block memory */
408 static int
409 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
410                   uint16_t sb_id)
411 {
412         struct ecore_dev *edev = &qdev->edev;
413         struct status_block *sb_virt;
414         dma_addr_t sb_phys;
415         int rc;
416
417         sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys, sizeof(*sb_virt));
418
419         if (!sb_virt) {
420                 DP_ERR(edev, "Status block allocation failed\n");
421                 return -ENOMEM;
422         }
423
424         rc = qdev->ops->common->sb_init(edev, sb_info,
425                                         sb_virt, sb_phys, sb_id,
426                                         QED_SB_TYPE_L2_QUEUE);
427         if (rc) {
428                 DP_ERR(edev, "Status block initialization failed\n");
429                 /* TBD: No dma_free_coherent possible */
430                 return rc;
431         }
432
433         return 0;
434 }
435
436 int qede_alloc_fp_resc(struct qede_dev *qdev)
437 {
438         struct qede_fastpath *fp;
439         int rc, i;
440
441         if (qdev->fp_array)
442                 qede_free_fp_arrays(qdev);
443
444         rc = qede_alloc_fp_array(qdev);
445         if (rc != 0)
446                 return rc;
447
448         qede_init_fp(qdev);
449
450         for (i = 0; i < QEDE_QUEUE_CNT(qdev); i++) {
451                 fp = &qdev->fp_array[i];
452                 if (qede_alloc_mem_sb(qdev, fp->sb_info, i)) {
453                         qede_free_fp_arrays(qdev);
454                         return -ENOMEM;
455                 }
456         }
457
458         return 0;
459 }
460
461 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
462 {
463         struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
464
465         qede_free_mem_load(eth_dev);
466         qede_free_fp_arrays(qdev);
467 }
468
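/*
 * Publish the current Rx BD and CQE producer indices to the device internal
 * RAM. The surrounding write barriers make sure the BD contents are visible
 * before the producers are updated and that the producer update itself is
 * ordered against later doorbell writes.
 */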
469 static inline void
470 qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq)
471 {
472         uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
473         uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring);
474         struct eth_rx_prod_data rx_prods = { 0 };
475
476         /* Update producers */
477         rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod);
478         rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod);
479
480         /* Make sure that the BD and SGE data is updated before updating the
481          * producers since FW might read the BD/SGE right after the producer
482          * is updated.
483          */
484         rte_wmb();
485
486         internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
487                         (uint32_t *)&rx_prods);
488
489         /* A write barrier is needed to synchronize doorbell writes issued
490          * from more than one processor. It guarantees that this write reaches
491          * the device before the next producer update is posted (possibly from
492          * another CPU), so a later doorbell cannot bypass this one. This
493          * mainly matters on weakly ordered systems.
494          */
495         rte_wmb();
496
497         PMD_RX_LOG(DEBUG, rxq, "bd_prod %u  cqe_prod %u\n", bd_prod, cqe_prod);
498 }
499
500 static inline uint32_t
501 qede_rxfh_indir_default(uint32_t index, uint32_t n_rx_rings)
502 {
503         return index % n_rx_rings;
504 }
505
506 static void qede_prandom_bytes(uint32_t *buff, size_t bytes)
507 {
508         unsigned int i;
509
510         srand((unsigned int)time(NULL));
511
512         for (i = 0; i < ECORE_RSS_KEY_SIZE; i++)
513                 buff[i] = rand();
514 }
515
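/*
 * Check whether the ethdev configuration asks for RSS and, if so, fill the
 * vport RSS parameters: a default round-robin indirection table, the hash
 * key (randomly generated when the application supplies none) and the hash
 * capabilities derived from rss_hf.
 */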
516 static bool
517 qede_check_vport_rss_enable(struct rte_eth_dev *eth_dev,
518                             struct qed_update_vport_rss_params *rss_params)
519 {
520         struct rte_eth_rss_conf rss_conf;
521         enum rte_eth_rx_mq_mode mode = eth_dev->data->dev_conf.rxmode.mq_mode;
522         struct qede_dev *qdev = eth_dev->data->dev_private;
523         struct ecore_dev *edev = &qdev->edev;
524         uint8_t rss_caps;
525         unsigned int i;
526         uint64_t hf;
527         uint32_t *key;
528
529         PMD_INIT_FUNC_TRACE(edev);
530
531         rss_conf = eth_dev->data->dev_conf.rx_adv_conf.rss_conf;
532         key = (uint32_t *)rss_conf.rss_key;
533         hf = rss_conf.rss_hf;
534
535         /* Check if RSS conditions are met.
536          * Note: Even though it's meaningless to enable RSS with one queue, it
537          * could be used to produce RSS Hash, so skipping that check.
538          */
539         if (!(mode & ETH_MQ_RX_RSS)) {
540                 DP_INFO(edev, "RSS flag is not set\n");
541                 return false;
542         }
543
544         if (hf == 0) {
545                 DP_INFO(edev, "Request to disable RSS\n");
546                 return false;
547         }
548
549         memset(rss_params, 0, sizeof(*rss_params));
550
551         for (i = 0; i < ECORE_RSS_IND_TABLE_SIZE; i++)
552                 rss_params->rss_ind_table[i] = qede_rxfh_indir_default(i,
553                                                         QEDE_RSS_COUNT(qdev));
554
555         if (!key)
556                 qede_prandom_bytes(rss_params->rss_key,
557                                    sizeof(rss_params->rss_key));
558         else
559                 memcpy(rss_params->rss_key, rss_conf.rss_key,
560                        rss_conf.rss_key_len);
561
562         qede_init_rss_caps(&rss_caps, hf);
563
564         rss_params->rss_caps = rss_caps;
565
566         DP_INFO(edev, "RSS conditions are met\n");
567
568         return true;
569 }
570
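/*
 * Start all Rx and Tx queues: hand the BD/CQE rings and producer addresses
 * to firmware, wire up the status-block consumer pointers and Tx doorbell
 * data, then activate the vport with the MTU, Tx-switching and RSS settings.
 */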
571 static int qede_start_queues(struct rte_eth_dev *eth_dev, bool clear_stats)
572 {
573         struct qede_dev *qdev = eth_dev->data->dev_private;
574         struct ecore_dev *edev = &qdev->edev;
575         struct qed_update_vport_rss_params *rss_params = &qdev->rss_params;
576         struct qed_dev_info *qed_info = &qdev->dev_info.common;
577         struct qed_update_vport_params vport_update_params;
578         struct qede_tx_queue *txq;
579         struct qede_fastpath *fp;
580         dma_addr_t p_phys_table;
581         int txq_index;
582         uint16_t page_cnt;
583         int vlan_removal_en = 1;
584         int rc, tc, i;
585
586         for_each_queue(i) {
587                 fp = &qdev->fp_array[i];
588                 if (fp->type & QEDE_FASTPATH_RX) {
589                         p_phys_table = ecore_chain_get_pbl_phys(&fp->rxq->
590                                                                 rx_comp_ring);
591                         page_cnt = ecore_chain_get_page_cnt(&fp->rxq->
592                                                                 rx_comp_ring);
593
594                         ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0);
595
596                         rc = qdev->ops->q_rx_start(edev, i, fp->rxq->queue_id,
597                                            0,
598                                            fp->sb_info->igu_sb_id,
599                                            RX_PI,
600                                            fp->rxq->rx_buf_size,
601                                            fp->rxq->rx_bd_ring.p_phys_addr,
602                                            p_phys_table,
603                                            page_cnt,
604                                            &fp->rxq->hw_rxq_prod_addr);
605                         if (rc) {
606                                 DP_ERR(edev, "Start rxq #%d failed %d\n",
607                                        fp->rxq->queue_id, rc);
608                                 return rc;
609                         }
610
611                         fp->rxq->hw_cons_ptr =
612                                         &fp->sb_info->sb_virt->pi_array[RX_PI];
613
614                         qede_update_rx_prod(qdev, fp->rxq);
615                 }
616
617                 if (!(fp->type & QEDE_FASTPATH_TX))
618                         continue;
619                 for (tc = 0; tc < qdev->num_tc; tc++) {
620                         txq = fp->txqs[tc];
621                         txq_index = tc * QEDE_RSS_COUNT(qdev) + i;
622
623                         p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl);
624                         page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl);
625                         rc = qdev->ops->q_tx_start(edev, i, txq->queue_id,
626                                                    0,
627                                                    fp->sb_info->igu_sb_id,
628                                                    TX_PI(tc),
629                                                    p_phys_table, page_cnt,
630                                                    &txq->doorbell_addr);
631                         if (rc) {
632                                 DP_ERR(edev, "Start txq %u failed %d\n",
633                                        txq_index, rc);
634                                 return rc;
635                         }
636
637                         txq->hw_cons_ptr =
638                             &fp->sb_info->sb_virt->pi_array[TX_PI(tc)];
639                         SET_FIELD(txq->tx_db.data.params,
640                                   ETH_DB_DATA_DEST, DB_DEST_XCM);
641                         SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
642                                   DB_AGG_CMD_SET);
643                         SET_FIELD(txq->tx_db.data.params,
644                                   ETH_DB_DATA_AGG_VAL_SEL,
645                                   DQ_XCM_ETH_TX_BD_PROD_CMD);
646
647                         txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
648                 }
649         }
650
651         /* Prepare and send the vport enable */
652         memset(&vport_update_params, 0, sizeof(vport_update_params));
653         /* Update MTU via vport update */
654         vport_update_params.mtu = qdev->mtu;
655         vport_update_params.vport_id = 0;
656         vport_update_params.update_vport_active_flg = 1;
657         vport_update_params.vport_active_flg = 1;
658
659         /* @DPDK */
660         if (qed_info->mf_mode == MF_NPAR && qed_info->tx_switching) {
661                 /* TBD: Check SRIOV enabled for VF */
662                 vport_update_params.update_tx_switching_flg = 1;
663                 vport_update_params.tx_switching_flg = 1;
664         }
665
666         if (qede_check_vport_rss_enable(eth_dev, rss_params)) {
667                 vport_update_params.update_rss_flg = 1;
668                 qdev->rss_enabled = 1;
669         } else {
670                 qdev->rss_enabled = 0;
671         }
672
673         rte_memcpy(&vport_update_params.rss_params, rss_params,
674                sizeof(*rss_params));
675
676         rc = qdev->ops->vport_update(edev, &vport_update_params);
677         if (rc) {
678                 DP_ERR(edev, "Update V-PORT failed %d\n", rc);
679                 return rc;
680         }
681
682         return 0;
683 }
684
685 #ifdef ENC_SUPPORTED
686 static bool qede_tunn_exist(uint16_t flag)
687 {
688         return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK <<
689                     PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag);
690 }
691
692 static inline uint8_t qede_check_tunn_csum(uint16_t flag)
693 {
694         uint8_t tcsum = 0;
695         uint16_t csum_flag = 0;
696
697         if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK <<
698              PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag)
699                 csum_flag |= PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK <<
700                     PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT;
701
702         if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
703              PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag) {
704                 csum_flag |= PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
705                     PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT;
706                 tcsum = QEDE_TUNN_CSUM_UNNECESSARY;
707         }
708
709         csum_flag |= PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK <<
710             PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT |
711             PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
712             PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT;
713
714         if (csum_flag & flag)
715                 return QEDE_CSUM_ERROR;
716
717         return QEDE_CSUM_UNNECESSARY | tcsum;
718 }
719 #else
720 static inline uint8_t qede_tunn_exist(uint16_t flag)
721 {
722         return 0;
723 }
724
725 static inline uint8_t qede_check_tunn_csum(uint16_t flag)
726 {
727         return 0;
728 }
729 #endif
730
731 static inline uint8_t qede_check_notunn_csum(uint16_t flag)
732 {
733         uint8_t csum = 0;
734         uint16_t csum_flag = 0;
735
736         if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
737              PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag) {
738                 csum_flag |= PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
739                     PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT;
740                 csum = QEDE_CSUM_UNNECESSARY;
741         }
742
743         csum_flag |= PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
744             PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT;
745
746         if (csum_flag & flag)
747                 return QEDE_CSUM_ERROR;
748
749         return csum;
750 }
751
752 static inline uint8_t qede_check_csum(uint16_t flag)
753 {
754         if (likely(!qede_tunn_exist(flag)))
755                 return qede_check_notunn_csum(flag);
756         else
757                 return qede_check_tunn_csum(flag);
758 }
759
760 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
761 {
762         ecore_chain_consume(&rxq->rx_bd_ring);
763         rxq->sw_rx_cons++;
764 }
765
766 static inline void
767 qede_reuse_page(struct qede_dev *qdev,
768                 struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
769 {
770         struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
771         uint16_t idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
772         struct qede_rx_entry *curr_prod;
773         dma_addr_t new_mapping;
774
775         curr_prod = &rxq->sw_rx_ring[idx];
776         *curr_prod = *curr_cons;
777
778         new_mapping = rte_mbuf_data_dma_addr_default(curr_prod->mbuf) +
779                       curr_prod->page_offset;
780
781         rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
782         rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping));
783
784         rxq->sw_rx_prod++;
785 }
786
787 static inline void
788 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
789                         struct qede_dev *qdev, uint8_t count)
790 {
791         struct qede_rx_entry *curr_cons;
792
793         for (; count > 0; count--) {
794                 curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
795                 qede_reuse_page(qdev, rxq, curr_cons);
796                 qede_rx_bd_ring_consume(rxq);
797         }
798 }
799
800 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
801 {
802         uint32_t p_type;
803         /* TBD - L4 indications needed ? */
804         uint16_t protocol = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
805                               PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) & flags);
806
807         /* protocol = 3 means LLC/SNAP over Ethernet */
808         if (unlikely(protocol == 0 || protocol == 3))
809                 p_type = RTE_PTYPE_UNKNOWN;
810         else if (protocol == 1)
811                 p_type = RTE_PTYPE_L3_IPV4;
812         else if (protocol == 2)
813                 p_type = RTE_PTYPE_L3_IPV6;
814
815         return RTE_PTYPE_L2_ETHER | p_type;
816 }
817
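/*
 * Scatter-gather Rx: chain the remaining num_frags buffers onto rx_mb for a
 * packet that spans more than one BD. Every consumed buffer is replaced by a
 * freshly allocated mbuf; a non-zero return tells the caller to recycle the
 * BDs and drop the packet.
 */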
818 int qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
819                          int num_frags, uint16_t pkt_len)
820 {
821         struct qede_rx_queue *rxq = p_rxq;
822         struct qede_dev *qdev = rxq->qdev;
823         struct ecore_dev *edev = &qdev->edev;
824         uint16_t sw_rx_index, cur_size;
825
826         register struct rte_mbuf *seg1 = NULL;
827         register struct rte_mbuf *seg2 = NULL;
828
829         seg1 = rx_mb;
830         while (num_frags) {
831                 cur_size = pkt_len > rxq->rx_buf_size ?
832                                 rxq->rx_buf_size : pkt_len;
833                 if (!cur_size) {
834                         PMD_RX_LOG(DEBUG, rxq,
835                                    "SG packet, len and num BD mismatch\n");
836                         qede_recycle_rx_bd_ring(rxq, qdev, num_frags);
837                         return -EINVAL;
838                 }
839
840                 if (qede_alloc_rx_buffer(rxq)) {
841                         uint8_t index;
842
843                         PMD_RX_LOG(DEBUG, rxq, "Buffer allocation failed\n");
844                         index = rxq->port_id;
845                         rte_eth_devices[index].data->rx_mbuf_alloc_failed++;
846                         rxq->rx_alloc_errors++;
847                         return -ENOMEM;
848                 }
849
850                 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
851                 seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
852                 qede_rx_bd_ring_consume(rxq);
853                 pkt_len -= cur_size;
854                 seg2->data_len = cur_size;
855                 seg1->next = seg2;
856                 seg1 = seg1->next;
857                 num_frags--;
858                 continue;
859         }
860         seg1 = NULL;
861
862         if (pkt_len)
863                 PMD_RX_LOG(DEBUG, rxq,
864                            "Mapped all BDs of jumbo, but still have %d bytes\n",
865                            pkt_len);
866
867         return ECORE_SUCCESS;
868 }
869
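/*
 * Rx burst handler. For each completed CQE: drop packets with checksum
 * errors, refill the BD ring, chain additional segments through
 * qede_process_sg_pkts() when bd_num > 1, fill in RSS hash and VLAN
 * metadata, and finally republish the Rx producers to firmware.
 */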
870 uint16_t
871 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
872 {
873         struct qede_rx_queue *rxq = p_rxq;
874         struct qede_dev *qdev = rxq->qdev;
875         struct ecore_dev *edev = &qdev->edev;
876         struct qede_fastpath *fp = &qdev->fp_array[rxq->queue_id];
877         uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index;
878         uint16_t rx_pkt = 0;
879         union eth_rx_cqe *cqe;
880         struct eth_fast_path_rx_reg_cqe *fp_cqe;
881         register struct rte_mbuf *rx_mb = NULL;
882         register struct rte_mbuf *seg1 = NULL;
883         enum eth_rx_cqe_type cqe_type;
884         uint16_t len, pad, preload_idx, pkt_len, parse_flag;
885         uint8_t csum_flag, num_frags;
886         enum rss_hash_type htype;
887         int ret;
888
889         hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
890         sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
891
892         rte_rmb();
893
894         if (hw_comp_cons == sw_comp_cons)
895                 return 0;
896
897         while (sw_comp_cons != hw_comp_cons) {
898                 /* Get the CQE from the completion ring */
899                 cqe =
900                     (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
901                 cqe_type = cqe->fast_path_regular.type;
902
903                 if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
904                         PMD_RX_LOG(DEBUG, rxq, "Got a slow path CQE\n");
905
906                         qdev->ops->eth_cqe_completion(edev, fp->id,
907                                 (struct eth_slow_path_rx_cqe *)cqe);
908                         goto next_cqe;
909                 }
910
911                 /* Get the data from the SW ring */
912                 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
913                 rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
914                 assert(rx_mb != NULL);
915
916                 /* non GRO */
917                 fp_cqe = &cqe->fast_path_regular;
918
919                 len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
920                 pad = fp_cqe->placement_offset;
921                 assert((len + pad) <= rx_mb->buf_len);
922
923                 PMD_RX_LOG(DEBUG, rxq,
924                            "CQE type = 0x%x, flags = 0x%x, vlan = 0x%x"
925                            " len = %u, parsing_flags = %d\n",
926                            cqe_type, fp_cqe->bitfields,
927                            rte_le_to_cpu_16(fp_cqe->vlan_tag),
928                            len, rte_le_to_cpu_16(fp_cqe->pars_flags.flags));
929
930                 /* If this is an error packet then drop it */
931                 parse_flag =
932                     rte_le_to_cpu_16(cqe->fast_path_regular.pars_flags.flags);
933                 csum_flag = qede_check_csum(parse_flag);
934                 if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
935                         PMD_RX_LOG(ERR, rxq,
936                                    "CQE in CONS = %u has error, flags = 0x%x "
937                                    "dropping incoming packet\n",
938                                    sw_comp_cons, parse_flag);
939                         rxq->rx_hw_errors++;
940                         qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num);
941                         goto next_cqe;
942                 }
943
944                 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
945                         PMD_RX_LOG(ERR, rxq,
946                                    "New buffer allocation failed, "
947                                    "dropping incoming packet\n");
948                         qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num);
949                         rte_eth_devices[rxq->port_id].
950                             data->rx_mbuf_alloc_failed++;
951                         rxq->rx_alloc_errors++;
952                         break;
953                 }
954
955                 qede_rx_bd_ring_consume(rxq);
956
957                 if (fp_cqe->bd_num > 1) {
958                         pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
959                         num_frags = fp_cqe->bd_num - 1;
960
961                         pkt_len -= len;
962                         seg1 = rx_mb;
963                         ret = qede_process_sg_pkts(p_rxq, seg1, num_frags,
964                                                    pkt_len);
965                         if (ret != ECORE_SUCCESS) {
966                                 qede_recycle_rx_bd_ring(rxq, qdev,
967                                                         fp_cqe->bd_num);
968                                 goto next_cqe;
969                         }
970                 }
971
972                 /* Prefetch next mbuf while processing current one. */
973                 preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
974                 rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
975
976                 /* Update MBUF fields */
977                 rx_mb->ol_flags = 0;
978                 rx_mb->data_off = pad + RTE_PKTMBUF_HEADROOM;
979                 rx_mb->nb_segs = fp_cqe->bd_num;
980                 rx_mb->data_len = len;
981                 rx_mb->pkt_len = fp_cqe->pkt_len;
982                 rx_mb->port = rxq->port_id;
983                 rx_mb->packet_type = qede_rx_cqe_to_pkt_type(parse_flag);
984
985                 htype = (uint8_t)GET_FIELD(fp_cqe->bitfields,
986                                 ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
987                 if (qdev->rss_enabled && htype) {
988                         rx_mb->ol_flags |= PKT_RX_RSS_HASH;
989                         rx_mb->hash.rss = rte_le_to_cpu_32(fp_cqe->rss_hash);
990                         PMD_RX_LOG(DEBUG, rxq, "Hash result 0x%x\n",
991                                    rx_mb->hash.rss);
992                 }
993
994                 rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));
995
996                 if (CQE_HAS_VLAN(parse_flag)) {
997                         rx_mb->vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
998                         rx_mb->ol_flags |= PKT_RX_VLAN_PKT;
999                 }
1000
1001                 if (CQE_HAS_OUTER_VLAN(parse_flag)) {
1002                         /* FW does not provide indication of Outer VLAN tag,
1003                          * which is always stripped, so vlan_tci_outer is set
1004                          * to 0. Here vlan_tag represents inner VLAN tag.
1005                          */
1006                         rx_mb->vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1007                         rx_mb->ol_flags |= PKT_RX_QINQ_PKT;
1008                         rx_mb->vlan_tci_outer = 0;
1009                 }
1010
1011                 rx_pkts[rx_pkt] = rx_mb;
1012                 rx_pkt++;
1013 next_cqe:
1014                 ecore_chain_recycle_consumed(&rxq->rx_comp_ring);
1015                 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1016                 if (rx_pkt == nb_pkts) {
1017                         PMD_RX_LOG(DEBUG, rxq,
1018                                    "Budget reached nb_pkts=%u received=%u\n",
1019                                    rx_pkt, nb_pkts);
1020                         break;
1021                 }
1022         }
1023
1024         qede_update_rx_prod(qdev, rxq);
1025
1026         PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d\n", rx_pkt, rte_lcore_id());
1027
1028         return rx_pkt;
1029 }
1030
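/*
 * Free the mbuf sitting at the Tx software consumer index and return one BD
 * to the chain for every segment of that packet.
 */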
1031 static inline int
1032 qede_free_tx_pkt(struct ecore_dev *edev, struct qede_tx_queue *txq)
1033 {
1034         uint16_t nb_segs, idx = TX_CONS(txq);
1035         struct eth_tx_bd *tx_data_bd;
1036         struct rte_mbuf *mbuf = txq->sw_tx_ring[idx].mbuf;
1037
1038         if (unlikely(!mbuf)) {
1039                 PMD_TX_LOG(ERR, txq, "null mbuf\n");
1040                 PMD_TX_LOG(ERR, txq,
1041                            "tx_desc %u tx_avail %u tx_cons %u tx_prod %u\n",
1042                            txq->nb_tx_desc, txq->nb_tx_avail, idx,
1043                            TX_PROD(txq));
1044                 return -1;
1045         }
1046
1047         nb_segs = mbuf->nb_segs;
1048         while (nb_segs) {
1049                 /* It's like consuming rxbuf in recv() */
1050                 ecore_chain_consume(&txq->tx_pbl);
1051                 txq->nb_tx_avail++;
1052                 nb_segs--;
1053         }
1054         rte_pktmbuf_free(mbuf);
1055         txq->sw_tx_ring[idx].mbuf = NULL;
1056
1057         return 0;
1058 }
1059
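/*
 * Walk the Tx PBL up to the hardware consumer index reported in the status
 * block, freeing each completed packet and making its descriptors available
 * again.
 */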
1060 static inline uint16_t
1061 qede_process_tx_compl(struct ecore_dev *edev, struct qede_tx_queue *txq)
1062 {
1063         uint16_t tx_compl = 0;
1064         uint16_t hw_bd_cons;
1065
1066         hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
1067         rte_compiler_barrier();
1068
1069         while (hw_bd_cons != ecore_chain_get_cons_idx(&txq->tx_pbl)) {
1070                 if (qede_free_tx_pkt(edev, txq)) {
1071                         PMD_TX_LOG(ERR, txq,
1072                                    "hw_bd_cons = %u, chain_cons = %u\n",
1073                                    hw_bd_cons,
1074                                    ecore_chain_get_cons_idx(&txq->tx_pbl));
1075                         break;
1076                 }
1077                 txq->sw_tx_cons++;      /* Making TXD available */
1078                 tx_compl++;
1079         }
1080
1081         PMD_TX_LOG(DEBUG, txq, "Tx compl %u sw_tx_cons %u avail %u\n",
1082                    tx_compl, txq->sw_tx_cons, txq->nb_tx_avail);
1083         return tx_compl;
1084 }
1085
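/*
 * The caller fills the first BD; this helper produces one additional BD per
 * extra mbuf segment (2nd BD, 3rd BD, then plain BDs), keeps bd1->data.nbds
 * in sync and returns the total BD count so the caller can charge it against
 * nb_tx_avail.
 */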
1086 /* Populate scatter gather buffer descriptor fields */
1087 static inline uint16_t qede_encode_sg_bd(struct qede_tx_queue *p_txq,
1088                                          struct rte_mbuf *m_seg,
1089                                          uint16_t count,
1090                                          struct eth_tx_1st_bd *bd1)
1091 {
1092         struct qede_tx_queue *txq = p_txq;
1093         struct eth_tx_2nd_bd *bd2 = NULL;
1094         struct eth_tx_3rd_bd *bd3 = NULL;
1095         struct eth_tx_bd *tx_bd = NULL;
1096         uint16_t nb_segs = count;
1097         dma_addr_t mapping;
1098
1099         /* Check for scattered buffers */
1100         while (m_seg) {
1101                 if (nb_segs == 1) {
1102                         bd2 = (struct eth_tx_2nd_bd *)
1103                                 ecore_chain_produce(&txq->tx_pbl);
1104                         memset(bd2, 0, sizeof(*bd2));
1105                         mapping = rte_mbuf_data_dma_addr(m_seg);
1106                         bd2->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
1107                         bd2->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
1108                         bd2->nbytes = rte_cpu_to_le_16(m_seg->data_len);
1109                 } else if (nb_segs == 2) {
1110                         bd3 = (struct eth_tx_3rd_bd *)
1111                                 ecore_chain_produce(&txq->tx_pbl);
1112                         memset(bd3, 0, sizeof(*bd3));
1113                         mapping = rte_mbuf_data_dma_addr(m_seg);
1114                         bd3->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
1115                         bd3->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
1116                         bd3->nbytes = rte_cpu_to_le_16(m_seg->data_len);
1117                 } else {
1118                         tx_bd = (struct eth_tx_bd *)
1119                                 ecore_chain_produce(&txq->tx_pbl);
1120                         memset(tx_bd, 0, sizeof(*tx_bd));
1121                         mapping = rte_mbuf_data_dma_addr(m_seg);
1122                         tx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
1123                         tx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
1124                         tx_bd->nbytes = rte_cpu_to_le_16(m_seg->data_len);
1125                 }
1126                 nb_segs++;
1127                 bd1->data.nbds = nb_segs;
1128                 m_seg = m_seg->next;
1129         }
1130
1131         /* Return total scattered buffers */
1132         return nb_segs;
1133 }
1134
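/*
 * Tx burst handler. The number of packets accepted is limited by the free BD
 * count, assuming the worst case of ETH_TX_MAX_BDS_PER_NON_LSO_PACKET BDs
 * per packet. The first BD of each packet carries the offload flags (VLAN
 * insertion, IP/L4 checksum); extra segments are encoded by
 * qede_encode_sg_bd(). One doorbell with the final producer index is rung
 * after the loop.
 */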
1135 uint16_t
1136 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1137 {
1138         struct qede_tx_queue *txq = p_txq;
1139         struct qede_dev *qdev = txq->qdev;
1140         struct ecore_dev *edev = &qdev->edev;
1141         struct qede_fastpath *fp;
1142         struct eth_tx_1st_bd *bd1;
1143         struct rte_mbuf *m_seg = NULL;
1144         uint16_t nb_tx_pkts;
1145         uint16_t nb_pkt_sent = 0;
1146         uint16_t bd_prod;
1147         uint16_t idx;
1148         uint16_t tx_count;
1149         uint16_t nb_segs = 0;
1150
1151         fp = &qdev->fp_array[QEDE_RSS_COUNT(qdev) + txq->queue_id];
1152
1153         if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
1154                 PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u\n",
1155                            nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
1156                 (void)qede_process_tx_compl(edev, txq);
1157         }
1158
1159         nb_tx_pkts = RTE_MIN(nb_pkts, (txq->nb_tx_avail /
1160                         ETH_TX_MAX_BDS_PER_NON_LSO_PACKET));
1161         if (unlikely(nb_tx_pkts == 0)) {
1162                 PMD_TX_LOG(DEBUG, txq, "Out of BDs nb_pkts=%u avail=%u\n",
1163                            nb_pkts, txq->nb_tx_avail);
1164                 return 0;
1165         }
1166
1167         tx_count = nb_tx_pkts;
1168         while (nb_tx_pkts--) {
1169                 /* Fill the entry in the SW ring and the BDs in the FW ring */
1170                 idx = TX_PROD(txq);
1171                 struct rte_mbuf *mbuf = *tx_pkts++;
1172
1173                 txq->sw_tx_ring[idx].mbuf = mbuf;
1174                 bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
1175                 /* Zero init struct fields */
1176                 bd1->data.bd_flags.bitfields = 0;
1177                 bd1->data.bitfields = 0;
1178
1179                 bd1->data.bd_flags.bitfields =
1180                         1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
1181                 /* Map MBUF linear data for DMA and set in the first BD */
1182                 QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
1183                                      mbuf->pkt_len);
1184
1185                 /* Descriptor based VLAN insertion */
1186                 if (mbuf->ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
1187                         bd1->data.vlan = rte_cpu_to_le_16(mbuf->vlan_tci);
1188                         bd1->data.bd_flags.bitfields |=
1189                             1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
1190                 }
1191
1192                 /* Offload the IP checksum in the hardware */
1193                 if (mbuf->ol_flags & PKT_TX_IP_CKSUM) {
1194                         bd1->data.bd_flags.bitfields |=
1195                             1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
1196                 }
1197
1198                 /* L4 checksum offload (tcp or udp) */
1199                 if (mbuf->ol_flags & (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
1200                         bd1->data.bd_flags.bitfields |=
1201                             1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
1202                         /* IPv6 + extn. -> later */
1203                 }
1204
1205                 /* Handle fragmented MBUF */
1206                 m_seg = mbuf->next;
1207                 nb_segs++;
1208                 bd1->data.nbds = nb_segs;
1209                 /* Encode scatter gather buffer descriptors if required */
1210                 nb_segs = qede_encode_sg_bd(txq, m_seg, nb_segs, bd1);
1211                 txq->nb_tx_avail = txq->nb_tx_avail - nb_segs;
1212                 nb_segs = 0;
1213                 txq->sw_tx_prod++;
1214                 rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
1215                 bd_prod =
1216                     rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
1217                 nb_pkt_sent++;
1218         }
1219
1220         /* Write value of prod idx into bd_prod */
1221         txq->tx_db.data.bd_prod = bd_prod;
1222         rte_wmb();
1223         rte_compiler_barrier();
1224         DIRECT_REG_WR(edev, txq->doorbell_addr, txq->tx_db.raw);
1225         rte_wmb();
1226
1227         /* Check again for Tx completions */
1228         (void)qede_process_tx_compl(edev, txq);
1229
1230         PMD_TX_LOG(DEBUG, txq, "to_send=%u can_send=%u sent=%u core=%d\n",
1231                    nb_pkts, tx_count, nb_pkt_sent, rte_lcore_id());
1232
1233         return nb_pkt_sent;
1234 }
1235
1236 static void qede_init_fp_queue(struct rte_eth_dev *eth_dev)
1237 {
1238         struct qede_dev *qdev = eth_dev->data->dev_private;
1239         struct qede_fastpath *fp;
1240         uint8_t i, rss_id, txq_index, tc;
1241         int rxq = 0, txq = 0;
1242
1243         for_each_queue(i) {
1244                 fp = &qdev->fp_array[i];
1245                 if (fp->type & QEDE_FASTPATH_RX) {
1246                         fp->rxq = eth_dev->data->rx_queues[i];
1247                         fp->rxq->queue_id = rxq++;
1248                 }
1249
1250                 if (fp->type & QEDE_FASTPATH_TX) {
1251                         for (tc = 0; tc < qdev->num_tc; tc++) {
1252                                 txq_index = tc * QEDE_TSS_COUNT(qdev) + txq;
1253                                 fp->txqs[tc] =
1254                                         eth_dev->data->tx_queues[txq_index];
1255                                 fp->txqs[tc]->queue_id = txq_index;
1256                         }
1257                         txq++;
1258                 }
1259         }
1260 }
1261
1262 int qede_dev_start(struct rte_eth_dev *eth_dev)
1263 {
1264         struct qede_dev *qdev = eth_dev->data->dev_private;
1265         struct ecore_dev *edev = &qdev->edev;
1266         struct qed_link_output link_output;
1267         struct qede_fastpath *fp;
1268         int rc, i;
1269
1270         DP_INFO(edev, "Device state is %d\n", qdev->state);
1271
1272         if (qdev->state == QEDE_DEV_START) {
1273                 DP_INFO(edev, "Port is already started\n");
1274                 return 0;
1275         }
1276
1277         if (qdev->state == QEDE_DEV_CONFIG)
1278                 qede_init_fp_queue(eth_dev);
1279
1280         rc = qede_start_queues(eth_dev, true);
1281         if (rc) {
1282                 DP_ERR(edev, "Failed to start queues\n");
1283                 /* TBD: free */
1284                 return rc;
1285         }
1286
1287         /* Bring-up the link */
1288         qede_dev_set_link_state(eth_dev, true);
1289
1290         /* Reset ring */
1291         if (qede_reset_fp_rings(qdev))
1292                 return -ENOMEM;
1293
1294         /* Start/resume traffic */
1295         qdev->ops->fastpath_start(edev);
1296
1297         qdev->state = QEDE_DEV_START;
1298
1299         DP_INFO(edev, "dev_state is QEDE_DEV_START\n");
1300
1301         return 0;
1302 }
1303
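/*
 * Poll Tx completions until the software producer and consumer meet, waiting
 * up to roughly 1000 x 1ms. If the queue is still stuck and allow_drain is
 * set, request an MCP drain once and retry before giving up with -ENODEV.
 */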
1304 static int qede_drain_txq(struct qede_dev *qdev,
1305                           struct qede_tx_queue *txq, bool allow_drain)
1306 {
1307         struct ecore_dev *edev = &qdev->edev;
1308         int rc, cnt = 1000;
1309
1310         while (txq->sw_tx_cons != txq->sw_tx_prod) {
1311                 qede_process_tx_compl(edev, txq);
1312                 if (!cnt) {
1313                         if (allow_drain) {
1314                                 DP_NOTICE(edev, false,
1315                                           "Tx queue[%u] is stuck, "
1316                                           "requesting MCP to drain\n",
1317                                           txq->queue_id);
1318                                 rc = qdev->ops->common->drain(edev);
1319                                 if (rc)
1320                                         return rc;
1321                                 return qede_drain_txq(qdev, txq, false);
1322                         }
1323
1324                         DP_NOTICE(edev, false,
1325                                   "Timeout waiting for tx queue[%d]: "
1326                                   "PROD=%d, CONS=%d\n",
1327                                   txq->queue_id, txq->sw_tx_prod,
1328                                   txq->sw_tx_cons);
1329                         return -ENODEV;
1330                 }
1331                 cnt--;
1332                 DELAY(1000);
1333                 rte_compiler_barrier();
1334         }
1335
1336         /* FW finished processing, wait for HW to transmit all tx packets */
1337         DELAY(2000);
1338
1339         return 0;
1340 }
1341
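/*
 * Deactivate the vport, drain every Tx queue (asking the MCP to drain a
 * stuck queue if needed) and then stop the Tx and Rx queues in reverse
 * order.
 */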
1342 static int qede_stop_queues(struct qede_dev *qdev)
1343 {
1344         struct qed_update_vport_params vport_update_params;
1345         struct ecore_dev *edev = &qdev->edev;
1346         int rc, tc, i;
1347
1348         /* Disable the vport */
1349         memset(&vport_update_params, 0, sizeof(vport_update_params));
1350         vport_update_params.vport_id = 0;
1351         vport_update_params.update_vport_active_flg = 1;
1352         vport_update_params.vport_active_flg = 0;
1353         vport_update_params.update_rss_flg = 0;
1354
1355         DP_INFO(edev, "Deactivate vport\n");
1356
1357         rc = qdev->ops->vport_update(edev, &vport_update_params);
1358         if (rc) {
1359                 DP_ERR(edev, "Failed to update vport\n");
1360                 return rc;
1361         }
1362
1363         DP_INFO(edev, "Flushing tx queues\n");
1364
1365         /* Flush Tx queues. If needed, request drain from MCP */
1366         for_each_queue(i) {
1367                 struct qede_fastpath *fp = &qdev->fp_array[i];
1368
1369                 if (fp->type & QEDE_FASTPATH_TX) {
1370                         for (tc = 0; tc < qdev->num_tc; tc++) {
1371                                 struct qede_tx_queue *txq = fp->txqs[tc];
1372
1373                                 rc = qede_drain_txq(qdev, txq, true);
1374                                 if (rc)
1375                                         return rc;
1376                         }
1377                 }
1378         }
1379
1380         /* Stop all Queues in reverse order */
1381         for (i = QEDE_QUEUE_CNT(qdev) - 1; i >= 0; i--) {
1382                 struct qed_stop_rxq_params rx_params;
1383
1384                 /* Stop the Tx Queue(s) */
1385                 if (qdev->fp_array[i].type & QEDE_FASTPATH_TX) {
1386                         for (tc = 0; tc < qdev->num_tc; tc++) {
1387                                 struct qed_stop_txq_params tx_params;
1388                                 u8 val;
1389
1390                                 tx_params.rss_id = i;
1391                                 val = qdev->fp_array[i].txqs[tc]->queue_id;
1392                                 tx_params.tx_queue_id = val;
1393
1394                                 DP_INFO(edev, "Stopping tx queues\n");
1395                                 rc = qdev->ops->q_tx_stop(edev, &tx_params);
1396                                 if (rc) {
1397                                         DP_ERR(edev, "Failed to stop TXQ #%d\n",
1398                                                tx_params.tx_queue_id);
1399                                         return rc;
1400                                 }
1401                         }
1402                 }
1403
1404                 /* Stop the Rx Queue */
1405                 if (qdev->fp_array[i].type & QEDE_FASTPATH_RX) {
1406                         memset(&rx_params, 0, sizeof(rx_params));
1407                         rx_params.rss_id = i;
1408                         rx_params.rx_queue_id = qdev->fp_array[i].rxq->queue_id;
1409                         rx_params.eq_completion_only = 1;
1410
1411                         DP_INFO(edev, "Stopping rx queues\n");
1412
1413                         rc = qdev->ops->q_rx_stop(edev, &rx_params);
1414                         if (rc) {
1415                                 DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
1416                                 return rc;
1417                         }
1418                 }
1419         }
1420
1421         return 0;
1422 }
1423
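/*
 * Return every fastpath ring to its freshly allocated state: release and
 * re-allocate the Rx buffers, reset the BD, completion and Tx PBL chains and
 * zero the software and hardware producer/consumer indices.
 */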
1424 int qede_reset_fp_rings(struct qede_dev *qdev)
1425 {
1426         struct qede_fastpath *fp;
1427         struct qede_tx_queue *txq;
1428         uint8_t tc;
1429         uint16_t id, i;
1430
1431         for_each_queue(id) {
1432                 fp = &qdev->fp_array[id];
1433
1434                 if (fp->type & QEDE_FASTPATH_RX) {
1435                         DP_INFO(&qdev->edev,
1436                                 "Reset FP chain for RSS %u\n", id);
1437                         qede_rx_queue_release_mbufs(fp->rxq);
1438                         ecore_chain_reset(&fp->rxq->rx_bd_ring);
1439                         ecore_chain_reset(&fp->rxq->rx_comp_ring);
1440                         fp->rxq->sw_rx_prod = 0;
1441                         fp->rxq->sw_rx_cons = 0;
1442                         *fp->rxq->hw_cons_ptr = 0;
1443                         for (i = 0; i < fp->rxq->nb_rx_desc; i++) {
1444                                 if (qede_alloc_rx_buffer(fp->rxq)) {
1445                                         DP_ERR(&qdev->edev,
1446                                                "RX buffer allocation failed\n");
1447                                         return -ENOMEM;
1448                                 }
1449                         }
1450                 }
1451                 if (fp->type & QEDE_FASTPATH_TX) {
1452                         for (tc = 0; tc < qdev->num_tc; tc++) {
1453                                 txq = fp->txqs[tc];
1454                                 qede_tx_queue_release_mbufs(txq);
1455                                 ecore_chain_reset(&txq->tx_pbl);
1456                                 txq->sw_tx_cons = 0;
1457                                 txq->sw_tx_prod = 0;
1458                                 *txq->hw_cons_ptr = 0;
1459                         }
1460                 }
1461         }
1462
1463         return 0;
1464 }
1465
1466 /* This function frees the Rx/Tx queue memory of all fastpaths */
1467 void qede_free_mem_load(struct rte_eth_dev *eth_dev)
1468 {
1469         struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
1470         struct qede_fastpath *fp;
1471         uint16_t txq_idx;
1472         uint8_t id;
1473         uint8_t tc;
1474
1475         for_each_queue(id) {
1476                 fp = &qdev->fp_array[id];
1477                 if (fp->type & QEDE_FASTPATH_RX) {
1478                         qede_rx_queue_release(fp->rxq);
1479                         eth_dev->data->rx_queues[id] = NULL;
1480                 } else {
1481                         for (tc = 0; tc < qdev->num_tc; tc++) {
1482                                 txq_idx = fp->txqs[tc]->queue_id;
1483                                 qede_tx_queue_release(fp->txqs[tc]);
1484                                 eth_dev->data->tx_queues[txq_idx] = NULL;
1485                         }
1486                 }
1487         }
1488 }
1489
1490 void qede_dev_stop(struct rte_eth_dev *eth_dev)
1491 {
1492         struct qede_dev *qdev = eth_dev->data->dev_private;
1493         struct ecore_dev *edev = &qdev->edev;
1494
1495         DP_INFO(edev, "port %u\n", eth_dev->data->port_id);
1496
1497         if (qdev->state != QEDE_DEV_START) {
1498                 DP_INFO(edev, "Device not yet started\n");
1499                 return;
1500         }
1501
1502         if (qede_stop_queues(qdev))
1503                 DP_ERR(edev, "Failed to stop queues\n");
1504
1505         DP_INFO(edev, "Stopped queues\n");
1506
1507         qdev->ops->fastpath_stop(edev);
1508
1509         /* Bring the link down */
1510         qede_dev_set_link_state(eth_dev, false);
1511
1512         qdev->state = QEDE_DEV_STOP;
1513
1514         DP_INFO(edev, "dev_state is QEDE_DEV_STOP\n");
1515 }