net/qede: fix LRO handling
[dpdk.git] drivers/net/qede/qede_rxtx.c
1 /*
2  * Copyright (c) 2016 QLogic Corporation.
3  * All rights reserved.
4  * www.qlogic.com
5  *
6  * See LICENSE.qede_pmd for copyright and licensing details.
7  */
8
9 #include <rte_net.h>
10 #include "qede_rxtx.h"
11
12 static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
13 {
14         struct rte_mbuf *new_mb = NULL;
15         struct eth_rx_bd *rx_bd;
16         dma_addr_t mapping;
17         uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
18
19         new_mb = rte_mbuf_raw_alloc(rxq->mb_pool);
20         if (unlikely(!new_mb)) {
21                 PMD_RX_LOG(ERR, rxq,
22                            "Failed to allocate rx buffer "
23                            "sw_rx_prod %u sw_rx_cons %u mp avail %u in-use %u",
24                            idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
25                            rte_mempool_avail_count(rxq->mb_pool),
26                            rte_mempool_in_use_count(rxq->mb_pool));
27                 return -ENOMEM;
28         }
29         rxq->sw_rx_ring[idx].mbuf = new_mb;
30         rxq->sw_rx_ring[idx].page_offset = 0;
31         mapping = rte_mbuf_data_dma_addr_default(new_mb);
32         /* Advance PROD and get BD pointer */
33         rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
34         rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
35         rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
36         rxq->sw_rx_prod++;
37         return 0;
38 }
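
/*
 * Illustrative sketch (not part of the driver): the software ring above is
 * indexed with a power-of-two mask, which is why qede_rx_queue_setup()
 * insists on a power-of-two nb_desc.  The standalone example below mimics
 * that producer/consumer accounting in plain C; NUM_RX_BDS() is assumed to
 * expand to a "ring size - 1" style mask (see qede_rxtx.h).
 */
#if 0   /* example only, never compiled */
#include <stdint.h>
#include <stdio.h>

#define EX_RING_SIZE 8                      /* must be a power of two */
#define EX_RING_MASK (EX_RING_SIZE - 1)     /* plays the role of NUM_RX_BDS() */

static void example_ring_indexing(void)
{
        uint16_t prod = 0, cons = 0;
        int i;

        /* Produce five entries, then consume two of them. */
        for (i = 0; i < 5; i++)
                printf("produce slot %u\n", prod++ & EX_RING_MASK);
        for (i = 0; i < 2; i++)
                printf("consume slot %u\n", cons++ & EX_RING_MASK);

        /* Outstanding buffers = prod - cons, valid even across wrap-around. */
        printf("in flight: %u\n", (uint16_t)(prod - cons));
}
#endif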
39
40 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
41 {
42         uint16_t i;
43
44         if (rxq->sw_rx_ring != NULL) {
45                 for (i = 0; i < rxq->nb_rx_desc; i++) {
46                         if (rxq->sw_rx_ring[i].mbuf != NULL) {
47                                 rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf);
48                                 rxq->sw_rx_ring[i].mbuf = NULL;
49                         }
50                 }
51         }
52 }
53
54 void qede_rx_queue_release(void *rx_queue)
55 {
56         struct qede_rx_queue *rxq = rx_queue;
57
58         if (rxq != NULL) {
59                 qede_rx_queue_release_mbufs(rxq);
60                 rte_free(rxq->sw_rx_ring);
61                 rxq->sw_rx_ring = NULL;
62                 rte_free(rxq);
63                 rxq = NULL;
64         }
65 }
66
67 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
68 {
69         unsigned int i;
70
71         PMD_TX_LOG(DEBUG, txq, "releasing %u mbufs", txq->nb_tx_desc);
72
73         if (txq->sw_tx_ring) {
74                 for (i = 0; i < txq->nb_tx_desc; i++) {
75                         if (txq->sw_tx_ring[i].mbuf) {
76                                 rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf);
77                                 txq->sw_tx_ring[i].mbuf = NULL;
78                         }
79                 }
80         }
81 }
82
83 int
84 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
85                     uint16_t nb_desc, unsigned int socket_id,
86                     const struct rte_eth_rxconf *rx_conf,
87                     struct rte_mempool *mp)
88 {
89         struct qede_dev *qdev = dev->data->dev_private;
90         struct ecore_dev *edev = &qdev->edev;
91         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
92         struct qede_rx_queue *rxq;
93         uint16_t max_rx_pkt_len;
94         uint16_t bufsz;
95         size_t size;
96         int rc;
97         int i;
98
99         PMD_INIT_FUNC_TRACE(edev);
100
101         /* Note: Ring size/align is controlled by struct rte_eth_desc_lim */
102         if (!rte_is_power_of_2(nb_desc)) {
103                 DP_ERR(edev, "Ring size %u is not power of 2\n",
104                           nb_desc);
105                 return -EINVAL;
106         }
107
108         /* Free memory prior to re-allocation if needed... */
109         if (dev->data->rx_queues[queue_idx] != NULL) {
110                 qede_rx_queue_release(dev->data->rx_queues[queue_idx]);
111                 dev->data->rx_queues[queue_idx] = NULL;
112         }
113
114         /* First allocate the rx queue data structure */
115         rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue),
116                                  RTE_CACHE_LINE_SIZE, socket_id);
117
118         if (!rxq) {
119                 DP_ERR(edev, "Unable to allocate memory for rxq on socket %u",
120                           socket_id);
121                 return -ENOMEM;
122         }
123
124         rxq->qdev = qdev;
125         rxq->mb_pool = mp;
126         rxq->nb_rx_desc = nb_desc;
127         rxq->queue_id = queue_idx;
128         rxq->port_id = dev->data->port_id;
129         max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;
130         qdev->mtu = max_rx_pkt_len;
131
132         /* Fix up RX buffer size */
133         bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
134         if ((rxmode->enable_scatter)                    ||
135             (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
136                 if (!dev->data->scattered_rx) {
137                         DP_INFO(edev, "Forcing scatter-gather mode\n");
138                         dev->data->scattered_rx = 1;
139                 }
140         }
141         if (dev->data->scattered_rx)
142                 rxq->rx_buf_size = bufsz + QEDE_ETH_OVERHEAD;
143         else
144                 rxq->rx_buf_size = qdev->mtu + QEDE_ETH_OVERHEAD;
145         /* Align to cache-line size if needed */
146         rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size);
147
148         DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
149                 qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
150
151         /* Allocate the parallel driver ring for Rx buffers */
152         size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
153         rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size,
154                                              RTE_CACHE_LINE_SIZE, socket_id);
155         if (!rxq->sw_rx_ring) {
156                 DP_NOTICE(edev, false,
157                           "Unable to alloc memory for sw_rx_ring on socket %u\n",
158                           socket_id);
159                 rte_free(rxq);
160                 rxq = NULL;
161                 return -ENOMEM;
162         }
163
164         /* Allocate FW Rx ring  */
165         rc = qdev->ops->common->chain_alloc(edev,
166                                             ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
167                                             ECORE_CHAIN_MODE_NEXT_PTR,
168                                             ECORE_CHAIN_CNT_TYPE_U16,
169                                             rxq->nb_rx_desc,
170                                             sizeof(struct eth_rx_bd),
171                                             &rxq->rx_bd_ring,
172                                             NULL);
173
174         if (rc != ECORE_SUCCESS) {
175                 DP_NOTICE(edev, false,
176                           "Unable to alloc memory for rxbd ring on socket %u\n",
177                           socket_id);
178                 rte_free(rxq->sw_rx_ring);
179                 rxq->sw_rx_ring = NULL;
180                 rte_free(rxq);
181                 rxq = NULL;
182                 return -ENOMEM;
183         }
184
185         /* Allocate FW completion ring */
186         rc = qdev->ops->common->chain_alloc(edev,
187                                             ECORE_CHAIN_USE_TO_CONSUME,
188                                             ECORE_CHAIN_MODE_PBL,
189                                             ECORE_CHAIN_CNT_TYPE_U16,
190                                             rxq->nb_rx_desc,
191                                             sizeof(union eth_rx_cqe),
192                                             &rxq->rx_comp_ring,
193                                             NULL);
194
195         if (rc != ECORE_SUCCESS) {
196                 DP_NOTICE(edev, false,
197                           "Unable to alloc memory for cqe ring on socket %u\n",
198                           socket_id);
199                 /* TBD: Freeing RX BD ring */
200                 rte_free(rxq->sw_rx_ring);
201                 rxq->sw_rx_ring = NULL;
202                 rte_free(rxq);
203                 return -ENOMEM;
204         }
205
206         /* Allocate buffers for the Rx ring */
207         for (i = 0; i < rxq->nb_rx_desc; i++) {
208                 rc = qede_alloc_rx_buffer(rxq);
209                 if (rc) {
210                         DP_NOTICE(edev, false,
211                                   "RX buffer allocation failed at idx=%d\n", i);
212                         goto err4;
213                 }
214         }
215
216         dev->data->rx_queues[queue_idx] = rxq;
217
218         DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n",
219                   queue_idx, nb_desc, rxq->rx_buf_size, socket_id);
220
221         return 0;
222 err4:
223         qede_rx_queue_release(rxq);
224         return -ENOMEM;
225 }
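
/*
 * Illustrative sketch (not part of the driver): the scatter/buffer-size
 * decision made in qede_rx_queue_setup(), in condensed form.  The overhead
 * constant is a placeholder for QEDE_ETH_OVERHEAD, which is assumed to cover
 * the L2 header, VLAN tags and CRC; the real value lives in qede_rxtx.h.
 */
#if 0   /* example only, never compiled */
#include <stdint.h>
#include <stdbool.h>

#define EX_ETH_OVERHEAD 24      /* placeholder for QEDE_ETH_OVERHEAD */

/* Returns true when scatter-gather Rx must be used for the given config. */
static bool example_needs_scatter(bool scatter_requested,
                                  uint16_t max_rx_pkt_len,
                                  uint16_t mbuf_data_room,
                                  uint16_t headroom)
{
        uint16_t bufsz = mbuf_data_room - headroom;

        return scatter_requested ||
               (uint32_t)(max_rx_pkt_len + EX_ETH_OVERHEAD) > bufsz;
}
#endif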
226
227 void qede_tx_queue_release(void *tx_queue)
228 {
229         struct qede_tx_queue *txq = tx_queue;
230
231         if (txq != NULL) {
232                 qede_tx_queue_release_mbufs(txq);
233                 if (txq->sw_tx_ring) {
234                         rte_free(txq->sw_tx_ring);
235                         txq->sw_tx_ring = NULL;
236                 }
237                 rte_free(txq);
238         }
239         txq = NULL;
240 }
241
242 int
243 qede_tx_queue_setup(struct rte_eth_dev *dev,
244                     uint16_t queue_idx,
245                     uint16_t nb_desc,
246                     unsigned int socket_id,
247                     const struct rte_eth_txconf *tx_conf)
248 {
249         struct qede_dev *qdev = dev->data->dev_private;
250         struct ecore_dev *edev = &qdev->edev;
251         struct qede_tx_queue *txq;
252         int rc;
253
254         PMD_INIT_FUNC_TRACE(edev);
255
256         if (!rte_is_power_of_2(nb_desc)) {
257                 DP_ERR(edev, "Ring size %u is not power of 2\n",
258                        nb_desc);
259                 return -EINVAL;
260         }
261
262         /* Free memory prior to re-allocation if needed... */
263         if (dev->data->tx_queues[queue_idx] != NULL) {
264                 qede_tx_queue_release(dev->data->tx_queues[queue_idx]);
265                 dev->data->tx_queues[queue_idx] = NULL;
266         }
267
268         txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue),
269                                  RTE_CACHE_LINE_SIZE, socket_id);
270
271         if (txq == NULL) {
272                 DP_ERR(edev,
273                        "Unable to allocate memory for txq on socket %u",
274                        socket_id);
275                 return -ENOMEM;
276         }
277
278         txq->nb_tx_desc = nb_desc;
279         txq->qdev = qdev;
280         txq->port_id = dev->data->port_id;
281
282         rc = qdev->ops->common->chain_alloc(edev,
283                                             ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
284                                             ECORE_CHAIN_MODE_PBL,
285                                             ECORE_CHAIN_CNT_TYPE_U16,
286                                             txq->nb_tx_desc,
287                                             sizeof(union eth_tx_bd_types),
288                                             &txq->tx_pbl,
289                                             NULL);
290         if (rc != ECORE_SUCCESS) {
291                 DP_ERR(edev,
292                        "Unable to allocate memory for txbd ring on socket %u",
293                        socket_id);
294                 qede_tx_queue_release(txq);
295                 return -ENOMEM;
296         }
297
298         /* Allocate software ring */
299         txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring",
300                                              (sizeof(struct qede_tx_entry) *
301                                               txq->nb_tx_desc),
302                                              RTE_CACHE_LINE_SIZE, socket_id);
303
304         if (!txq->sw_tx_ring) {
305                 DP_ERR(edev,
306                        "Unable to allocate memory for txbd ring on socket %u",
307                        socket_id);
308                 qede_tx_queue_release(txq);
309                 return -ENOMEM;
310         }
311
312         txq->queue_id = queue_idx;
313
314         txq->nb_tx_avail = txq->nb_tx_desc;
315
316         txq->tx_free_thresh =
317             tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
318             (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH);
319
320         dev->data->tx_queues[queue_idx] = txq;
321
322         DP_INFO(edev,
323                   "txq %u num_desc %u tx_free_thresh %u socket %u\n",
324                   queue_idx, nb_desc, txq->tx_free_thresh, socket_id);
325
326         return 0;
327 }
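
/*
 * Illustrative sketch (not part of the driver): how an application reaches
 * qede_rx_queue_setup()/qede_tx_queue_setup() through the ethdev API.  Port
 * and queue ids, descriptor counts and the mempool are arbitrary choices for
 * the example; note that nb_desc must be a power of two for this PMD.
 * Requires <rte_ethdev.h>.
 */
#if 0   /* example only, never compiled */
static int example_setup_queues(uint8_t port_id, struct rte_mempool *mp)
{
        int ret;

        /* Lands in qede_rx_queue_setup() */
        ret = rte_eth_rx_queue_setup(port_id, 0 /* queue */, 1024 /* desc */,
                                     rte_eth_dev_socket_id(port_id),
                                     NULL /* default rxconf */, mp);
        if (ret < 0)
                return ret;

        /* Lands in qede_tx_queue_setup() */
        return rte_eth_tx_queue_setup(port_id, 0 /* queue */, 1024 /* desc */,
                                      rte_eth_dev_socket_id(port_id),
                                      NULL /* default txconf */);
}
#endif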
328
329 /* This function inits fp content and resets the SB, RXQ and TXQ arrays */
330 static void qede_init_fp(struct qede_dev *qdev)
331 {
332         struct qede_fastpath *fp;
333         uint8_t i, rss_id, tc;
334         int fp_rx = qdev->fp_num_rx, rxq = 0, txq = 0;
335
336         memset((void *)qdev->fp_array, 0, (QEDE_QUEUE_CNT(qdev) *
337                                            sizeof(*qdev->fp_array)));
338         memset((void *)qdev->sb_array, 0, (QEDE_QUEUE_CNT(qdev) *
339                                            sizeof(*qdev->sb_array)));
340         for_each_queue(i) {
341                 fp = &qdev->fp_array[i];
342                 if (fp_rx) {
343                         fp->type = QEDE_FASTPATH_RX;
344                         fp_rx--;
345                 } else {
346                         fp->type = QEDE_FASTPATH_TX;
347                 }
348                 fp->qdev = qdev;
349                 fp->id = i;
350                 fp->sb_info = &qdev->sb_array[i];
351                 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", "qdev", i);
352         }
353
354 }
355
356 void qede_free_fp_arrays(struct qede_dev *qdev)
357 {
358         /* It assumes qede_free_mem_load() is called before */
359         if (qdev->fp_array != NULL) {
360                 rte_free(qdev->fp_array);
361                 qdev->fp_array = NULL;
362         }
363
364         if (qdev->sb_array != NULL) {
365                 rte_free(qdev->sb_array);
366                 qdev->sb_array = NULL;
367         }
368 }
369
370 int qede_alloc_fp_array(struct qede_dev *qdev)
371 {
372         struct qede_fastpath *fp;
373         struct ecore_dev *edev = &qdev->edev;
374         int i;
375
376         qdev->fp_array = rte_calloc("fp", QEDE_QUEUE_CNT(qdev),
377                                     sizeof(*qdev->fp_array),
378                                     RTE_CACHE_LINE_SIZE);
379
380         if (!qdev->fp_array) {
381                 DP_ERR(edev, "fp array allocation failed\n");
382                 return -ENOMEM;
383         }
384
385         qdev->sb_array = rte_calloc("sb", QEDE_QUEUE_CNT(qdev),
386                                     sizeof(*qdev->sb_array),
387                                     RTE_CACHE_LINE_SIZE);
388
389         if (!qdev->sb_array) {
390                 DP_ERR(edev, "sb array allocation failed\n");
391                 rte_free(qdev->fp_array);
392                 return -ENOMEM;
393         }
394
395         return 0;
396 }
397
398 /* This function allocates fast-path status block memory */
399 static int
400 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
401                   uint16_t sb_id)
402 {
403         struct ecore_dev *edev = &qdev->edev;
404         struct status_block *sb_virt;
405         dma_addr_t sb_phys;
406         int rc;
407
408         sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys, sizeof(*sb_virt));
409
410         if (!sb_virt) {
411                 DP_ERR(edev, "Status block allocation failed\n");
412                 return -ENOMEM;
413         }
414
415         rc = qdev->ops->common->sb_init(edev, sb_info,
416                                         sb_virt, sb_phys, sb_id,
417                                         QED_SB_TYPE_L2_QUEUE);
418         if (rc) {
419                 DP_ERR(edev, "Status block initialization failed\n");
420                 /* TBD: No dma_free_coherent possible */
421                 return rc;
422         }
423
424         return 0;
425 }
426
427 int qede_alloc_fp_resc(struct qede_dev *qdev)
428 {
429         struct ecore_dev *edev = &qdev->edev;
430         struct qede_fastpath *fp;
431         uint32_t num_sbs;
432         uint16_t i;
433         uint16_t sb_idx;
434         int rc;
435
436         if (IS_VF(edev))
437                 ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
438         else
439                 num_sbs = ecore_cxt_get_proto_cid_count
440                           (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL);
441
442         if (num_sbs == 0) {
443                 DP_ERR(edev, "No status blocks available\n");
444                 return -EINVAL;
445         }
446
447         if (qdev->fp_array)
448                 qede_free_fp_arrays(qdev);
449
450         rc = qede_alloc_fp_array(qdev);
451         if (rc != 0)
452                 return rc;
453
454         qede_init_fp(qdev);
455
456         for (i = 0; i < QEDE_QUEUE_CNT(qdev); i++) {
457                 fp = &qdev->fp_array[i];
458                 if (IS_VF(edev))
459                         sb_idx = i % num_sbs;
460                 else
461                         sb_idx = i;
462                 if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) {
463                         qede_free_fp_arrays(qdev);
464                         return -ENOMEM;
465                 }
466         }
467
468         return 0;
469 }
470
471 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
472 {
473         struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
474
475         qede_free_mem_load(eth_dev);
476         qede_free_fp_arrays(qdev);
477 }
478
479 static inline void
480 qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq)
481 {
482         uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
483         uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring);
484         struct eth_rx_prod_data rx_prods = { 0 };
485
486         /* Update producers */
487         rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod);
488         rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod);
489
490         /* Make sure that the BD and SGE data is updated before updating the
491          * producers since FW might read the BD/SGE right after the producer
492          * is updated.
493          */
494         rte_wmb();
495
496         internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
497                         (uint32_t *)&rx_prods);
498
499         /* A write barrier is also needed to synchronize doorbell writes from
500          * more than one processor. It guarantees that the write reaches the
501          * device before the next Rx poll rings another doorbell (possibly
502          * from another CPU). Without this barrier, the next doorbell could
503          * bypass this one. This is applicable to IA64/Altix systems.
504          */
505         rte_wmb();
506
507         PMD_RX_LOG(DEBUG, rxq, "bd_prod %u  cqe_prod %u", bd_prod, cqe_prod);
508 }
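
/*
 * Illustrative sketch (not part of the driver): the two-barrier pattern used
 * in qede_update_rx_prod(), reduced to a generic "fill descriptor, then
 * publish producer" step.  The volatile stores stand in for the
 * internal_ram_wr() doorbell write; struct example_desc is invented for the
 * example.  Requires <rte_atomic.h> for rte_wmb().
 */
#if 0   /* example only, never compiled */
#include <stdint.h>
#include <rte_atomic.h>

struct example_desc {
        uint64_t addr;
        uint32_t len;
};

static void example_publish(volatile struct example_desc *d,
                            volatile uint16_t *hw_prod,
                            uint64_t addr, uint32_t len, uint16_t new_prod)
{
        d->addr = addr;
        d->len = len;

        /* Descriptor contents must be visible before the producer index. */
        rte_wmb();

        *hw_prod = new_prod;

        /* Order this doorbell against any later one from another core. */
        rte_wmb();
}
#endif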
509
510 static void
511 qede_update_sge_tpa_params(struct ecore_sge_tpa_params *sge_tpa_params,
512                            uint16_t mtu, bool enable)
513 {
514         /* Enable LRO in split mode */
515         sge_tpa_params->tpa_ipv4_en_flg = enable;
516         sge_tpa_params->tpa_ipv6_en_flg = enable;
517         sge_tpa_params->tpa_ipv4_tunn_en_flg = false;
518         sge_tpa_params->tpa_ipv6_tunn_en_flg = false;
519         /* set if tpa enable changes */
520         sge_tpa_params->update_tpa_en_flg = 1;
521         /* set if tpa parameters should be handled */
522         sge_tpa_params->update_tpa_param_flg = enable;
523
524         sge_tpa_params->max_buffers_per_cqe = 20;
525         /* Enable TPA in split mode. In this mode each TPA segment
526          * starts on the new BD, so there is one BD per segment.
527          */
528         sge_tpa_params->tpa_pkt_split_flg = 1;
529         sge_tpa_params->tpa_hdr_data_split_flg = 0;
530         sge_tpa_params->tpa_gro_consistent_flg = 0;
531         sge_tpa_params->tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM;
532         sge_tpa_params->tpa_max_size = 0x7FFF;
533         sge_tpa_params->tpa_min_size_to_start = mtu / 2;
534         sge_tpa_params->tpa_min_size_to_cont = mtu / 2;
535 }
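
/*
 * Illustrative sketch (not part of the driver): TPA is the hardware side of
 * LRO.  An application requests it through the Rx mode configuration; the
 * PMD is then expected to latch that into qdev->enable_lro (see
 * qede_ethdev.c) and apply the SGE/TPA parameters built above via the vport
 * update in qede_start_queues().  The field name below (enable_lro) is taken
 * from the rte_eth_rxmode of the DPDK release this file targets; verify it
 * against your rte_ethdev.h.  Requires <string.h> and <rte_ethdev.h>.
 */
#if 0   /* example only, never compiled */
static int example_enable_lro(uint8_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
        struct rte_eth_conf port_conf;

        memset(&port_conf, 0, sizeof(port_conf));
        port_conf.rxmode.enable_lro = 1;        /* ask the PMD for LRO/TPA */

        /* qede_start_queues() later sees LRO enabled and sends the
         * ecore_sge_tpa_params filled in by qede_update_sge_tpa_params().
         */
        return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
}
#endif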
536
537 static int qede_start_queues(struct rte_eth_dev *eth_dev, bool clear_stats)
538 {
539         struct qede_dev *qdev = eth_dev->data->dev_private;
540         struct ecore_dev *edev = &qdev->edev;
541         struct ecore_queue_start_common_params q_params;
542         struct qed_dev_info *qed_info = &qdev->dev_info.common;
543         struct qed_update_vport_params vport_update_params;
544         struct ecore_sge_tpa_params tpa_params;
545         struct qede_tx_queue *txq;
546         struct qede_fastpath *fp;
547         dma_addr_t p_phys_table;
548         int txq_index;
549         uint16_t page_cnt;
550         int vlan_removal_en = 1;
551         int rc, tc, i;
552
553         for_each_queue(i) {
554                 fp = &qdev->fp_array[i];
555                 if (fp->type & QEDE_FASTPATH_RX) {
556                         struct ecore_rxq_start_ret_params ret_params;
557
558                         p_phys_table =
559                             ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring);
560                         page_cnt =
561                             ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring);
562
563                         memset(&ret_params, 0, sizeof(ret_params));
564                         memset(&q_params, 0, sizeof(q_params));
565                         q_params.queue_id = i;
566                         q_params.vport_id = 0;
567                         q_params.sb = fp->sb_info->igu_sb_id;
568                         q_params.sb_idx = RX_PI;
569
570                         ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0);
571
572                         rc = qdev->ops->q_rx_start(edev, i, &q_params,
573                                            fp->rxq->rx_buf_size,
574                                            fp->rxq->rx_bd_ring.p_phys_addr,
575                                            p_phys_table,
576                                            page_cnt,
577                                            &ret_params);
578                         if (rc) {
579                                 DP_ERR(edev, "Start rxq #%d failed %d\n",
580                                        fp->rxq->queue_id, rc);
581                                 return rc;
582                         }
583
584                         /* Use the return parameters */
585                         fp->rxq->hw_rxq_prod_addr = ret_params.p_prod;
586                         fp->rxq->handle = ret_params.p_handle;
587
588                         fp->rxq->hw_cons_ptr =
589                                         &fp->sb_info->sb_virt->pi_array[RX_PI];
590
591                         qede_update_rx_prod(qdev, fp->rxq);
592                 }
593
594                 if (!(fp->type & QEDE_FASTPATH_TX))
595                         continue;
596                 for (tc = 0; tc < qdev->num_tc; tc++) {
597                         struct ecore_txq_start_ret_params ret_params;
598
599                         txq = fp->txqs[tc];
600                         txq_index = tc * QEDE_RSS_COUNT(qdev) + i;
601
602                         p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl);
603                         page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl);
604
605                         memset(&q_params, 0, sizeof(q_params));
606                         memset(&ret_params, 0, sizeof(ret_params));
607                         q_params.queue_id = txq->queue_id;
608                         q_params.vport_id = 0;
609                         q_params.sb = fp->sb_info->igu_sb_id;
610                         q_params.sb_idx = TX_PI(tc);
611
612                         rc = qdev->ops->q_tx_start(edev, i, &q_params,
613                                                    p_phys_table,
614                                                    page_cnt, /* **pp_doorbell */
615                                                    &ret_params);
616                         if (rc) {
617                                 DP_ERR(edev, "Start txq %u failed %d\n",
618                                        txq_index, rc);
619                                 return rc;
620                         }
621
622                         txq->doorbell_addr = ret_params.p_doorbell;
623                         txq->handle = ret_params.p_handle;
624
625                         txq->hw_cons_ptr =
626                             &fp->sb_info->sb_virt->pi_array[TX_PI(tc)];
627                         SET_FIELD(txq->tx_db.data.params,
628                                   ETH_DB_DATA_DEST, DB_DEST_XCM);
629                         SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
630                                   DB_AGG_CMD_SET);
631                         SET_FIELD(txq->tx_db.data.params,
632                                   ETH_DB_DATA_AGG_VAL_SEL,
633                                   DQ_XCM_ETH_TX_BD_PROD_CMD);
634
635                         txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
636                 }
637         }
638
639         /* Prepare and send the vport enable */
640         memset(&vport_update_params, 0, sizeof(vport_update_params));
641         /* Update MTU via vport update */
642         vport_update_params.mtu = qdev->mtu;
643         vport_update_params.vport_id = 0;
644         vport_update_params.update_vport_active_flg = 1;
645         vport_update_params.vport_active_flg = 1;
646
647         /* @DPDK */
648         if (qed_info->mf_mode == MF_NPAR && qed_info->tx_switching) {
649                 /* TBD: Check SRIOV enabled for VF */
650                 vport_update_params.update_tx_switching_flg = 1;
651                 vport_update_params.tx_switching_flg = 1;
652         }
653
654         /* TPA */
655         if (qdev->enable_lro) {
656                 DP_INFO(edev, "Enabling LRO\n");
657                 memset(&tpa_params, 0, sizeof(struct ecore_sge_tpa_params));
658                 qede_update_sge_tpa_params(&tpa_params, qdev->mtu, true);
659                 vport_update_params.sge_tpa_params = &tpa_params;
660         }
661
662         rc = qdev->ops->vport_update(edev, &vport_update_params);
663         if (rc) {
664                 DP_ERR(edev, "Update V-PORT failed %d\n", rc);
665                 return rc;
666         }
667
668         return 0;
669 }
670
671 static bool qede_tunn_exist(uint16_t flag)
672 {
673         return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK <<
674                     PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag);
675 }
676
677 /*
678  * qede_check_tunn_csum_l4:
679  * Returns:
680  * 1 : If L4 csum is enabled AND if the validation has failed.
681  * 0 : Otherwise
682  */
683 static inline uint8_t qede_check_tunn_csum_l4(uint16_t flag)
684 {
685         if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK <<
686              PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag)
687                 return !!((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK <<
688                         PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT) & flag);
689
690         return 0;
691 }
692
693 static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag)
694 {
695         if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
696              PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag)
697                 return !!((PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
698                            PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT) & flag);
699
700         return 0;
701 }
702
703 static inline uint8_t
704 qede_check_notunn_csum_l3(struct rte_mbuf *m, uint16_t flag)
705 {
706         struct ipv4_hdr *ip;
707         uint16_t pkt_csum;
708         uint16_t calc_csum;
709         uint16_t val;
710
711         val = ((PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
712                 PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT) & flag);
713
714         if (unlikely(val)) {
715                 m->packet_type = qede_rx_cqe_to_pkt_type(flag);
716                 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
717                         ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
718                                            sizeof(struct ether_hdr));
719                         pkt_csum = ip->hdr_checksum;
720                         ip->hdr_checksum = 0;
721                         calc_csum = rte_ipv4_cksum(ip);
722                         ip->hdr_checksum = pkt_csum;
723                         return (calc_csum != pkt_csum);
724                 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
725                         return 1;
726                 }
727         }
728         return 0;
729 }
730
731 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
732 {
733         ecore_chain_consume(&rxq->rx_bd_ring);
734         rxq->sw_rx_cons++;
735 }
736
737 static inline void
738 qede_reuse_page(struct qede_dev *qdev,
739                 struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
740 {
741         struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
742         uint16_t idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
743         struct qede_rx_entry *curr_prod;
744         dma_addr_t new_mapping;
745
746         curr_prod = &rxq->sw_rx_ring[idx];
747         *curr_prod = *curr_cons;
748
749         new_mapping = rte_mbuf_data_dma_addr_default(curr_prod->mbuf) +
750                       curr_prod->page_offset;
751
752         rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
753         rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping));
754
755         rxq->sw_rx_prod++;
756 }
757
758 static inline void
759 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
760                         struct qede_dev *qdev, uint8_t count)
761 {
762         struct qede_rx_entry *curr_cons;
763
764         for (; count > 0; count--) {
765                 curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
766                 qede_reuse_page(qdev, rxq, curr_cons);
767                 qede_rx_bd_ring_consume(rxq);
768         }
769 }
770
771 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
772 {
773         uint16_t val;
774
775         /* Lookup table */
776         static const uint32_t
777         ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
778                 [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4,
779                 [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6,
780                 [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
781                 [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
782                 [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
783                 [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
784         };
785
786         /* Bits (0..3) provide the L3/L4 protocol type */
787         val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
788                PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
789                (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
790                 PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT)) & flags;
791
792         if (val < QEDE_PKT_TYPE_MAX)
793                 return ptype_lkup_tbl[val] | RTE_PTYPE_L2_ETHER;
794         else
795                 return RTE_PTYPE_UNKNOWN;
796 }
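
/*
 * Illustrative sketch (not part of the driver): the decode pattern used in
 * qede_rx_cqe_to_pkt_type().  A hardware "parse flags" word is masked down
 * to a small index and looked up in a table; anything out of range maps to
 * "unknown".  The bit layout and values below are invented for the example
 * only and do not match the real PARSING_AND_ERR_FLAGS fields.
 */
#if 0   /* example only, never compiled */
#include <stdint.h>

#define EX_L3_MASK  0x3         /* bits 0-1: 1 = IPv4, 2 = IPv6 (example) */
#define EX_L4_MASK  0xC         /* bits 2-3: 4 = TCP, 8 = UDP (example)   */

static const char *const ex_ptype_tbl[16] = {
        [0x1] = "IPv4",     [0x2] = "IPv6",
        [0x5] = "IPv4/TCP", [0x6] = "IPv6/TCP",
        [0x9] = "IPv4/UDP", [0xA] = "IPv6/UDP",
};

static const char *example_decode(uint16_t parse_flags)
{
        uint16_t idx = parse_flags & (EX_L3_MASK | EX_L4_MASK);

        return ex_ptype_tbl[idx] ? ex_ptype_tbl[idx] : "unknown";
}
#endif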
797
798 static inline void
799 qede_rx_process_tpa_cmn_cont_end_cqe(struct qede_dev *qdev,
800                                      struct qede_rx_queue *rxq,
801                                      uint8_t agg_index, uint16_t len)
802 {
803         struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
804         struct qede_agg_info *tpa_info;
805         struct rte_mbuf *curr_frag; /* Pointer to currently filled TPA seg */
806         uint16_t cons_idx;
807
808         /* Under certain conditions it is possible that FW may not consume
809          * additional or new BD. So decision to consume the BD must be made
810          * based on len_list[0].
811          */
812         if (rte_le_to_cpu_16(len)) {
813                 tpa_info = &rxq->tpa_info[agg_index];
814                 cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
815                 curr_frag = rxq->sw_rx_ring[cons_idx].mbuf;
816                 assert(curr_frag);
817                 curr_frag->nb_segs = 1;
818                 curr_frag->pkt_len = rte_le_to_cpu_16(len);
819                 curr_frag->data_len = curr_frag->pkt_len;
820                 tpa_info->tpa_tail->next = curr_frag;
821                 tpa_info->tpa_tail = curr_frag;
822                 qede_rx_bd_ring_consume(rxq);
823                 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
824                         PMD_RX_LOG(ERR, rxq, "mbuf allocation fails\n");
825                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
826                         rxq->rx_alloc_errors++;
827                 }
828         }
829 }
830
831 static inline void
832 qede_rx_process_tpa_cont_cqe(struct qede_dev *qdev,
833                              struct qede_rx_queue *rxq,
834                              struct eth_fast_path_rx_tpa_cont_cqe *cqe)
835 {
836         PMD_RX_LOG(INFO, rxq, "TPA cont[%d] - len [%d]\n",
837                    cqe->tpa_agg_index, rte_le_to_cpu_16(cqe->len_list[0]));
838         /* only len_list[0] will carry a valid length */
839         qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
840                                              cqe->len_list[0]);
841 }
842
843 static inline void
844 qede_rx_process_tpa_end_cqe(struct qede_dev *qdev,
845                             struct qede_rx_queue *rxq,
846                             struct eth_fast_path_rx_tpa_end_cqe *cqe)
847 {
848         struct qede_agg_info *tpa_info;
849         struct rte_mbuf *rx_mb; /* Pointer to head of the chained agg */
850
851         qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
852                                              cqe->len_list[0]);
853         /* Update total length and frags based on end TPA */
854         tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
855         rx_mb = rxq->tpa_info[cqe->tpa_agg_index].tpa_head;
856         /* TODO:  Add Sanity Checks */
857         rx_mb->nb_segs = cqe->num_of_bds;
858         rx_mb->pkt_len = cqe->total_packet_len;
859
860         PMD_RX_LOG(INFO, rxq, "TPA End[%d] reason %d cqe_len %d nb_segs %d"
861                    " pkt_len %d\n", cqe->tpa_agg_index, cqe->end_reason,
862                    rte_le_to_cpu_16(cqe->len_list[0]), rx_mb->nb_segs,
863                    rx_mb->pkt_len);
864 }
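
/*
 * Illustrative sketch (not part of the driver): how the TPA head/tail
 * pointers grow an LRO aggregation.  A mock segment type stands in for
 * struct rte_mbuf; ex_agg_start() mirrors the TPA_START handling in the
 * receive loop, and ex_agg_append() mirrors what the CONT/END handlers above
 * do when they chain the current BD's buffer onto tpa_tail.
 */
#if 0   /* example only, never compiled */
#include <stdint.h>
#include <stddef.h>

struct ex_seg {
        struct ex_seg *next;
        uint16_t data_len;
};

struct ex_agg {
        struct ex_seg *head;    /* tpa_head */
        struct ex_seg *tail;    /* tpa_tail */
        uint16_t nb_segs;
        uint32_t pkt_len;
};

static void ex_agg_start(struct ex_agg *a, struct ex_seg *first)
{
        a->head = a->tail = first;
        a->nb_segs = 1;
        a->pkt_len = first->data_len;
}

static void ex_agg_append(struct ex_agg *a, struct ex_seg *seg)
{
        a->tail->next = seg;    /* tpa_info->tpa_tail->next = curr_frag */
        a->tail = seg;          /* tpa_info->tpa_tail = curr_frag       */
        a->nb_segs++;
        a->pkt_len += seg->data_len;
}
#endif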
865
866 static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
867 {
868         uint32_t val;
869
870         /* Lookup table */
871         static const uint32_t
872         ptype_tunn_lkup_tbl[QEDE_PKT_TYPE_TUNN_MAX_TYPE] __rte_cache_aligned = {
873                 [QEDE_PKT_TYPE_UNKNOWN] = RTE_PTYPE_UNKNOWN,
874                 [QEDE_PKT_TYPE_TUNN_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
875                 [QEDE_PKT_TYPE_TUNN_GRE] = RTE_PTYPE_TUNNEL_GRE,
876                 [QEDE_PKT_TYPE_TUNN_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
877                 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE] =
878                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L2_ETHER,
879                 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE] =
880                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L2_ETHER,
881                 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN] =
882                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L2_ETHER,
883                 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE] =
884                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L2_ETHER,
885                 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE] =
886                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L2_ETHER,
887                 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN] =
888                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L2_ETHER,
889                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE] =
890                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
891                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GRE] =
892                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
893                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN] =
894                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
895                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE] =
896                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
897                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE] =
898                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
899                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN] =
900                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
901                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GENEVE] =
902                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
903                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GRE] =
904                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
905                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_VXLAN] =
906                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
907                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GENEVE] =
908                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
909                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GRE] =
910                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
911                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_VXLAN] =
912                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
913         };
914
915         /* Cover bits[4-0] to include tunn_type and next protocol */
916         val = ((ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK <<
917                 ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT) |
918                 (ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK <<
919                 ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT)) & flags;
920
921         if (val < QEDE_PKT_TYPE_TUNN_MAX_TYPE)
922                 return ptype_tunn_lkup_tbl[val];
923         else
924                 return RTE_PTYPE_UNKNOWN;
925 }
926
927 static inline int
928 qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
929                      uint8_t num_segs, uint16_t pkt_len)
930 {
931         struct qede_rx_queue *rxq = p_rxq;
932         struct qede_dev *qdev = rxq->qdev;
933         struct ecore_dev *edev = &qdev->edev;
934         register struct rte_mbuf *seg1 = NULL;
935         register struct rte_mbuf *seg2 = NULL;
936         uint16_t sw_rx_index;
937         uint16_t cur_size;
938
939         seg1 = rx_mb;
940         while (num_segs) {
941                 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
942                                                         pkt_len;
943                 if (unlikely(!cur_size)) {
944                         PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
945                                    " left for mapping jumbo", num_segs);
946                         qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
947                         return -EINVAL;
948                 }
949                 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
950                 seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
951                 qede_rx_bd_ring_consume(rxq);
952                 pkt_len -= cur_size;
953                 seg2->data_len = cur_size;
954                 seg1->next = seg2;
955                 seg1 = seg1->next;
956                 num_segs--;
957                 rxq->rx_segs++;
958         }
959
960         return 0;
961 }
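
/*
 * Illustrative sketch (not part of the driver): the length bookkeeping used
 * when a jumbo frame spans several BDs.  Each extra BD carries at most
 * rx_buf_size bytes; the remainder after the first BD is split exactly as in
 * the loop above.
 */
#if 0   /* example only, never compiled */
#include <stdint.h>
#include <stdio.h>

static void example_split_lengths(uint16_t pkt_len, uint16_t len_on_first_bd,
                                  uint16_t rx_buf_size)
{
        uint16_t remaining = pkt_len - len_on_first_bd;

        printf("seg0: %u bytes\n", len_on_first_bd);
        while (remaining) {
                uint16_t cur = remaining > rx_buf_size ?
                               rx_buf_size : remaining;

                printf("next seg: %u bytes\n", cur);
                remaining -= cur;
        }
}
#endif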
962
963 uint16_t
964 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
965 {
966         struct qede_rx_queue *rxq = p_rxq;
967         struct qede_dev *qdev = rxq->qdev;
968         struct ecore_dev *edev = &qdev->edev;
969         struct qede_fastpath *fp = &qdev->fp_array[rxq->queue_id];
970         uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index;
971         uint16_t rx_pkt = 0;
972         union eth_rx_cqe *cqe;
973         struct eth_fast_path_rx_reg_cqe *fp_cqe;
974         register struct rte_mbuf *rx_mb = NULL;
975         register struct rte_mbuf *seg1 = NULL;
976         enum eth_rx_cqe_type cqe_type;
977         uint16_t pkt_len; /* Sum of all BD segments */
978         uint16_t len; /* Length of first BD */
979         uint8_t num_segs = 1;
980         uint16_t preload_idx;
981         uint8_t csum_flag;
982         uint16_t parse_flag;
983         enum rss_hash_type htype;
984         uint8_t tunn_parse_flag;
985         uint8_t j;
986         struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa;
987         uint64_t ol_flags;
988         uint32_t packet_type;
989         uint16_t vlan_tci;
990         bool tpa_start_flg;
991         uint8_t bitfield_val;
992         uint8_t offset, tpa_agg_idx, flags;
993         struct qede_agg_info *tpa_info;
994
995         hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
996         sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
997
998         rte_rmb();
999
1000         if (hw_comp_cons == sw_comp_cons)
1001                 return 0;
1002
1003         while (sw_comp_cons != hw_comp_cons) {
1004                 ol_flags = 0;
1005                 packet_type = RTE_PTYPE_UNKNOWN;
1006                 vlan_tci = 0;
1007                 tpa_start_flg = false;
1008
1009                 /* Get the CQE from the completion ring */
1010                 cqe =
1011                     (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
1012                 cqe_type = cqe->fast_path_regular.type;
1013                 PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type);
1014
1015                 switch (cqe_type) {
1016                 case ETH_RX_CQE_TYPE_REGULAR:
1017                         fp_cqe = &cqe->fast_path_regular;
1018                 break;
1019                 case ETH_RX_CQE_TYPE_TPA_START:
1020                         cqe_start_tpa = &cqe->fast_path_tpa_start;
1021                         tpa_info = &rxq->tpa_info[cqe_start_tpa->tpa_agg_index];
1022                         tpa_start_flg = true;
1023                         /* Mark it as LRO packet */
1024                         ol_flags |= PKT_RX_LRO;
1025                         /* In split mode, seg_len is the same as len_on_first_bd
1026                          * and ext_bd_len_list will be empty since there are
1027                          * no additional buffers
1028                          */
1029                         PMD_RX_LOG(INFO, rxq,
1030                             "TPA start[%d] - len_on_first_bd %d header %d"
1031                             " [bd_list[0] %d], [seg_len %d]\n",
1032                             cqe_start_tpa->tpa_agg_index,
1033                             rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd),
1034                             cqe_start_tpa->header_len,
1035                             rte_le_to_cpu_16(cqe_start_tpa->ext_bd_len_list[0]),
1036                             rte_le_to_cpu_16(cqe_start_tpa->seg_len));
1037
1038                 break;
1039                 case ETH_RX_CQE_TYPE_TPA_CONT:
1040                         qede_rx_process_tpa_cont_cqe(qdev, rxq,
1041                                                      &cqe->fast_path_tpa_cont);
1042                         goto next_cqe;
1043                 case ETH_RX_CQE_TYPE_TPA_END:
1044                         qede_rx_process_tpa_end_cqe(qdev, rxq,
1045                                                     &cqe->fast_path_tpa_end);
1046                         tpa_agg_idx = cqe->fast_path_tpa_end.tpa_agg_index;
1047                         tpa_info = &rxq->tpa_info[tpa_agg_idx];
1048                         rx_mb = rxq->tpa_info[tpa_agg_idx].tpa_head;
1049                         goto tpa_end;
1050                 case ETH_RX_CQE_TYPE_SLOW_PATH:
1051                         PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n");
1052                         qdev->ops->eth_cqe_completion(edev, fp->id,
1053                                 (struct eth_slow_path_rx_cqe *)cqe);
1054                         /* fall-thru */
1055                 default:
1056                         goto next_cqe;
1057                 }
1058
1059                 /* Get the data from the SW ring */
1060                 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1061                 rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
1062                 assert(rx_mb != NULL);
1063
1064                 /* Handle regular CQE or TPA start CQE */
1065                 if (!tpa_start_flg) {
1066                         parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags);
1067                         bitfield_val = fp_cqe->bitfields;
1068                         offset = fp_cqe->placement_offset;
1069                         len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
1070                         pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
1071                 } else {
1072                         parse_flag =
1073                             rte_le_to_cpu_16(cqe_start_tpa->pars_flags.flags);
1074                         bitfield_val = cqe_start_tpa->bitfields;
1075                         offset = cqe_start_tpa->placement_offset;
1076                         /* seg_len = len_on_first_bd */
1077                         len = rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd);
1078                 }
1079                 if (qede_tunn_exist(parse_flag)) {
1080                         PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n");
1081                         if (unlikely(qede_check_tunn_csum_l4(parse_flag))) {
1082                                 PMD_RX_LOG(ERR, rxq,
1083                                             "L4 csum failed, flags = 0x%x\n",
1084                                             parse_flag);
1085                                 rxq->rx_hw_errors++;
1086                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
1087                         } else {
1088                                 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1089                                 if (tpa_start_flg)
1090                                         flags =
1091                                          cqe_start_tpa->tunnel_pars_flags.flags;
1092                                 else
1093                                         flags = fp_cqe->tunnel_pars_flags.flags;
1094                                 tunn_parse_flag = flags;
1095                                 packet_type =
1096                                 qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag);
1097                         }
1098                 } else {
1099                         PMD_RX_LOG(INFO, rxq, "Rx non-tunneled packet\n");
1100                         if (unlikely(qede_check_notunn_csum_l4(parse_flag))) {
1101                                 PMD_RX_LOG(ERR, rxq,
1102                                             "L4 csum failed, flags = 0x%x\n",
1103                                             parse_flag);
1104                                 rxq->rx_hw_errors++;
1105                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
1106                         } else {
1107                                 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1108                         }
1109                         if (unlikely(qede_check_notunn_csum_l3(rx_mb,
1110                                                         parse_flag))) {
1111                                 PMD_RX_LOG(ERR, rxq,
1112                                            "IP csum failed, flags = 0x%x\n",
1113                                            parse_flag);
1114                                 rxq->rx_hw_errors++;
1115                                 ol_flags |= PKT_RX_IP_CKSUM_BAD;
1116                         } else {
1117                                 ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1118                                 packet_type =
1119                                         qede_rx_cqe_to_pkt_type(parse_flag);
1120                         }
1121                 }
1122
1123                 if (CQE_HAS_VLAN(parse_flag)) {
1124                         vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1125                         ol_flags |= PKT_RX_VLAN_PKT;
1126                 }
1127
1128                 if (CQE_HAS_OUTER_VLAN(parse_flag)) {
1129                         vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1130                         ol_flags |= PKT_RX_QINQ_PKT;
1131                         rx_mb->vlan_tci_outer = 0;
1132                 }
1133
1134                 /* RSS Hash */
1135                 htype = (uint8_t)GET_FIELD(bitfield_val,
1136                                         ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
1137                 if (qdev->rss_enable && htype) {
1138                         ol_flags |= PKT_RX_RSS_HASH;
1139                         rx_mb->hash.rss = rte_le_to_cpu_32(fp_cqe->rss_hash);
1140                         PMD_RX_LOG(INFO, rxq, "Hash result 0x%x\n",
1141                                    rx_mb->hash.rss);
1142                 }
1143
1144                 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
1145                         PMD_RX_LOG(ERR, rxq,
1146                                    "New buffer allocation failed, "
1147                                    "dropping incoming packet\n");
1148                         qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num);
1149                         rte_eth_devices[rxq->port_id].
1150                             data->rx_mbuf_alloc_failed++;
1151                         rxq->rx_alloc_errors++;
1152                         break;
1153                 }
1154                 qede_rx_bd_ring_consume(rxq);
1155
1156                 if (!tpa_start_flg && fp_cqe->bd_num > 1) {
1157                         PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
1158                                    " len on first: %04x Total Len: %04x",
1159                                    fp_cqe->bd_num, len, pkt_len);
1160                         num_segs = fp_cqe->bd_num - 1;
1161                         seg1 = rx_mb;
1162                         if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
1163                                                  pkt_len - len))
1164                                 goto next_cqe;
1165                         for (j = 0; j < num_segs; j++) {
1166                                 if (qede_alloc_rx_buffer(rxq)) {
1167                                         PMD_RX_LOG(ERR, rxq,
1168                                                 "Buffer allocation failed");
1169                                         rte_eth_devices[rxq->port_id].
1170                                                 data->rx_mbuf_alloc_failed++;
1171                                         rxq->rx_alloc_errors++;
1172                                         break;
1173                                 }
1174                                 rxq->rx_segs++;
1175                         }
1176                 }
1177                 rxq->rx_segs++; /* for the first segment */
1178
1179                 /* Prefetch next mbuf while processing current one. */
1180                 preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1181                 rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
1182
1183                 /* Update rest of the MBUF fields */
1184                 rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1185                 rx_mb->port = rxq->port_id;
1186                 rx_mb->ol_flags = ol_flags;
1187                 rx_mb->data_len = len;
1188                 rx_mb->vlan_tci = vlan_tci;
1189                 rx_mb->packet_type = packet_type;
1190                 PMD_RX_LOG(INFO, rxq, "pkt_type %04x len %04x flags %04lx\n",
1191                            packet_type, len, (unsigned long)ol_flags);
1192                 if (!tpa_start_flg) {
1193                         rx_mb->nb_segs = fp_cqe->bd_num;
1194                         rx_mb->pkt_len = pkt_len;
1195                 } else {
1196                         /* store ref to the updated mbuf */
1197                         tpa_info->tpa_head = rx_mb;
1198                         tpa_info->tpa_tail = tpa_info->tpa_head;
1199                 }
1200                 rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));
1201 tpa_end:
1202                 if (!tpa_start_flg) {
1203                         rx_pkts[rx_pkt] = rx_mb;
1204                         rx_pkt++;
1205                 }
1206 next_cqe:
1207                 ecore_chain_recycle_consumed(&rxq->rx_comp_ring);
1208                 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1209                 if (rx_pkt == nb_pkts) {
1210                         PMD_RX_LOG(DEBUG, rxq,
1211                                    "Budget reached nb_pkts=%u received=%u",
1212                                    rx_pkt, nb_pkts);
1213                         break;
1214                 }
1215         }
1216
1217         qede_update_rx_prod(qdev, rxq);
1218
1219         rxq->rcv_pkts += rx_pkt;
1220
1221         PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id());
1222
1223         return rx_pkt;
1224 }
1225
1226 static inline void
1227 qede_free_tx_pkt(struct qede_tx_queue *txq)
1228 {
1229         struct rte_mbuf *mbuf;
1230         uint16_t nb_segs;
1231         uint16_t idx;
1232         uint8_t nbds;
1233
1234         idx = TX_CONS(txq);
1235         mbuf = txq->sw_tx_ring[idx].mbuf;
1236         if (mbuf) {
1237                 nb_segs = mbuf->nb_segs;
1238                 PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs);
1239                 while (nb_segs) {
1240                         /* It's like consuming rxbuf in recv() */
1241                         ecore_chain_consume(&txq->tx_pbl);
1242                         txq->nb_tx_avail++;
1243                         nb_segs--;
1244                 }
1245                 rte_pktmbuf_free(mbuf);
1246                 txq->sw_tx_ring[idx].mbuf = NULL;
1247                 txq->sw_tx_cons++;
1248                 PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n");
1249         } else {
1250                 ecore_chain_consume(&txq->tx_pbl);
1251                 txq->nb_tx_avail++;
1252         }
1253 }
1254
1255 static inline void
1256 qede_process_tx_compl(struct ecore_dev *edev, struct qede_tx_queue *txq)
1257 {
1258         uint16_t hw_bd_cons;
1259         uint16_t sw_tx_cons;
1260
1261         rte_compiler_barrier();
1262         hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
1263         sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
1264         PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n",
1265                    abs(hw_bd_cons - sw_tx_cons));
1266         while (hw_bd_cons !=  ecore_chain_get_cons_idx(&txq->tx_pbl))
1267                 qede_free_tx_pkt(txq);
1268 }
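
/*
 * Illustrative sketch (not part of the driver): the completion loop above in
 * generic form.  The hardware publishes how far it has consumed (hw_cons);
 * the driver frees packets until its own consumer index catches up.  The
 * 16-bit subtraction stays correct across index wrap-around.
 */
#if 0   /* example only, never compiled */
#include <stdint.h>

struct ex_txq {
        uint16_t sw_cons;
        uint16_t nb_avail;
};

static void example_drain_completions(struct ex_txq *txq, uint16_t hw_cons)
{
        while ((uint16_t)(hw_cons - txq->sw_cons) != 0) {
                /* here the real driver frees sw_tx_ring[sw_cons].mbuf */
                txq->sw_cons++;
                txq->nb_avail++;
        }
}
#endif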
1269
1270 /* Populate scatter gather buffer descriptor fields */
1271 static inline uint8_t
1272 qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
1273                   struct eth_tx_2nd_bd **bd2, struct eth_tx_3rd_bd **bd3)
1274 {
1275         struct qede_tx_queue *txq = p_txq;
1276         struct eth_tx_bd *tx_bd = NULL;
1277         dma_addr_t mapping;
1278         uint8_t nb_segs = 0;
1279
1280         /* Check for scattered buffers */
1281         while (m_seg) {
1282                 if (nb_segs == 0) {
1283                         if (!*bd2) {
1284                                 *bd2 = (struct eth_tx_2nd_bd *)
1285                                         ecore_chain_produce(&txq->tx_pbl);
1286                                 memset(*bd2, 0, sizeof(struct eth_tx_2nd_bd));
1287                                 nb_segs++;
1288                         }
1289                         mapping = rte_mbuf_data_dma_addr(m_seg);
1290                         QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len);
1291                         PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len);
1292                 } else if (nb_segs == 1) {
1293                         if (!*bd3) {
1294                                 *bd3 = (struct eth_tx_3rd_bd *)
1295                                         ecore_chain_produce(&txq->tx_pbl);
1296                                 memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd));
1297                                 nb_segs++;
1298                         }
1299                         mapping = rte_mbuf_data_dma_addr(m_seg);
1300                         QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len);
1301                         PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len);
1302                 } else {
1303                         tx_bd = (struct eth_tx_bd *)
1304                                 ecore_chain_produce(&txq->tx_pbl);
1305                         memset(tx_bd, 0, sizeof(*tx_bd));
1306                         nb_segs++;
1307                         mapping = rte_mbuf_data_dma_addr(m_seg);
1308                         QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
1309                         PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len);
1310                 }
1311                 m_seg = m_seg->next;
1312         }
1313
1314         /* Return total scattered buffers */
1315         return nb_segs;
1316 }
1317
1318 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1319 static inline void
1320 print_tx_bd_info(struct qede_tx_queue *txq,
1321                  struct eth_tx_1st_bd *bd1,
1322                  struct eth_tx_2nd_bd *bd2,
1323                  struct eth_tx_3rd_bd *bd3,
1324                  uint64_t tx_ol_flags)
1325 {
1326         char ol_buf[256] = { 0 }; /* for verbose prints */
1327
1328         if (bd1)
1329                 PMD_TX_LOG(INFO, txq,
1330                            "BD1: nbytes=%u nbds=%u bd_flags=%04x bf=%04x",
1331                            rte_cpu_to_le_16(bd1->nbytes), bd1->data.nbds,
1332                            bd1->data.bd_flags.bitfields,
1333                            rte_cpu_to_le_16(bd1->data.bitfields));
1334         if (bd2)
1335                 PMD_TX_LOG(INFO, txq,
1336                            "BD2: nbytes=%u bf=%04x\n",
1337                            rte_cpu_to_le_16(bd2->nbytes), bd2->data.bitfields1);
1338         if (bd3)
1339                 PMD_TX_LOG(INFO, txq,
1340                            "BD3: nbytes=%u bf=%04x mss=%u\n",
1341                            rte_cpu_to_le_16(bd3->nbytes),
1342                            rte_cpu_to_le_16(bd3->data.bitfields),
1343                            rte_cpu_to_le_16(bd3->data.lso_mss));
1344
1345         rte_get_tx_ol_flag_list(tx_ol_flags, ol_buf, sizeof(ol_buf));
1346         PMD_TX_LOG(INFO, txq, "TX offloads = %s\n", ol_buf);
1347 }
1348 #endif
1349
1350 /* TX prepare to check that packets meet TX conditions */
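/* Each packet's segment count is checked against the device BD limits
 * (LSO vs non-LSO), unsupported offload flags are rejected, and the L4
 * pseudo-header checksum is prepared via rte_net_intel_cksum_prepare().
 * On failure rte_errno is set and the count of packets that passed the
 * checks is returned.
 */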
1351 uint16_t
1352 qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts,
1353                     uint16_t nb_pkts)
1354 {
1355         struct qede_tx_queue *txq = p_txq;
1356         uint64_t ol_flags;
1357         struct rte_mbuf *m;
1358         uint16_t i;
1359         int ret;
1360
1361         for (i = 0; i < nb_pkts; i++) {
1362                 m = tx_pkts[i];
1363                 ol_flags = m->ol_flags;
1364                 if (ol_flags & PKT_TX_TCP_SEG) {
1365                         if (m->nb_segs >= ETH_TX_MAX_BDS_PER_LSO_PACKET) {
1366                                 rte_errno = EINVAL;
1367                                 break;
1368                         }
1369                         /* TBD: confirm it is ~9700B for both? */
1370                         if (m->tso_segsz > ETH_TX_MAX_NON_LSO_PKT_LEN) {
1371                                 rte_errno = EINVAL;
1372                                 break;
1373                         }
1374                 } else {
1375                         if (m->nb_segs >= ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) {
1376                                 rte_errno = EINVAL;
1377                                 break;
1378                         }
1379                 }
1380                 if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) {
1381                         rte_errno = ENOTSUP;
1382                         break;
1383                 }
1384
1385 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1386                 ret = rte_validate_tx_offload(m);
1387                 if (ret != 0) {
1388                         rte_errno = -ret;
1389                         break;
1390                 }
1391 #endif
1392                 /* TBD: pseudo csum calculation required iff
1393                  * ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE not set?
1394                  */
1395                 ret = rte_net_intel_cksum_prepare(m);
1396                 if (ret != 0) {
1397                         rte_errno = -ret;
1398                         break;
1399                 }
1400         }
1401
1402         if (unlikely(i != nb_pkts))
1403                 PMD_TX_LOG(ERR, txq, "TX prepare failed for %u\n",
1404                            nb_pkts - i);
1405         return i;
1406 }
1407
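/* Burst transmit: reclaim completions when running low on BDs, then for each
 * packet build BD1 with the offload flags (tunnel, checksum, VLAN, LSO),
 * produce BD2/BD3 for LSO and IPv6-extension packets, chain any remaining
 * mbuf fragments, and ring the Tx doorbell once for the whole burst.
 */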
1408 uint16_t
1409 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1410 {
1411         struct qede_tx_queue *txq = p_txq;
1412         struct qede_dev *qdev = txq->qdev;
1413         struct ecore_dev *edev = &qdev->edev;
1414         struct rte_mbuf *mbuf;
1415         struct rte_mbuf *m_seg = NULL;
1416         uint16_t nb_tx_pkts;
1417         uint16_t bd_prod;
1418         uint16_t idx;
1419         uint16_t nb_frags;
1420         uint16_t nb_pkt_sent = 0;
1421         uint8_t nbds;
1422         bool ipv6_ext_flg;
1423         bool lso_flg;
1424         bool tunn_flg;
1425         struct eth_tx_1st_bd *bd1;
1426         struct eth_tx_2nd_bd *bd2;
1427         struct eth_tx_3rd_bd *bd3;
1428         uint64_t tx_ol_flags;
1429         uint16_t hdr_size;
1430
1431         if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
1432                 PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
1433                            nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
1434                 qede_process_tx_compl(edev, txq);
1435         }
1436
1437         nb_tx_pkts  = nb_pkts;
1438         bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
1439         while (nb_tx_pkts--) {
1440                 /* Init flags/values */
1441                 ipv6_ext_flg = false;
1442                 tunn_flg = false;
1443                 lso_flg = false;
1444                 nbds = 0;
1445                 bd1 = NULL;
1446                 bd2 = NULL;
1447                 bd3 = NULL;
1448                 hdr_size = 0;
1449
1450                 mbuf = *tx_pkts;
1451                 assert(mbuf);
1452
1453                 /* Check that enough Tx BDs are available for all mbuf segments */
1454                 if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
1455                         break;
1456
1457                 tx_ol_flags = mbuf->ol_flags;
1458
1459 #define RTE_ETH_IS_IPV6_HDR_EXT(ptype) ((ptype) & RTE_PTYPE_L3_IPV6_EXT)
1460                 if (RTE_ETH_IS_IPV6_HDR_EXT(mbuf->packet_type))
1461                         ipv6_ext_flg = true;
1462
1463                 if (RTE_ETH_IS_TUNNEL_PKT(mbuf->packet_type))
1464                         tunn_flg = true;
1465
1466                 if (tx_ol_flags & PKT_TX_TCP_SEG)
1467                         lso_flg = true;
1468
1469                 if (lso_flg) {
1470                         if (unlikely(txq->nb_tx_avail <
1471                                                 ETH_TX_MIN_BDS_PER_LSO_PKT))
1472                                 break;
1473                 } else {
1474                         if (unlikely(txq->nb_tx_avail <
1475                                         ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
1476                                 break;
1477                 }
1478
1479                 if (tunn_flg && ipv6_ext_flg) {
1480                         if (unlikely(txq->nb_tx_avail <
1481                                 ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT))
1482                                 break;
1483                 }
1484                 if (ipv6_ext_flg) {
1485                         if (unlikely(txq->nb_tx_avail <
1486                                         ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT))
1487                                 break;
1488                 }
1489
1490                 /* Fill the entry in the SW ring and the BDs in the FW ring */
1491                 idx = TX_PROD(txq);
1492                 tx_pkts++;
1493                 txq->sw_tx_ring[idx].mbuf = mbuf;
1494
1495                 /* BD1 */
1496                 bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
1497                 memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
1498                 nbds++;
1499
1500                 bd1->data.bd_flags.bitfields |=
1501                         1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
1502                 /* FW 8.10.x specific change */
1503                 if (!lso_flg) {
1504                         bd1->data.bitfields |=
1505                         (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1506                                 << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1507                         /* Map MBUF linear data for DMA and set in the BD1 */
1508                         QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
1509                                              mbuf->data_len);
1510                 } else {
1511                         /* For LSO, the packet header and payload must reside in
1512                          * buffers pointed to by different BDs. BD1 carries the
1513                          * header and BD2 onwards carry the data.
1514                          */
1515                         hdr_size = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
1516                         QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
1517                                              hdr_size);
1518                 }
1519
1520                 if (tunn_flg) {
1521                         /* First indicate this is a tunnel pkt */
1522                         bd1->data.bd_flags.bitfields |=
1523                                 ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK <<
1524                                 ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
1525
1526                         /* Legacy FW had flipped behavior for this bit,
1527                          * i.e. it needed to be set to prevent the FW from touching
1528                          * encapsulated packets when it didn't need to.
1529                          */
1530                         if (unlikely(txq->is_legacy))
1531                                 bd1->data.bitfields ^=
1532                                         1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
1533
1534                         /* Outer IP checksum offload */
1535                         if (tx_ol_flags & PKT_TX_OUTER_IP_CKSUM) {
1536                                 bd1->data.bd_flags.bitfields |=
1537                                         ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK <<
1538                                         ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;
1539                         }
1540
1541                         /* Outer UDP checksum offload */
1542                         bd1->data.bd_flags.bitfields |=
1543                                 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
1544                                 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
1545                 }
1546
1547                 /* Descriptor based VLAN insertion */
1548                 if (tx_ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
1549                         bd1->data.vlan = rte_cpu_to_le_16(mbuf->vlan_tci);
1550                         bd1->data.bd_flags.bitfields |=
1551                             1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
1552                 }
1553
1554                 if (lso_flg)
1555                         bd1->data.bd_flags.bitfields |=
1556                                 1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
1557
1558                 /* Offload the IP checksum in the hardware */
1559                 if ((lso_flg) || (tx_ol_flags & PKT_TX_IP_CKSUM))
1560                         bd1->data.bd_flags.bitfields |=
1561                             1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
1562
1563                 /* L4 checksum offload (tcp or udp) */
1564                 if ((lso_flg) || (tx_ol_flags & (PKT_TX_TCP_CKSUM |
1565                                                 PKT_TX_UDP_CKSUM)))
1566                         /* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM */
1567                         bd1->data.bd_flags.bitfields |=
1568                             1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
1569
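                /* LSO and IPv6-extension packets always use BD2 and BD3:
                 * for LSO, BD1 holds only the headers and BD2 the start of
                 * the payload, while BD3 carries the MSS and header-BD count;
                 * for IPv6-ext, BD2 carries the L4 pseudo-checksum mode.
                 */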
1570                 /* BD2 */
1571                 if (lso_flg || ipv6_ext_flg) {
1572                         bd2 = (struct eth_tx_2nd_bd *)ecore_chain_produce
1573                                                         (&txq->tx_pbl);
1574                         memset(bd2, 0, sizeof(struct eth_tx_2nd_bd));
1575                         nbds++;
1576                         QEDE_BD_SET_ADDR_LEN(bd2,
1577                                             (hdr_size +
1578                                             rte_mbuf_data_dma_addr(mbuf)),
1579                                             mbuf->data_len - hdr_size);
1580                         /* TBD: check pseudo csum iff tx_prepare not called? */
1581                         if (ipv6_ext_flg) {
1582                                 bd2->data.bitfields1 |=
1583                                 ETH_L4_PSEUDO_CSUM_ZERO_LENGTH <<
1584                                 ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT;
1585                         }
1586                 }
1587
1588                 /* BD3 */
1589                 if (lso_flg || ipv6_ext_flg) {
1590                         bd3 = (struct eth_tx_3rd_bd *)ecore_chain_produce
1591                                                         (&txq->tx_pbl);
1592                         memset(bd3, 0, sizeof(struct eth_tx_3rd_bd));
1593                         nbds++;
1594                         if (lso_flg) {
1595                                 bd3->data.lso_mss =
1596                                         rte_cpu_to_le_16(mbuf->tso_segsz);
1597                                 /* Using one header BD */
1598                                 bd3->data.bitfields |=
1599                                         rte_cpu_to_le_16(1 <<
1600                                         ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
1601                         }
1602                 }
1603
1604                 /* Handle fragmented MBUF */
1605                 m_seg = mbuf->next;
1606                 /* Encode scatter gather buffer descriptors if required */
1607                 nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3);
1608                 bd1->data.nbds = nbds + nb_frags;
1609                 txq->nb_tx_avail -= bd1->data.nbds;
1610                 txq->sw_tx_prod++;
1611                 rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
1612                 bd_prod =
1613                     rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
1614 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1615                 print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
1616                 PMD_TX_LOG(INFO, txq, "lso=%d tunn=%d ipv6_ext=%d\n",
1617                            lso_flg, tunn_flg, ipv6_ext_flg);
1618 #endif
1619                 nb_pkt_sent++;
1620                 txq->xmit_pkts++;
1621         }
1622
1623         /* Write the latest BD producer index into the doorbell data */
1624         txq->tx_db.data.bd_prod = bd_prod;
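        /* The first wmb makes the BD and doorbell-data stores visible to the
         * device before the doorbell register is written; the trailing wmb
         * orders the relaxed doorbell write itself.
         */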
1625         rte_wmb();
1626         rte_compiler_barrier();
1627         DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
1628         rte_wmb();
1629
1630         /* Check again for Tx completions */
1631         qede_process_tx_compl(edev, txq);
1632
1633         PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
1634                    nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
1635
1636         return nb_pkt_sent;
1637 }
1638
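/* Bind the ethdev Rx/Tx queues to the fastpath array entries and assign
 * their queue ids; Tx queues are laid out per traffic class.
 */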
1639 static void qede_init_fp_queue(struct rte_eth_dev *eth_dev)
1640 {
1641         struct qede_dev *qdev = eth_dev->data->dev_private;
1642         struct qede_fastpath *fp;
1643         uint8_t i, txq_index, tc;
1644         int rxq = 0, txq = 0;
1645
1646         for_each_queue(i) {
1647                 fp = &qdev->fp_array[i];
1648                 if (fp->type & QEDE_FASTPATH_RX) {
1649                         fp->rxq = eth_dev->data->rx_queues[i];
1650                         fp->rxq->queue_id = rxq++;
1651                 }
1652
1653                 if (fp->type & QEDE_FASTPATH_TX) {
1654                         for (tc = 0; tc < qdev->num_tc; tc++) {
1655                                 txq_index = tc * QEDE_TSS_COUNT(qdev) + txq;
1656                                 fp->txqs[tc] =
1657                                         eth_dev->data->tx_queues[txq_index];
1658                                 fp->txqs[tc]->queue_id = txq_index;
1659                                 if (qdev->dev_info.is_legacy)
1660                                         fp->txqs[tc]->is_legacy = true;
1661                         }
1662                         txq++;
1663                 }
1664         }
1665 }
1666
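/* dev_start: map the fastpath queues on the first start after configuration,
 * start the Rx/Tx queues, apply RSS if requested, bring the link up and
 * start the fastpath.
 */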
1667 int qede_dev_start(struct rte_eth_dev *eth_dev)
1668 {
1669         struct qede_dev *qdev = eth_dev->data->dev_private;
1670         struct ecore_dev *edev = &qdev->edev;
1671         struct qed_link_output link_output;
1672         struct qede_fastpath *fp;
1673         int rc;
1674
1675         DP_INFO(edev, "Device state is %d\n", qdev->state);
1676
1677         if (qdev->state == QEDE_DEV_START) {
1678                 DP_INFO(edev, "Port is already started\n");
1679                 return 0;
1680         }
1681
1682         if (qdev->state == QEDE_DEV_CONFIG)
1683                 qede_init_fp_queue(eth_dev);
1684
1685         rc = qede_start_queues(eth_dev, true);
1686         if (rc) {
1687                 DP_ERR(edev, "Failed to start queues\n");
1688                 /* TBD: free */
1689                 return rc;
1690         }
1691
1692         /* The newer SR-IOV PF driver expects RX/TX queues to be started before
1693          * enabling RSS, so RSS configuration is deferred up to this point.
1694          * We also want to retain the same behavior in the PF case, so no
1695          * PF/VF-specific check is done here.
1696          */
1697         if (eth_dev->data->dev_conf.rxmode.mq_mode  == ETH_MQ_RX_RSS)
1698                 if (qede_config_rss(eth_dev))
1699                         return -1;
1700
1701         /* Bring-up the link */
1702         qede_dev_set_link_state(eth_dev, true);
1703
1704         /* Start/resume traffic */
1705         qdev->ops->fastpath_start(edev);
1706
1707         qdev->state = QEDE_DEV_START;
1708
1709         DP_INFO(edev, "dev_state is QEDE_DEV_START\n");
1710
1711         return 0;
1712 }
1713
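/* Poll Tx completions until the SW producer and consumer meet, waiting up to
 * roughly one second. If the queue appears stuck and allow_drain is set,
 * request a drain from the MCP and retry once.
 */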
1714 static int qede_drain_txq(struct qede_dev *qdev,
1715                           struct qede_tx_queue *txq, bool allow_drain)
1716 {
1717         struct ecore_dev *edev = &qdev->edev;
1718         int rc, cnt = 1000;
1719
1720         while (txq->sw_tx_cons != txq->sw_tx_prod) {
1721                 qede_process_tx_compl(edev, txq);
1722                 if (!cnt) {
1723                         if (allow_drain) {
1724                                 DP_ERR(edev, "Tx queue[%u] is stuck, "
1725                                           "requesting MCP to drain\n",
1726                                           txq->queue_id);
1727                                 rc = qdev->ops->common->drain(edev);
1728                                 if (rc)
1729                                         return rc;
1730                                 return qede_drain_txq(qdev, txq, false);
1731                         }
1732                         DP_ERR(edev, "Timeout waiting for tx queue[%d]: "
1733                                   "PROD=%d, CONS=%d\n",
1734                                   txq->queue_id, txq->sw_tx_prod,
1735                                   txq->sw_tx_cons);
1736                         return -1;
1737                 }
1738                 cnt--;
1739                 DELAY(1000);
1740                 rte_compiler_barrier();
1741         }
1742
1743         /* FW finished processing, wait for HW to transmit all tx packets */
1744         DELAY(2000);
1745
1746         return 0;
1747 }
1748
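/* Deactivate the vport (disabling TPA if LRO was enabled), flush the Tx
 * queues (requesting an MCP drain for stuck queues), then stop all Tx and Rx
 * queues in reverse order and reset the fastpath rings.
 */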
1749 static int qede_stop_queues(struct qede_dev *qdev)
1750 {
1751         struct qed_update_vport_params vport_update_params;
1752         struct ecore_dev *edev = &qdev->edev;
1753         struct ecore_sge_tpa_params tpa_params;
1754         struct qede_fastpath *fp;
1755         int rc, tc, i;
1756
1757         /* Disable the vport */
1758         memset(&vport_update_params, 0, sizeof(vport_update_params));
1759         vport_update_params.vport_id = 0;
1760         vport_update_params.update_vport_active_flg = 1;
1761         vport_update_params.vport_active_flg = 0;
1762         vport_update_params.update_rss_flg = 0;
1763         /* Disable TPA */
1764         if (qdev->enable_lro) {
1765                 DP_INFO(edev, "Disabling LRO\n");
1766                 memset(&tpa_params, 0, sizeof(struct ecore_sge_tpa_params));
1767                 qede_update_sge_tpa_params(&tpa_params, qdev->mtu, false);
1768                 vport_update_params.sge_tpa_params = &tpa_params;
1769         }
1770
1771         DP_INFO(edev, "Deactivate vport\n");
1772         rc = qdev->ops->vport_update(edev, &vport_update_params);
1773         if (rc) {
1774                 DP_ERR(edev, "Failed to update vport\n");
1775                 return rc;
1776         }
1777
1778         DP_INFO(edev, "Flushing tx queues\n");
1779
1780         /* Flush Tx queues. If needed, request drain from MCP */
1781         for_each_queue(i) {
1782                 fp = &qdev->fp_array[i];
1783
1784                 if (fp->type & QEDE_FASTPATH_TX) {
1785                         for (tc = 0; tc < qdev->num_tc; tc++) {
1786                                 struct qede_tx_queue *txq = fp->txqs[tc];
1787
1788                                 rc = qede_drain_txq(qdev, txq, true);
1789                                 if (rc)
1790                                         return rc;
1791                         }
1792                 }
1793         }
1794
1795         /* Stop all Queues in reverse order */
1796         for (i = QEDE_QUEUE_CNT(qdev) - 1; i >= 0; i--) {
1797                 fp = &qdev->fp_array[i];
1798
1799                 /* Stop the Tx Queue(s) */
1800                 if (qdev->fp_array[i].type & QEDE_FASTPATH_TX) {
1801                         for (tc = 0; tc < qdev->num_tc; tc++) {
1802                                 struct qede_tx_queue *txq = fp->txqs[tc];
1803                                 DP_INFO(edev, "Stopping tx queues\n");
1804                                 rc = qdev->ops->q_tx_stop(edev, i, txq->handle);
1805                                 if (rc) {
1806                                         DP_ERR(edev, "Failed to stop TXQ #%d\n",
1807                                                i);
1808                                         return rc;
1809                                 }
1810                         }
1811                 }
1812
1813                 /* Stop the Rx Queue */
1814                 if (qdev->fp_array[i].type & QEDE_FASTPATH_RX) {
1815                         DP_INFO(edev, "Stopping rx queues\n");
1816                         rc = qdev->ops->q_rx_stop(edev, i, fp->rxq->handle);
1817                         if (rc) {
1818                                 DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
1819                                 return rc;
1820                         }
1821                 }
1822         }
1823         qede_reset_fp_rings(qdev);
1824
1825         return 0;
1826 }
1827
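/* Return the rings to their post-init state: free any mbufs still held,
 * reset the ecore BD/completion chains and the SW/HW indices, and refill
 * the Rx rings with fresh buffers.
 */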
1828 int qede_reset_fp_rings(struct qede_dev *qdev)
1829 {
1830         struct qede_fastpath *fp;
1831         struct qede_tx_queue *txq;
1832         uint8_t tc;
1833         uint16_t id, i;
1834
1835         for_each_queue(id) {
1836                 fp = &qdev->fp_array[id];
1837
1838                 if (fp->type & QEDE_FASTPATH_RX) {
1839                         DP_INFO(&qdev->edev,
1840                                 "Reset FP chain for RSS %u\n", id);
1841                         qede_rx_queue_release_mbufs(fp->rxq);
1842                         ecore_chain_reset(&fp->rxq->rx_bd_ring);
1843                         ecore_chain_reset(&fp->rxq->rx_comp_ring);
1844                         fp->rxq->sw_rx_prod = 0;
1845                         fp->rxq->sw_rx_cons = 0;
1846                         *fp->rxq->hw_cons_ptr = 0;
1847                         for (i = 0; i < fp->rxq->nb_rx_desc; i++) {
1848                                 if (qede_alloc_rx_buffer(fp->rxq)) {
1849                                         DP_ERR(&qdev->edev,
1850                                                "RX buffer allocation failed\n");
1851                                         return -ENOMEM;
1852                                 }
1853                         }
1854                 }
1855                 if (fp->type & QEDE_FASTPATH_TX) {
1856                         for (tc = 0; tc < qdev->num_tc; tc++) {
1857                                 txq = fp->txqs[tc];
1858                                 qede_tx_queue_release_mbufs(txq);
1859                                 ecore_chain_reset(&txq->tx_pbl);
1860                                 txq->sw_tx_cons = 0;
1861                                 txq->sw_tx_prod = 0;
1862                                 *txq->hw_cons_ptr = 0;
1863                         }
1864                 }
1865         }
1866
1867         return 0;
1868 }
1869
1870 /* This function releases the Rx/Tx queue memory of all fastpaths */
1871 void qede_free_mem_load(struct rte_eth_dev *eth_dev)
1872 {
1873         struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
1874         struct qede_fastpath *fp;
1875         uint16_t txq_idx;
1876         uint8_t id;
1877         uint8_t tc;
1878
1879         for_each_queue(id) {
1880                 fp = &qdev->fp_array[id];
1881                 if (fp->type & QEDE_FASTPATH_RX) {
1882                         if (!fp->rxq)
1883                                 continue;
1884                         qede_rx_queue_release(fp->rxq);
1885                         eth_dev->data->rx_queues[id] = NULL;
1886                 } else {
1887                         for (tc = 0; tc < qdev->num_tc; tc++) {
1888                                 if (!fp->txqs[tc])
1889                                         continue;
1890                                 txq_idx = fp->txqs[tc]->queue_id;
1891                                 qede_tx_queue_release(fp->txqs[tc]);
1892                                 eth_dev->data->tx_queues[txq_idx] = NULL;
1893                         }
1894                 }
1895         }
1896 }
1897
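/* dev_stop: stop all Rx/Tx queues, stop the fastpath and bring the link
 * down.
 */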
1898 void qede_dev_stop(struct rte_eth_dev *eth_dev)
1899 {
1900         struct qede_dev *qdev = eth_dev->data->dev_private;
1901         struct ecore_dev *edev = &qdev->edev;
1902
1903         DP_INFO(edev, "port %u\n", eth_dev->data->port_id);
1904
1905         if (qdev->state != QEDE_DEV_START) {
1906                 DP_INFO(edev, "Device not yet started\n");
1907                 return;
1908         }
1909
1910         if (qede_stop_queues(qdev))
1911                 DP_ERR(edev, "Failed to stop queues\n");
1912
1913         DP_INFO(edev, "Stopped queues\n");
1914
1915         qdev->ops->fastpath_stop(edev);
1916
1917         /* Bring the link down */
1918         qede_dev_set_link_state(eth_dev, false);
1919
1920         qdev->state = QEDE_DEV_STOP;
1921
1922         DP_INFO(edev, "dev_state is QEDE_DEV_STOP\n");
1923 }
1924
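/* Placeholder burst handler: handles no packets and always returns 0. */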
1925 uint16_t
1926 qede_rxtx_pkts_dummy(__rte_unused void *p_rxq,
1927                      __rte_unused struct rte_mbuf **pkts,
1928                      __rte_unused uint16_t nb_pkts)
1929 {
1930         return 0;
1931 }