fm10k: add dev start/stop
[dpdk.git] / lib / librte_pmd_fm10k / fm10k_ethdev.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_string_fns.h>
#include <rte_dev.h>
#include <rte_spinlock.h>

#include "fm10k.h"
#include "base/fm10k_api.h"

#define FM10K_RX_BUFF_ALIGN 512
/* Default delay to acquire mailbox lock */
#define FM10K_MBXLOCK_DELAY_US 20
#define UINT64_LOWER_32BITS_MASK 0x00000000ffffffffULL

/* Number of chars per uint32 type */
#define CHARS_PER_UINT32 (sizeof(uint32_t))
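/* 4-bit mask: one bit per byte lane of a 32-bit RETA register */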
#define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)

static void fm10k_close_mbx_service(struct fm10k_hw *hw);

static void
fm10k_mbx_initlock(struct fm10k_hw *hw)
{
        rte_spinlock_init(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
}

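/*
 * Acquire the mailbox lock, backing off for FM10K_MBXLOCK_DELAY_US
 * between attempts rather than spinning continuously.
 */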
static void
fm10k_mbx_lock(struct fm10k_hw *hw)
{
        while (!rte_spinlock_trylock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back)))
                rte_delay_us(FM10K_MBXLOCK_DELAY_US);
}

static void
fm10k_mbx_unlock(struct fm10k_hw *hw)
{
        rte_spinlock_unlock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
}

/*
 * reset queue to initial state, allocate software buffers used when starting
 * device.
 * return 0 on success
 * return -ENOMEM if buffers cannot be allocated
 * return -EINVAL if buffers do not satisfy alignment condition
 */
static inline int
rx_queue_reset(struct fm10k_rx_queue *q)
{
        uint64_t dma_addr;
        int i, diag;
        PMD_INIT_FUNC_TRACE();

        diag = rte_mempool_get_bulk(q->mp, (void **)q->sw_ring, q->nb_desc);
        if (diag != 0)
                return -ENOMEM;

        for (i = 0; i < q->nb_desc; ++i) {
                fm10k_pktmbuf_reset(q->sw_ring[i], q->port_id);
                if (!fm10k_addr_alignment_valid(q->sw_ring[i])) {
                        rte_mempool_put_bulk(q->mp, (void **)q->sw_ring,
                                                q->nb_desc);
                        return -EINVAL;
                }
                dma_addr = MBUF_DMA_ADDR_DEFAULT(q->sw_ring[i]);
                q->hw_ring[i].q.pkt_addr = dma_addr;
                q->hw_ring[i].q.hdr_addr = dma_addr;
        }

        q->next_dd = 0;
        q->next_alloc = 0;
        q->next_trigger = q->alloc_thresh - 1;
        FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);
        return 0;
}

/*
 * clean queue, descriptor rings, free software buffers used when stopping
 * device.
 */
static inline void
rx_queue_clean(struct fm10k_rx_queue *q)
{
        union fm10k_rx_desc zero = {.q = {0, 0, 0, 0} };
        uint32_t i;
        PMD_INIT_FUNC_TRACE();

        /* zero descriptor rings */
        for (i = 0; i < q->nb_desc; ++i)
                q->hw_ring[i] = zero;

        /* free software buffers */
        for (i = 0; i < q->nb_desc; ++i) {
                if (q->sw_ring[i]) {
                        rte_pktmbuf_free_seg(q->sw_ring[i]);
                        q->sw_ring[i] = NULL;
                }
        }
}

/*
 * free all queue memory used when releasing the queue (i.e. configure)
 */
static inline void
rx_queue_free(struct fm10k_rx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        if (q) {
                PMD_INIT_LOG(DEBUG, "Freeing rx queue %p", q);
                rx_queue_clean(q);
                if (q->sw_ring)
                        rte_free(q->sw_ring);
                rte_free(q);
        }
}

/*
 * disable RX queue, wait until HW finishes the necessary flush operation
 */
static inline int
rx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
{
        uint32_t reg, i;

        reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
        FM10K_WRITE_REG(hw, FM10K_RXQCTL(qnum),
                        reg & ~FM10K_RXQCTL_ENABLE);

        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
                /* poll the queue being disabled, not loop counter i */
                reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
                if (!(reg & FM10K_RXQCTL_ENABLE))
                        break;
        }

        if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
                return -1;

        return 0;
}

/*
 * reset queue to initial state, allocate software buffers used when starting
 * device
 */
static inline void
tx_queue_reset(struct fm10k_tx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        q->last_free = 0;
        q->next_free = 0;
        q->nb_used = 0;
        q->nb_free = q->nb_desc - 1;
        q->free_trigger = q->nb_free - q->free_thresh;
        fifo_reset(&q->rs_tracker, (q->nb_desc + 1) / q->rs_thresh);
        FM10K_PCI_REG_WRITE(q->tail_ptr, 0);
}

/*
 * clean queue, descriptor rings, free software buffers used when stopping
 * device
 */
static inline void
tx_queue_clean(struct fm10k_tx_queue *q)
{
        struct fm10k_tx_desc zero = {0, 0, 0, 0, 0, 0};
        uint32_t i;
        PMD_INIT_FUNC_TRACE();

        /* zero descriptor rings */
        for (i = 0; i < q->nb_desc; ++i)
                q->hw_ring[i] = zero;

        /* free software buffers */
        for (i = 0; i < q->nb_desc; ++i) {
                if (q->sw_ring[i]) {
                        rte_pktmbuf_free_seg(q->sw_ring[i]);
                        q->sw_ring[i] = NULL;
                }
        }
}

/*
 * free all queue memory used when releasing the queue (i.e. configure)
 */
static inline void
tx_queue_free(struct fm10k_tx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        if (q) {
                PMD_INIT_LOG(DEBUG, "Freeing tx queue %p", q);
                tx_queue_clean(q);
                if (q->rs_tracker.list)
                        rte_free(q->rs_tracker.list);
                if (q->sw_ring)
                        rte_free(q->sw_ring);
                rte_free(q);
        }
}

/*
 * disable TX queue, wait until HW finishes the necessary flush operation
 */
static inline int
tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
{
        uint32_t reg, i;

        reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
        FM10K_WRITE_REG(hw, FM10K_TXDCTL(qnum),
                        reg & ~FM10K_TXDCTL_ENABLE);

        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
                /* poll the queue being disabled, not loop counter i */
                reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
                if (!(reg & FM10K_TXDCTL_ENABLE))
                        break;
        }

        if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
                return -1;

        return 0;
}

static int
fm10k_dev_configure(struct rte_eth_dev *dev)
{
        PMD_INIT_FUNC_TRACE();

        if (dev->data->dev_conf.rxmode.hw_strip_crc == 0)
                PMD_INIT_LOG(WARNING, "fm10k always strips CRC");

        return 0;
}

static int
fm10k_dev_tx_init(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, ret;
        struct fm10k_tx_queue *txq;
        uint64_t base_addr;
        uint32_t size;

        /* Disable TXINT to avoid possible interrupt */
        for (i = 0; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_TXINT(i),
                                3 << FM10K_TXINT_TIMER_SHIFT);

        /* Setup TX queues */
        for (i = 0; i < dev->data->nb_tx_queues; ++i) {
                txq = dev->data->tx_queues[i];
                base_addr = txq->hw_ring_phys_addr;
                size = txq->nb_desc * sizeof(struct fm10k_tx_desc);

                /* disable queue to avoid issues while updating state */
                ret = tx_queue_disable(hw, i);
                if (ret) {
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }

                /* set location and size for descriptor ring */
                FM10K_WRITE_REG(hw, FM10K_TDBAL(i),
                                base_addr & UINT64_LOWER_32BITS_MASK);
                FM10K_WRITE_REG(hw, FM10K_TDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size);
        }
        return 0;
}

static int
fm10k_dev_rx_init(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, ret;
        struct fm10k_rx_queue *rxq;
        uint64_t base_addr;
        uint32_t size;
        uint32_t rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
        uint16_t buf_size;
        struct rte_pktmbuf_pool_private *mbp_priv;

        /* Disable RXINT to avoid possible interrupt */
        for (i = 0; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_RXINT(i),
                                3 << FM10K_RXINT_TIMER_SHIFT);

        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; ++i) {
                rxq = dev->data->rx_queues[i];
                base_addr = rxq->hw_ring_phys_addr;
                size = rxq->nb_desc * sizeof(union fm10k_rx_desc);

                /* disable queue to avoid issues while updating state */
                ret = rx_queue_disable(hw, i);
                if (ret) {
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }

                /* Setup the Base and Length of the Rx Descriptor Ring */
                FM10K_WRITE_REG(hw, FM10K_RDBAL(i),
                                base_addr & UINT64_LOWER_32BITS_MASK);
                FM10K_WRITE_REG(hw, FM10K_RDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_RDLEN(i), size);

                /* Configure the Rx buffer size for one buff without split */
                mbp_priv = rte_mempool_get_priv(rxq->mp);
                buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
                                        RTE_PKTMBUF_HEADROOM);
                FM10K_WRITE_REG(hw, FM10K_SRRCTL(i),
                                buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT);

                /* Enable drop on empty, it's RO for VF */
                if (hw->mac.type == fm10k_mac_pf && rxq->drop_en)
                        rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;

                FM10K_WRITE_REG(hw, FM10K_RXDCTL(i), rxdctl);
                FM10K_WRITE_FLUSH(hw);
        }

        return 0;
}

static int
fm10k_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int err = -1;
        uint32_t reg;
        struct fm10k_rx_queue *rxq;

        PMD_INIT_FUNC_TRACE();

        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];
                err = rx_queue_reset(rxq);
                if (err == -ENOMEM) {
                        PMD_INIT_LOG(ERR, "Failed to alloc memory : %d", err);
                        return err;
                } else if (err == -EINVAL) {
                        PMD_INIT_LOG(ERR, "Invalid buffer address alignment :"
                                " %d", err);
                        return err;
                }

                /* Setup the HW Rx Head and Tail Descriptor Pointers
                 * Note: this must be done AFTER the queue is enabled on real
                 * hardware, but BEFORE the queue is enabled when using the
                 * emulation platform. Do it in both places for now and remove
                 * this comment and the following two register writes when the
                 * emulation platform is no longer being used.
                 */
                FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);

                /* Set PF ownership flag for PF devices */
                reg = FM10K_READ_REG(hw, FM10K_RXQCTL(rx_queue_id));
                if (hw->mac.type == fm10k_mac_pf)
                        reg |= FM10K_RXQCTL_PF;
                reg |= FM10K_RXQCTL_ENABLE;
                /* enable RX queue */
                FM10K_WRITE_REG(hw, FM10K_RXQCTL(rx_queue_id), reg);
                FM10K_WRITE_FLUSH(hw);

                /* Setup the HW Rx Head and Tail Descriptor Pointers
                 * Note: this must be done AFTER the queue is enabled
                 */
                FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);
        }

        return err;
}

static int
fm10k_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        if (rx_queue_id < dev->data->nb_rx_queues) {
                /* Disable RX queue */
                rx_queue_disable(hw, rx_queue_id);

                /* Free mbuf and clean HW ring */
                rx_queue_clean(dev->data->rx_queues[rx_queue_id]);
        }

        return 0;
}

static int
fm10k_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        /** @todo - this should be defined in the shared code */
#define FM10K_TXDCTL_WRITE_BACK_MIN_DELAY       0x00010000
        uint32_t txdctl = FM10K_TXDCTL_WRITE_BACK_MIN_DELAY;
        int err = 0;

        PMD_INIT_FUNC_TRACE();

        if (tx_queue_id < dev->data->nb_tx_queues) {
                tx_queue_reset(dev->data->tx_queues[tx_queue_id]);

                /* reset head and tail pointers */
                FM10K_WRITE_REG(hw, FM10K_TDH(tx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_TDT(tx_queue_id), 0);

                /* enable TX queue */
                FM10K_WRITE_REG(hw, FM10K_TXDCTL(tx_queue_id),
                                        FM10K_TXDCTL_ENABLE | txdctl);
                FM10K_WRITE_FLUSH(hw);
        } else
                err = -1;

        return err;
}

static int
fm10k_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        if (tx_queue_id < dev->data->nb_tx_queues) {
                tx_queue_disable(hw, tx_queue_id);
                tx_queue_clean(dev->data->tx_queues[tx_queue_id]);
        }

        return 0;
}

/* fls = find last set bit = 32 minus the number of leading zeros */
#ifndef fls
#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
#endif
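/* Round-up term matching the SRRCTL buffer-size field granularity */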
#define BSIZEPKT_ROUNDUP ((1 << FM10K_SRRCTL_BSIZEPKT_SHIFT) - 1)
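/*
 * Start the device: stop/init/start the hardware, program the TX and RX
 * descriptor rings, map DGLORT 0 on the PF, then enable every queue that
 * is not marked for deferred start.
 */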
static int
fm10k_dev_start(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, diag;

        PMD_INIT_FUNC_TRACE();

        /* stop, init, then start the hw */
        diag = fm10k_stop_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware stop failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_init_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_start_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware start failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_dev_tx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "TX init failed: %d", diag);
                return diag;
        }

        diag = fm10k_dev_rx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "RX init failed: %d", diag);
                return diag;
        }

        if (hw->mac.type == fm10k_mac_pf) {
                /* Establish only VSI 0 as valid */
                FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);

                /* Configure RSS bits used in RETA table */
                FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0),
                                fls(dev->data->nb_rx_queues - 1) <<
                                FM10K_DGLORTDEC_RSSLENGTH_SHIFT);

                /* Invalidate all other GLORT entries */
                for (i = 1; i < FM10K_DGLORT_COUNT; i++)
                        FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(i),
                                        FM10K_DGLORTMAP_NONE);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct fm10k_rx_queue *rxq;
                rxq = dev->data->rx_queues[i];

                if (rxq->rx_deferred_start)
                        continue;
                diag = fm10k_dev_rx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
                        for (j = 0; j < i; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct fm10k_tx_queue *txq;
                txq = dev->data->tx_queues[i];

                if (txq->tx_deferred_start)
                        continue;
                diag = fm10k_dev_tx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
                        /* undo queues already started, TX and RX alike */
                        for (j = 0; j < i; ++j)
                                tx_queue_clean(dev->data->tx_queues[j]);
                        for (j = 0; j < dev->data->nb_rx_queues; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }

        return 0;
}

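/* Stop all queues: disable each TX and RX queue and free their mbufs. */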
static void
fm10k_dev_stop(struct rte_eth_dev *dev)
{
        int i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                fm10k_dev_tx_queue_stop(dev, i);

        for (i = 0; i < dev->data->nb_rx_queues; i++)
                fm10k_dev_rx_queue_stop(dev, i);
}

static void
fm10k_dev_close(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        /* Stop mailbox service first */
        fm10k_close_mbx_service(hw);
        fm10k_dev_stop(dev);
        fm10k_stop_hw(hw);
}

static int
fm10k_link_update(struct rte_eth_dev *dev,
        __rte_unused int wait_to_complete)
{
        PMD_INIT_FUNC_TRACE();

        /* The host-interface link is always up.  The speed is ~50Gbps per Gen3
         * x8 PCIe interface. For now, we leave the speed undefined since there
         * is no 50Gbps Ethernet. */
        dev->data->dev_link.link_speed  = 0;
        dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
        dev->data->dev_link.link_status = 1;

        return 0;
}

static void
fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        uint64_t ipackets, opackets, ibytes, obytes;
        struct fm10k_hw *hw =
                FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
                FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
        int i;

        PMD_INIT_FUNC_TRACE();

        fm10k_update_hw_stats(hw, hw_stats);

        ipackets = opackets = ibytes = obytes = 0;
        for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
                (i < FM10K_MAX_QUEUES_PF); ++i) {
                stats->q_ipackets[i] = hw_stats->q[i].rx_packets.count;
                stats->q_opackets[i] = hw_stats->q[i].tx_packets.count;
                stats->q_ibytes[i]   = hw_stats->q[i].rx_bytes.count;
                stats->q_obytes[i]   = hw_stats->q[i].tx_bytes.count;
                ipackets += stats->q_ipackets[i];
                opackets += stats->q_opackets[i];
                ibytes   += stats->q_ibytes[i];
                obytes   += stats->q_obytes[i];
        }
        stats->ipackets = ipackets;
        stats->opackets = opackets;
        stats->ibytes = ibytes;
        stats->obytes = obytes;
}

static void
fm10k_stats_reset(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
                FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        memset(hw_stats, 0, sizeof(*hw_stats));
        fm10k_rebind_hw_stats(hw, hw_stats);
}

static void
fm10k_dev_infos_get(struct rte_eth_dev *dev,
        struct rte_eth_dev_info *dev_info)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        dev_info->min_rx_bufsize     = FM10K_MIN_RX_BUF_SIZE;
        dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
        dev_info->max_rx_queues      = hw->mac.max_queues;
        dev_info->max_tx_queues      = hw->mac.max_queues;
        dev_info->max_mac_addrs      = 1;
        dev_info->max_hash_mac_addrs = 0;
        dev_info->max_vfs            = FM10K_MAX_VF_NUM;
        dev_info->max_vmdq_pools     = ETH_64_POOLS;
        dev_info->rx_offload_capa =
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM  |
                DEV_RX_OFFLOAD_TCP_CKSUM;
        dev_info->tx_offload_capa    = 0;
        dev_info->reta_size = FM10K_MAX_RSS_INDICES;

        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_thresh = {
                        .pthresh = FM10K_DEFAULT_RX_PTHRESH,
                        .hthresh = FM10K_DEFAULT_RX_HTHRESH,
                        .wthresh = FM10K_DEFAULT_RX_WTHRESH,
                },
                .rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(0),
                .rx_drop_en = 0,
        };

        dev_info->default_txconf = (struct rte_eth_txconf) {
                .tx_thresh = {
                        .pthresh = FM10K_DEFAULT_TX_PTHRESH,
                        .hthresh = FM10K_DEFAULT_TX_HTHRESH,
                        .wthresh = FM10K_DEFAULT_TX_WTHRESH,
                },
                .tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(0),
                .tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(0),
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
                                ETH_TXQ_FLAGS_NOOFFLOADS,
        };
}

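/*
 * Check that the requested number of descriptors lies within [min, max]
 * and is a multiple of mult. Returns 0 on success, -1 otherwise.
 */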
static inline int
check_nb_desc(uint16_t min, uint16_t max, uint16_t mult, uint16_t request)
{
        if ((request < min) || (request > max) || ((request % mult) != 0))
                return -1;
        else
                return 0;
}

/*
 * Create a memzone for hardware descriptor rings. Malloc cannot be used since
 * the physical address is required. If the memzone is already created, then
 * this function returns a pointer to the existing memzone.
 */
static inline const struct rte_memzone *
allocate_hw_ring(const char *driver_name, const char *ring_name,
        uint8_t port_id, uint16_t queue_id, int socket_id,
        uint32_t size, uint32_t align)
{
        char name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(name, sizeof(name), "%s_%s_%d_%d_%d",
                 driver_name, ring_name, port_id, queue_id, socket_id);

        /* return the memzone if it already exists */
        mz = rte_memzone_lookup(name);
        if (mz)
                return mz;

#ifdef RTE_LIBRTE_XEN_DOM0
        return rte_memzone_reserve_bounded(name, size, socket_id, 0, align,
                                           RTE_PGSIZE_2M);
#else
        return rte_memzone_reserve_aligned(name, size, socket_id, 0, align);
#endif
}

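/*
 * Check that the requested threshold lies within [min, max] and evenly
 * divides div. Returns 0 on success, -1 otherwise.
 */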
static inline int
check_thresh(uint16_t min, uint16_t max, uint16_t div, uint16_t request)
{
        if ((request < min) || (request > max) || ((div % request) != 0))
                return -1;
        else
                return 0;
}

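/*
 * Validate and apply the Rx queue configuration, falling back to the
 * driver default when rx_free_thresh is zero. Returns 0 on success,
 * -EINVAL when the threshold violates the constraints.
 */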
static inline int
handle_rxconf(struct fm10k_rx_queue *q, const struct rte_eth_rxconf *conf)
{
        uint16_t rx_free_thresh;

        if (conf->rx_free_thresh == 0)
                rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(q);
        else
                rx_free_thresh = conf->rx_free_thresh;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_RX_FREE_THRESH_MIN(q),
                        FM10K_RX_FREE_THRESH_MAX(q),
                        FM10K_RX_FREE_THRESH_DIV(q),
                        rx_free_thresh)) {
                PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        rx_free_thresh, FM10K_RX_FREE_THRESH_MAX(q),
                        FM10K_RX_FREE_THRESH_MIN(q),
                        FM10K_RX_FREE_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->alloc_thresh = rx_free_thresh;
        q->drop_en = conf->rx_drop_en;
        q->rx_deferred_start = conf->rx_deferred_start;

        return 0;
}

/*
 * Hardware requires specific alignment for Rx packet buffers. At
 * least one of the following two conditions must be satisfied.
 *  1. Address is 512B aligned
 *  2. Address is 8B aligned and buffer does not cross 4K boundary.
 *
 * As such, the driver may need to adjust the DMA address within the
 * buffer by up to 512B. The mempool element size is checked here
 * to make sure a maximally sized Ethernet frame can still be wholly
 * contained within the buffer after 512B alignment.
 *
 * return 1 if the element size is valid, otherwise return 0.
 */
static int
mempool_element_size_valid(struct rte_mempool *mp)
{
        uint32_t min_size;

        /* elt_size includes mbuf header and headroom */
        min_size = mp->elt_size - sizeof(struct rte_mbuf) -
                        RTE_PKTMBUF_HEADROOM;

        /* account for up to 512B of alignment */
        min_size -= FM10K_RX_BUFF_ALIGN;

        /* sanity check for overflow */
        if (min_size > mp->elt_size)
                return 0;

        if (min_size < ETHER_MAX_VLAN_FRAME_LEN)
                return 0;

        /* size is valid */
        return 1;
}

static int
fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        uint16_t nb_desc, unsigned int socket_id,
        const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_rx_queue *q;
        const struct rte_memzone *mz;

        PMD_INIT_FUNC_TRACE();

        /* make sure the mempool element size can account for alignment. */
        if (!mempool_element_size_valid(mp)) {
                PMD_INIT_LOG(ERR, "Error : Mempool element size is too small");
                return (-EINVAL);
        }

        /* make sure a valid number of descriptors have been requested */
        if (check_nb_desc(FM10K_MIN_RX_DESC, FM10K_MAX_RX_DESC,
                                FM10K_MULT_RX_DESC, nb_desc)) {
                PMD_INIT_LOG(ERR, "Number of Rx descriptors (%u) must be "
                        "less than or equal to %"PRIu32", "
                        "greater than or equal to %u, "
                        "and a multiple of %u",
                        nb_desc, (uint32_t)FM10K_MAX_RX_DESC, FM10K_MIN_RX_DESC,
                        FM10K_MULT_RX_DESC);
                return (-EINVAL);
        }

        /*
         * if this queue existed already, free the associated memory. The
         * queue cannot be reused in case we need to allocate memory on
         * different socket than was previously used.
         */
        if (dev->data->rx_queues[queue_id] != NULL) {
                rx_queue_free(dev->data->rx_queues[queue_id]);
                dev->data->rx_queues[queue_id] = NULL;
        }

        /* allocate memory for the queue structure */
        q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
                                socket_id);
        if (q == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
                return (-ENOMEM);
        }

        /* setup queue */
        q->mp = mp;
        q->nb_desc = nb_desc;
        q->port_id = dev->data->port_id;
        q->queue_id = queue_id;
        q->tail_ptr = (volatile uint32_t *)
                &((uint32_t *)hw->hw_addr)[FM10K_RDT(queue_id)];
        if (handle_rxconf(q, conf)) {
                /* do not leak the queue structure on bad thresholds */
                rte_free(q);
                return (-EINVAL);
        }

        /* allocate memory for the software ring */
        q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
                                        nb_desc * sizeof(struct rte_mbuf *),
                                        RTE_CACHE_LINE_SIZE, socket_id);
        if (q->sw_ring == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate software ring");
                rte_free(q);
                return (-ENOMEM);
        }

        /*
         * allocate memory for the hardware descriptor ring. A memzone large
         * enough to hold the maximum ring size is requested to allow for
         * resizing in later calls to the queue setup function.
         */
        mz = allocate_hw_ring(dev->driver->pci_drv.name, "rx_ring",
                                dev->data->port_id, queue_id, socket_id,
                                FM10K_MAX_RX_RING_SZ, FM10K_ALIGN_RX_DESC);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
        q->hw_ring_phys_addr = mz->phys_addr;

        dev->data->rx_queues[queue_id] = q;
        return 0;
}

static void
fm10k_rx_queue_release(void *queue)
{
        PMD_INIT_FUNC_TRACE();

        rx_queue_free(queue);
}

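/*
 * Validate and apply the Tx queue configuration, falling back to driver
 * defaults when tx_free_thresh or tx_rs_thresh is zero. Returns 0 on
 * success, -EINVAL when a threshold violates the constraints.
 */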
static inline int
handle_txconf(struct fm10k_tx_queue *q, const struct rte_eth_txconf *conf)
{
        uint16_t tx_free_thresh;
        uint16_t tx_rs_thresh;

        /* the constraint macros require that tx_free_thresh is configured
         * before tx_rs_thresh */
        if (conf->tx_free_thresh == 0)
                tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(q);
        else
                tx_free_thresh = conf->tx_free_thresh;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_TX_FREE_THRESH_MIN(q),
                        FM10K_TX_FREE_THRESH_MAX(q),
                        FM10K_TX_FREE_THRESH_DIV(q),
                        tx_free_thresh)) {
                PMD_INIT_LOG(ERR, "tx_free_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        tx_free_thresh, FM10K_TX_FREE_THRESH_MAX(q),
                        FM10K_TX_FREE_THRESH_MIN(q),
                        FM10K_TX_FREE_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->free_thresh = tx_free_thresh;

        if (conf->tx_rs_thresh == 0)
                tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(q);
        else
                tx_rs_thresh = conf->tx_rs_thresh;

        q->tx_deferred_start = conf->tx_deferred_start;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_TX_RS_THRESH_MIN(q),
                        FM10K_TX_RS_THRESH_MAX(q),
                        FM10K_TX_RS_THRESH_DIV(q),
                        tx_rs_thresh)) {
                PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        tx_rs_thresh, FM10K_TX_RS_THRESH_MAX(q),
                        FM10K_TX_RS_THRESH_MIN(q),
                        FM10K_TX_RS_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->rs_thresh = tx_rs_thresh;

        return 0;
}

static int
fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        uint16_t nb_desc, unsigned int socket_id,
        const struct rte_eth_txconf *conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_tx_queue *q;
        const struct rte_memzone *mz;

        PMD_INIT_FUNC_TRACE();

        /* make sure a valid number of descriptors have been requested */
        if (check_nb_desc(FM10K_MIN_TX_DESC, FM10K_MAX_TX_DESC,
                                FM10K_MULT_TX_DESC, nb_desc)) {
                PMD_INIT_LOG(ERR, "Number of Tx descriptors (%u) must be "
                        "less than or equal to %"PRIu32", "
                        "greater than or equal to %u, "
                        "and a multiple of %u",
                        nb_desc, (uint32_t)FM10K_MAX_TX_DESC, FM10K_MIN_TX_DESC,
                        FM10K_MULT_TX_DESC);
                return (-EINVAL);
        }

        /*
         * if this queue existed already, free the associated memory. The
         * queue cannot be reused in case we need to allocate memory on
         * different socket than was previously used.
         */
        if (dev->data->tx_queues[queue_id] != NULL) {
                tx_queue_free(dev->data->tx_queues[queue_id]);
                dev->data->tx_queues[queue_id] = NULL;
        }

        /* allocate memory for the queue structure */
        q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
                                socket_id);
        if (q == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
                return (-ENOMEM);
        }

        /* setup queue */
        q->nb_desc = nb_desc;
        q->port_id = dev->data->port_id;
        q->queue_id = queue_id;
        q->tail_ptr = (volatile uint32_t *)
                &((uint32_t *)hw->hw_addr)[FM10K_TDT(queue_id)];
        if (handle_txconf(q, conf)) {
                /* do not leak the queue structure on bad thresholds */
                rte_free(q);
                return (-EINVAL);
        }

        /* allocate memory for the software ring */
        q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
                                        nb_desc * sizeof(struct rte_mbuf *),
                                        RTE_CACHE_LINE_SIZE, socket_id);
        if (q->sw_ring == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate software ring");
                rte_free(q);
                return (-ENOMEM);
        }

        /*
         * allocate memory for the hardware descriptor ring. A memzone large
         * enough to hold the maximum ring size is requested to allow for
         * resizing in later calls to the queue setup function.
         */
        mz = allocate_hw_ring(dev->driver->pci_drv.name, "tx_ring",
                                dev->data->port_id, queue_id, socket_id,
                                FM10K_MAX_TX_RING_SZ, FM10K_ALIGN_TX_DESC);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
        q->hw_ring_phys_addr = mz->phys_addr;

        /*
         * allocate memory for the RS bit tracker. Enough slots to hold the
         * descriptor index for each RS bit needing to be set are required.
         */
        q->rs_tracker.list = rte_zmalloc_socket("fm10k rs tracker",
                                ((nb_desc + 1) / q->rs_thresh) *
                                sizeof(uint16_t),
                                RTE_CACHE_LINE_SIZE, socket_id);
        if (q->rs_tracker.list == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate RS bit tracker");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }

        dev->data->tx_queues[queue_id] = q;
        return 0;
}

static void
fm10k_tx_queue_release(void *queue)
{
        PMD_INIT_FUNC_TRACE();

        tx_queue_free(queue);
}

static int
fm10k_reta_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t i, j, idx, shift;
        uint8_t mask;
        uint32_t reta;

        PMD_INIT_FUNC_TRACE();

        if (reta_size > FM10K_MAX_RSS_INDICES) {
                PMD_INIT_LOG(ERR, "The size of the configured hash lookup "
                        "table (%d) exceeds the maximum supported by "
                        "hardware (%d)", reta_size, FM10K_MAX_RSS_INDICES);
                return -EINVAL;
        }

        /*
         * Update Redirection Table RETA[n], n=0..31. The redirection table
         * holds 128 entries in 32 registers, four entries per register.
         */
        for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) &
                                BIT_MASK_PER_UINT32);
                if (mask == 0)
                        continue;

                reta = 0;
                if (mask != BIT_MASK_PER_UINT32)
                        reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));

                for (j = 0; j < CHARS_PER_UINT32; j++) {
                        if (mask & (0x1 << j)) {
                                if (mask != BIT_MASK_PER_UINT32)
                                        reta &= ~(UINT8_MAX << CHAR_BIT * j);
                                reta |= reta_conf[idx].reta[shift + j] <<
                                                (CHAR_BIT * j);
                        }
                }
                FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2), reta);
        }

        return 0;
}

static int
fm10k_reta_query(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t i, j, idx, shift;
        uint8_t mask;
        uint32_t reta;

        PMD_INIT_FUNC_TRACE();

        if (reta_size < FM10K_MAX_RSS_INDICES) {
                PMD_INIT_LOG(ERR, "The size of the configured hash lookup "
                        "table (%d) is smaller than the hardware table "
                        "size (%d)", reta_size, FM10K_MAX_RSS_INDICES);
                return -EINVAL;
        }

        /*
         * Read Redirection Table RETA[n], n=0..31. The redirection table
         * holds 128 entries in 32 registers, four entries per register.
         */
        for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) &
                                BIT_MASK_PER_UINT32);
                if (mask == 0)
                        continue;

                reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));
                for (j = 0; j < CHARS_PER_UINT32; j++) {
                        if (mask & (0x1 << j))
                                reta_conf[idx].reta[shift + j] = ((reta >>
                                        CHAR_BIT * j) & UINT8_MAX);
                }
        }

        return 0;
}

/* Mailbox message handler in VF */
static const struct fm10k_msg_data fm10k_msgdata_vf[] = {
        FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
        FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf),
        FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
};

/* Mailbox message handler in PF */
static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
        FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
        FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
        FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
        FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
        FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
        FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
};

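/*
 * Initialize the mailbox lock, install the PF or VF message handlers and
 * open the mailbox connection.
 */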
static int
fm10k_setup_mbx_service(struct fm10k_hw *hw)
{
        int err;

        /* Initialize mailbox lock */
        fm10k_mbx_initlock(hw);

        /* Replace default message handler with new ones */
        if (hw->mac.type == fm10k_mac_pf)
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
        else
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);

        if (err) {
                PMD_INIT_LOG(ERR, "Failed to register mailbox handler, err: %d",
                                err);
                return err;
        }
        /* Connect to SM for PF device or PF for VF device */
        return hw->mbx.ops.connect(hw, &hw->mbx);
}

static void
fm10k_close_mbx_service(struct fm10k_hw *hw)
{
        /* Disconnect from SM for PF device or PF for VF device */
        hw->mbx.ops.disconnect(hw, &hw->mbx);
}

static struct eth_dev_ops fm10k_eth_dev_ops = {
        .dev_configure          = fm10k_dev_configure,
        .dev_start              = fm10k_dev_start,
        .dev_stop               = fm10k_dev_stop,
        .dev_close              = fm10k_dev_close,
        .stats_get              = fm10k_stats_get,
        .stats_reset            = fm10k_stats_reset,
        .link_update            = fm10k_link_update,
        .dev_infos_get          = fm10k_dev_infos_get,
        .rx_queue_start         = fm10k_dev_rx_queue_start,
        .rx_queue_stop          = fm10k_dev_rx_queue_stop,
        .tx_queue_start         = fm10k_dev_tx_queue_start,
        .tx_queue_stop          = fm10k_dev_tx_queue_stop,
        .rx_queue_setup         = fm10k_rx_queue_setup,
        .rx_queue_release       = fm10k_rx_queue_release,
        .tx_queue_setup         = fm10k_tx_queue_setup,
        .tx_queue_release       = fm10k_tx_queue_release,
        .reta_update            = fm10k_reta_update,
        .reta_query             = fm10k_reta_query,
};

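/*
 * Per-device initialization: map the BAR registers into the shared-code hw
 * struct, initialize the hardware and mailbox service, then enable the
 * logical port and install the default MAC/VLAN filter via the mailbox.
 */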
static int
eth_fm10k_dev_init(__rte_unused struct eth_driver *eth_drv,
        struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int diag;

        PMD_INIT_FUNC_TRACE();

        dev->dev_ops = &fm10k_eth_dev_ops;

        /* only initialize in the primary process */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        /* Vendor and Device ID need to be set before init of shared code */
        memset(hw, 0, sizeof(*hw));
        hw->device_id = dev->pci_dev->id.device_id;
        hw->vendor_id = dev->pci_dev->id.vendor_id;
        hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
        hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
        hw->revision_id = 0;
        hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
        if (hw->hw_addr == NULL) {
                PMD_INIT_LOG(ERR, "Bad mem resource."
                        " Try to blacklist unused devices.");
                return -EIO;
        }

        /* Store fm10k_adapter pointer */
        hw->back = dev->data->dev_private;

        /* Initialize the shared code */
        diag = fm10k_init_shared_code(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
                return -EIO;
        }

        /*
         * Initialize bus info. Normally we would call fm10k_get_bus_info(),
         * but there is no way to get link status without reading BAR4. Until
         * this works, assume we have maximum bandwidth.
         * @todo - fix bus info
         */
        hw->bus_caps.speed = fm10k_bus_speed_8000;
        hw->bus_caps.width = fm10k_bus_width_pcie_x8;
        hw->bus_caps.payload = fm10k_bus_payload_512;
        hw->bus.speed = fm10k_bus_speed_8000;
        hw->bus.width = fm10k_bus_width_pcie_x8;
        hw->bus.payload = fm10k_bus_payload_256;

        /* Initialize the hw */
        diag = fm10k_init_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
                return -EIO;
        }

        /* Initialize MAC address(es) */
        dev->data->mac_addrs = rte_zmalloc("fm10k", ETHER_ADDR_LEN, 0);
        if (dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate memory for MAC addresses");
                return -ENOMEM;
        }

        diag = fm10k_read_mac_addr(hw);
        if (diag != FM10K_SUCCESS) {
                /*
                 * TODO: remove special handling on VF. Need shared code to
                 * fix first.
                 */
                if (hw->mac.type == fm10k_mac_pf) {
                        PMD_INIT_LOG(ERR, "Read MAC addr failed: %d", diag);
                        return -EIO;
                } else {
                        /* Generate a random addr */
                        eth_random_addr(hw->mac.addr);
                        memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
                }
        }

        ether_addr_copy((const struct ether_addr *)hw->mac.addr,
                        &dev->data->mac_addrs[0]);

        /* Reset the hw statistics */
        fm10k_stats_reset(dev);

        /* Reset the hw */
        diag = fm10k_reset_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware reset failed: %d", diag);
                return -EIO;
        }

        /* Setup mailbox service */
        diag = fm10k_setup_mbx_service(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Failed to setup mailbox: %d", diag);
                return -EIO;
        }

        /*
         * The calls below trigger operations on the mailbox, so acquire the
         * lock to avoid a race with the interrupt handler. Operations on the
         * mailbox FIFO raise an interrupt to the PF/SM, whose handler responds
         * with an interrupt to our side; only then is the mailbox FIFO
         * touched.
         */
        fm10k_mbx_lock(hw);
        /* Enable port first */
        hw->mac.ops.update_lport_state(hw, 0, 0, 1);

        /* Update default vlan */
        hw->mac.ops.update_vlan(hw, hw->mac.default_vid, 0, true);

        /*
         * Add the default mac/vlan filter. The glort is assigned by the SM
         * for the PF and is unused for a VF; the PF will assign the correct
         * glort for its VFs.
         */
        hw->mac.ops.update_uc_addr(hw, hw->mac.dglort_map, hw->mac.addr,
                              hw->mac.default_vid, 1, 0);

        /* Set multicast mode by default. App can change to other modes via
         * other API funcs.
         */
        hw->mac.ops.update_xcast_mode(hw, hw->mac.dglort_map,
                                        FM10K_XCAST_MODE_MULTI);

        fm10k_mbx_unlock(hw);

        return 0;
}

/*
 * The set of PCI devices this driver supports. This driver will enable both PF
 * and SRIOV-VF devices.
 */
static struct rte_pci_id pci_id_fm10k_map[] = {
#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
#include "rte_pci_dev_ids.h"
        { .vendor_id = 0, /* sentinel */ },
};

static struct eth_driver rte_pmd_fm10k = {
        {
                .name = "rte_pmd_fm10k",
                .id_table = pci_id_fm10k_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
        },
        .eth_dev_init = eth_fm10k_dev_init,
        .dev_private_size = sizeof(struct fm10k_adapter),
};

/*
 * Driver initialization routine.
 * Invoked once at EAL init time.
 * Registers itself as the [Poll Mode] Driver of PCI FM10K devices.
 */
static int
rte_pmd_fm10k_init(__rte_unused const char *name,
        __rte_unused const char *params)
{
        PMD_INIT_FUNC_TRACE();
        rte_eth_driver_register(&rte_pmd_fm10k);
        return 0;
}

static struct rte_driver rte_fm10k_driver = {
        .type = PMD_PDEV,
        .init = rte_pmd_fm10k_init,
};

PMD_REGISTER_DRIVER(rte_fm10k_driver);