fm10k: add PF RSS
[dpdk.git] / lib / librte_pmd_fm10k / fm10k_ethdev.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <rte_ethdev.h>
35 #include <rte_malloc.h>
36 #include <rte_memzone.h>
37 #include <rte_string_fns.h>
38 #include <rte_dev.h>
39 #include <rte_spinlock.h>
40
41 #include "fm10k.h"
42 #include "base/fm10k_api.h"
43
44 #define FM10K_RX_BUFF_ALIGN 512
45 /* Default delay to acquire mailbox lock */
46 #define FM10K_MBXLOCK_DELAY_US 20
47 #define UINT64_LOWER_32BITS_MASK 0x00000000ffffffffULL
48
49 /* Number of chars per uint32 type */
50 #define CHARS_PER_UINT32 (sizeof(uint32_t))
51 #define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)
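/* These two macros drive the RETA handling below: each 32-bit RETA register
 * packs four one-byte queue indices, so the loops step by CHARS_PER_UINT32
 * and BIT_MASK_PER_UINT32 (0xF) selects the four mask bits that cover one
 * register.
 */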
52
53 static void fm10k_close_mbx_service(struct fm10k_hw *hw);
54
55 static void
56 fm10k_mbx_initlock(struct fm10k_hw *hw)
57 {
58         rte_spinlock_init(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
59 }
60
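/* Acquire the mailbox lock by polling rte_spinlock_trylock() with a fixed
 * FM10K_MBXLOCK_DELAY_US backoff between attempts; the lock can be held for
 * a while by the interrupt handler, so backing off between tries presumably
 * keeps the spin cheap.
 */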
61 static void
62 fm10k_mbx_lock(struct fm10k_hw *hw)
63 {
64         while (!rte_spinlock_trylock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back)))
65                 rte_delay_us(FM10K_MBXLOCK_DELAY_US);
66 }
67
68 static void
69 fm10k_mbx_unlock(struct fm10k_hw *hw)
70 {
71         rte_spinlock_unlock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
72 }
73
74 /*
75  * reset queue to initial state, allocate software buffers used when starting
76  * device.
77  * return 0 on success
78  * return -ENOMEM if buffers cannot be allocated
79  * return -EINVAL if buffers do not satisfy alignment condition
80  */
81 static inline int
82 rx_queue_reset(struct fm10k_rx_queue *q)
83 {
84         uint64_t dma_addr;
85         int i, diag;
86         PMD_INIT_FUNC_TRACE();
87
88         diag = rte_mempool_get_bulk(q->mp, (void **)q->sw_ring, q->nb_desc);
89         if (diag != 0)
90                 return -ENOMEM;
91
92         for (i = 0; i < q->nb_desc; ++i) {
93                 fm10k_pktmbuf_reset(q->sw_ring[i], q->port_id);
94                 if (!fm10k_addr_alignment_valid(q->sw_ring[i])) {
95                         rte_mempool_put_bulk(q->mp, (void **)q->sw_ring,
96                                                 q->nb_desc);
97                         return -EINVAL;
98                 }
99                 dma_addr = MBUF_DMA_ADDR_DEFAULT(q->sw_ring[i]);
100                 q->hw_ring[i].q.pkt_addr = dma_addr;
101                 q->hw_ring[i].q.hdr_addr = dma_addr;
102         }
103
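        /* Reset the ring bookkeeping, then hand the freshly filled
         * descriptors to hardware: writing nb_desc - 1 to the tail register
         * gives hardware all but one slot, the usual way to keep a full
         * ring distinguishable from an empty one.
         */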
104         q->next_dd = 0;
105         q->next_alloc = 0;
106         q->next_trigger = q->alloc_thresh - 1;
107         FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);
108         return 0;
109 }
110
111 /*
112  * clean queue, descriptor rings, free software buffers used when stopping
113  * device.
114  */
115 static inline void
116 rx_queue_clean(struct fm10k_rx_queue *q)
117 {
118         union fm10k_rx_desc zero = {.q = {0, 0, 0, 0} };
119         uint32_t i;
120         PMD_INIT_FUNC_TRACE();
121
122         /* zero descriptor rings */
123         for (i = 0; i < q->nb_desc; ++i)
124                 q->hw_ring[i] = zero;
125
126         /* free software buffers */
127         for (i = 0; i < q->nb_desc; ++i) {
128                 if (q->sw_ring[i]) {
129                         rte_pktmbuf_free_seg(q->sw_ring[i]);
130                         q->sw_ring[i] = NULL;
131                 }
132         }
133 }
134
135 /*
136  * free all queue memory used when releasing the queue (e.g. on reconfigure)
137  */
138 static inline void
139 rx_queue_free(struct fm10k_rx_queue *q)
140 {
141         PMD_INIT_FUNC_TRACE();
142         if (q) {
143                 PMD_INIT_LOG(DEBUG, "Freeing rx queue %p", q);
144                 rx_queue_clean(q);
145                 if (q->sw_ring)
146                         rte_free(q->sw_ring);
147                 rte_free(q);
148         }
149 }
150
151 /*
152  * disable RX queue, wait until HW finishes the necessary flush operation
153  */
154 static inline int
155 rx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
156 {
157         uint32_t reg, i;
158
159         reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
160         FM10K_WRITE_REG(hw, FM10K_RXQCTL(qnum),
161                         reg & ~FM10K_RXQCTL_ENABLE);
162
163         /* Wait 100us at most */
164         for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
165                 rte_delay_us(1);
166                 reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
167                 if (!(reg & FM10K_RXQCTL_ENABLE))
168                         break;
169         }
170
171         if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
172                 return -1;
173
174         return 0;
175 }
176
177 /*
178  * reset queue to initial state, allocate software buffers used when starting
179  * device
180  */
181 static inline void
182 tx_queue_reset(struct fm10k_tx_queue *q)
183 {
184         PMD_INIT_FUNC_TRACE();
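        /* One descriptor is kept in reserve (nb_free = nb_desc - 1), and
         * free_trigger marks the fill level at which transmitted descriptors
         * start being reclaimed; rs_tracker holds one slot per rs_thresh
         * descriptors, matching its allocation in fm10k_tx_queue_setup().
         */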
185         q->last_free = 0;
186         q->next_free = 0;
187         q->nb_used = 0;
188         q->nb_free = q->nb_desc - 1;
189         q->free_trigger = q->nb_free - q->free_thresh;
190         fifo_reset(&q->rs_tracker, (q->nb_desc + 1) / q->rs_thresh);
191         FM10K_PCI_REG_WRITE(q->tail_ptr, 0);
192 }
193
194 /*
195  * clean queue, descriptor rings, free software buffers used when stopping
196  * device
197  */
198 static inline void
199 tx_queue_clean(struct fm10k_tx_queue *q)
200 {
201         struct fm10k_tx_desc zero = {0, 0, 0, 0, 0, 0};
202         uint32_t i;
203         PMD_INIT_FUNC_TRACE();
204
205         /* zero descriptor rings */
206         for (i = 0; i < q->nb_desc; ++i)
207                 q->hw_ring[i] = zero;
208
209         /* free software buffers */
210         for (i = 0; i < q->nb_desc; ++i) {
211                 if (q->sw_ring[i]) {
212                         rte_pktmbuf_free_seg(q->sw_ring[i]);
213                         q->sw_ring[i] = NULL;
214                 }
215         }
216 }
217
218 /*
219  * free all queue memory used when releasing the queue (e.g. on reconfigure)
220  */
221 static inline void
222 tx_queue_free(struct fm10k_tx_queue *q)
223 {
224         PMD_INIT_FUNC_TRACE();
225         if (q) {
226                 PMD_INIT_LOG(DEBUG, "Freeing tx queue %p", q);
227                 tx_queue_clean(q);
228                 if (q->rs_tracker.list)
229                         rte_free(q->rs_tracker.list);
230                 if (q->sw_ring)
231                         rte_free(q->sw_ring);
232                 rte_free(q);
233         }
234 }
235
236 /*
237  * disable TX queue, wait until HW finishes the necessary flush operation
238  */
239 static inline int
240 tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
241 {
242         uint32_t reg, i;
243
244         reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
245         FM10K_WRITE_REG(hw, FM10K_TXDCTL(qnum),
246                         reg & ~FM10K_TXDCTL_ENABLE);
247
248         /* Wait 100us at most */
249         for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
250                 rte_delay_us(1);
251                 reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
252                 if (!(reg & FM10K_TXDCTL_ENABLE))
253                         break;
254         }
255
256         if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
257                 return -1;
258
259         return 0;
260 }
261
262 static int
263 fm10k_dev_configure(struct rte_eth_dev *dev)
264 {
265         PMD_INIT_FUNC_TRACE();
266
267         if (dev->data->dev_conf.rxmode.hw_strip_crc == 0)
268                 PMD_INIT_LOG(WARNING, "fm10k always strips CRC");
269
270         return 0;
271 }
272
273 static void
274 fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
275 {
276         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
277         struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
278         uint32_t mrqc, *key, i, reta, j;
279         uint64_t hf;
280
281 #define RSS_KEY_SIZE 40
282         static uint8_t rss_intel_key[RSS_KEY_SIZE] = {
283                 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
284                 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
285                 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
286                 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
287                 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
288         };
289
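        /* RSS is configured only when multiple Rx queues are in use, the
         * application requested ETH_MQ_RX_RSS and supplied a non-empty hash
         * mask; otherwise MRQC is left untouched.
         */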
290         if (dev->data->nb_rx_queues == 1 ||
291             dev_conf->rxmode.mq_mode != ETH_MQ_RX_RSS ||
292             dev_conf->rx_adv_conf.rss_conf.rss_hf == 0)
293                 return;
294
295         /* Load the RSS key: the default Intel key, or one supplied by the user (rss_key) */
296         if (dev_conf->rx_adv_conf.rss_conf.rss_key == NULL)
297                 key = (uint32_t *)rss_intel_key;
298         else
299                 key = (uint32_t *)dev_conf->rx_adv_conf.rss_conf.rss_key;
300
301         /* Now fill our hash function seeds, 4 bytes at a time */
302         for (i = 0; i < RSS_KEY_SIZE / sizeof(*key); ++i)
303                 FM10K_WRITE_REG(hw, FM10K_RSSRK(0, i), key[i]);
304
305         /*
306          * Fill in the redirection table, four entries per register. The
307          * byte-swap is needed because entries accumulate MSB-first while
308          * the little-endian register wants entry 0 in the lowest byte.
309          */
310         reta = 0;
311         for (i = 0, j = 0; i < FM10K_RETA_SIZE; i++, j++) {
312                 if (j == dev->data->nb_rx_queues)
313                         j = 0;
314                 reta = (reta << CHAR_BIT) | j;
315                 if ((i & 3) == 3)
316                         FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2),
317                                         rte_bswap32(reta));
318         }
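        /* For example, with three Rx queues the 128-entry table repeats
         * 0,1,2,0,... and the first write (at i == 3) stores entries
         * {0,1,2,0}, byte-swapped so entry 0 lands in the low byte.
         */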
319
320         /*
321          * Generate RSS hash based on packet types, TCP/UDP
322          * port numbers and/or IPv4/v6 src and dst addresses
323          */
324         hf = dev_conf->rx_adv_conf.rss_conf.rss_hf;
325         mrqc = 0;
326         mrqc |= (hf & ETH_RSS_IPV4_TCP)    ? FM10K_MRQC_TCP_IPV4 : 0;
327         mrqc |= (hf & ETH_RSS_IPV4)        ? FM10K_MRQC_IPV4     : 0;
328         mrqc |= (hf & ETH_RSS_IPV6)        ? FM10K_MRQC_IPV6     : 0;
329         mrqc |= (hf & ETH_RSS_IPV6_EX)     ? FM10K_MRQC_IPV6     : 0;
330         mrqc |= (hf & ETH_RSS_IPV6_TCP)    ? FM10K_MRQC_TCP_IPV6 : 0;
331         mrqc |= (hf & ETH_RSS_IPV6_TCP_EX) ? FM10K_MRQC_TCP_IPV6 : 0;
332         mrqc |= (hf & ETH_RSS_IPV4_UDP)    ? FM10K_MRQC_UDP_IPV4 : 0;
333         mrqc |= (hf & ETH_RSS_IPV6_UDP)    ? FM10K_MRQC_UDP_IPV6 : 0;
334         mrqc |= (hf & ETH_RSS_IPV6_UDP_EX) ? FM10K_MRQC_UDP_IPV6 : 0;
335
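        /* Note that several ETH_RSS_* flags share one MRQC bit (e.g. IPV4_UDP
         * maps to the same bit as... actually IPV6 and IPV6_EX both map to
         * FM10K_MRQC_IPV6), so requesting either flag enables the same
         * hardware behavior, and fm10k_rss_hash_conf_get() reports both.
         */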
336         if (mrqc == 0) {
337                 PMD_INIT_LOG(ERR, "Specified RSS mode 0x%"PRIx64" is not "
338                         "supported", hf);
339                 return;
340         }
341
342         FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);
343 }
344
345 static int
346 fm10k_dev_tx_init(struct rte_eth_dev *dev)
347 {
348         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
349         int i, ret;
350         struct fm10k_tx_queue *txq;
351         uint64_t base_addr;
352         uint32_t size;
353
354         /* Disable TXINT to avoid possible interrupt */
355         for (i = 0; i < hw->mac.max_queues; i++)
356                 FM10K_WRITE_REG(hw, FM10K_TXINT(i),
357                                 3 << FM10K_TXINT_TIMER_SHIFT);
358
359         /* Setup TX queue */
360         for (i = 0; i < dev->data->nb_tx_queues; ++i) {
361                 txq = dev->data->tx_queues[i];
362                 base_addr = txq->hw_ring_phys_addr;
363                 size = txq->nb_desc * sizeof(struct fm10k_tx_desc);
364
365                 /* disable queue to avoid issues while updating state */
366                 ret = tx_queue_disable(hw, i);
367                 if (ret) {
368                         PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
369                         return -1;
370                 }
371
372                 /* set location and size for descriptor ring */
373                 FM10K_WRITE_REG(hw, FM10K_TDBAL(i),
374                                 base_addr & UINT64_LOWER_32BITS_MASK);
375                 FM10K_WRITE_REG(hw, FM10K_TDBAH(i),
376                                 base_addr >> (CHAR_BIT * sizeof(uint32_t)));
377                 FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size);
378         }
379         return 0;
380 }
381
382 static int
383 fm10k_dev_rx_init(struct rte_eth_dev *dev)
384 {
385         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
386         int i, ret;
387         struct fm10k_rx_queue *rxq;
388         uint64_t base_addr;
389         uint32_t size;
390         uint32_t rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
391         uint16_t buf_size;
392         struct rte_pktmbuf_pool_private *mbp_priv;
393
394         /* Disable RXINT to avoid possible interrupt */
395         for (i = 0; i < hw->mac.max_queues; i++)
396                 FM10K_WRITE_REG(hw, FM10K_RXINT(i),
397                                 3 << FM10K_RXINT_TIMER_SHIFT);
398
399         /* Setup RX queues */
400         for (i = 0; i < dev->data->nb_rx_queues; ++i) {
401                 rxq = dev->data->rx_queues[i];
402                 base_addr = rxq->hw_ring_phys_addr;
403                 size = rxq->nb_desc * sizeof(union fm10k_rx_desc);
404
405                 /* disable queue to avoid issues while updating state */
406                 ret = rx_queue_disable(hw, i);
407                 if (ret) {
408                         PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
409                         return -1;
410                 }
411
412                 /* Setup the Base and Length of the Rx Descriptor Ring */
413                 FM10K_WRITE_REG(hw, FM10K_RDBAL(i),
414                                 base_addr & UINT64_LOWER_32BITS_MASK);
415                 FM10K_WRITE_REG(hw, FM10K_RDBAH(i),
416                                 base_addr >> (CHAR_BIT * sizeof(uint32_t)));
417                 FM10K_WRITE_REG(hw, FM10K_RDLEN(i), size);
418
419                 /* Configure the Rx buffer size for one buff without split */
420                 mbp_priv = rte_mempool_get_priv(rxq->mp);
421                 buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
422                                         RTE_PKTMBUF_HEADROOM);
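                /* SRRCTL takes the buffer size scaled down by
                 * FM10K_SRRCTL_BSIZEPKT_SHIFT, i.e. in units of
                 * (1 << FM10K_SRRCTL_BSIZEPKT_SHIFT) bytes.
                 */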
423                 FM10K_WRITE_REG(hw, FM10K_SRRCTL(i),
424                                 buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT);
425
426                 /* Enable drop on empty; this bit is read-only (RO) for the VF */
427                 if (hw->mac.type == fm10k_mac_pf && rxq->drop_en)
428                         rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;
429
430                 FM10K_WRITE_REG(hw, FM10K_RXDCTL(i), rxdctl);
431                 FM10K_WRITE_FLUSH(hw);
432         }
433
434         /* Configure RSS if applicable */
435         fm10k_dev_mq_rx_configure(dev);
436         return 0;
437 }
438
439 static int
440 fm10k_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
441 {
442         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
443         int err = -1;
444         uint32_t reg;
445         struct fm10k_rx_queue *rxq;
446
447         PMD_INIT_FUNC_TRACE();
448
449         if (rx_queue_id < dev->data->nb_rx_queues) {
450                 rxq = dev->data->rx_queues[rx_queue_id];
451                 err = rx_queue_reset(rxq);
452                 if (err == -ENOMEM) {
453                         PMD_INIT_LOG(ERR, "Failed to allocate memory: %d", err);
454                         return err;
455                 } else if (err == -EINVAL) {
456                         PMD_INIT_LOG(ERR, "Invalid buffer address alignment:"
457                                 " %d", err);
458                         return err;
459                 }
460
461                 /* Setup the HW Rx Head and Tail Descriptor Pointers
462                  * Note: this must be done AFTER the queue is enabled on real
463                  * hardware, but BEFORE the queue is enabled when using the
464                  * emulation platform. Do it in both places for now and remove
465                  * this comment and the following two register writes when the
466                  * emulation platform is no longer being used.
467                  */
468                 FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
469                 FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);
470
471                 /* Set PF ownership flag for PF devices */
472                 reg = FM10K_READ_REG(hw, FM10K_RXQCTL(rx_queue_id));
473                 if (hw->mac.type == fm10k_mac_pf)
474                         reg |= FM10K_RXQCTL_PF;
475                 reg |= FM10K_RXQCTL_ENABLE;
476                 /* enable RX queue */
477                 FM10K_WRITE_REG(hw, FM10K_RXQCTL(rx_queue_id), reg);
478                 FM10K_WRITE_FLUSH(hw);
479
480                 /* Setup the HW Rx Head and Tail Descriptor Pointers
481                  * Note: this must be done AFTER the queue is enabled
482                  */
483                 FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
484                 FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);
485         }
486
487         return err;
488 }
489
490 static int
491 fm10k_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
492 {
493         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
494
495         PMD_INIT_FUNC_TRACE();
496
497         if (rx_queue_id < dev->data->nb_rx_queues) {
498                 /* Disable RX queue */
499                 rx_queue_disable(hw, rx_queue_id);
500
501                 /* Free mbuf and clean HW ring */
502                 rx_queue_clean(dev->data->rx_queues[rx_queue_id]);
503         }
504
505         return 0;
506 }
507
508 static int
509 fm10k_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
510 {
511         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
512         /** @todo - this should be defined in the shared code */
513 #define FM10K_TXDCTL_WRITE_BACK_MIN_DELAY       0x00010000
514         uint32_t txdctl = FM10K_TXDCTL_WRITE_BACK_MIN_DELAY;
515         int err = 0;
516
517         PMD_INIT_FUNC_TRACE();
518
519         if (tx_queue_id < dev->data->nb_tx_queues) {
520                 tx_queue_reset(dev->data->tx_queues[tx_queue_id]);
521
522                 /* reset head and tail pointers */
523                 FM10K_WRITE_REG(hw, FM10K_TDH(tx_queue_id), 0);
524                 FM10K_WRITE_REG(hw, FM10K_TDT(tx_queue_id), 0);
525
526                 /* enable TX queue */
527                 FM10K_WRITE_REG(hw, FM10K_TXDCTL(tx_queue_id),
528                                         FM10K_TXDCTL_ENABLE | txdctl);
529                 FM10K_WRITE_FLUSH(hw);
530         } else
531                 err = -1;
532
533         return err;
534 }
535
536 static int
537 fm10k_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
538 {
539         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
540
541         PMD_INIT_FUNC_TRACE();
542
543         if (tx_queue_id < dev->data->nb_tx_queues) {
544                 tx_queue_disable(hw, tx_queue_id);
545                 tx_queue_clean(dev->data->tx_queues[tx_queue_id]);
546         }
547
548         return 0;
549 }
550
551 /* fls = find last set bit = 32 minus the number of leading zeros */
552 #ifndef fls
553 #define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
554 #endif
555 #define BSIZEPKT_ROUNDUP ((1 << FM10K_SRRCTL_BSIZEPKT_SHIFT) - 1)
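/* fls() sizes the RSS length field in DGLORTDEC below: with e.g. 8 Rx queues,
 * fls(8 - 1) = 3, so three RETA bits (2^3 = 8 queues) are used when decoding.
 */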
556 static int
557 fm10k_dev_start(struct rte_eth_dev *dev)
558 {
559         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
560         int i, diag;
561
562         PMD_INIT_FUNC_TRACE();
563
564         /* stop, init, then start the hw */
565         diag = fm10k_stop_hw(hw);
566         if (diag != FM10K_SUCCESS) {
567                 PMD_INIT_LOG(ERR, "Hardware stop failed: %d", diag);
568                 return -EIO;
569         }
570
571         diag = fm10k_init_hw(hw);
572         if (diag != FM10K_SUCCESS) {
573                 PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
574                 return -EIO;
575         }
576
577         diag = fm10k_start_hw(hw);
578         if (diag != FM10K_SUCCESS) {
579                 PMD_INIT_LOG(ERR, "Hardware start failed: %d", diag);
580                 return -EIO;
581         }
582
583         diag = fm10k_dev_tx_init(dev);
584         if (diag) {
585                 PMD_INIT_LOG(ERR, "TX init failed: %d", diag);
586                 return diag;
587         }
588
589         diag = fm10k_dev_rx_init(dev);
590         if (diag) {
591                 PMD_INIT_LOG(ERR, "RX init failed: %d", diag);
592                 return diag;
593         }
594
595         if (hw->mac.type == fm10k_mac_pf) {
596                 /* Establish only VSI 0 as valid */
597                 FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);
598
599                 /* Configure RSS bits used in RETA table */
600                 FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0),
601                                 fls(dev->data->nb_rx_queues - 1) <<
602                                 FM10K_DGLORTDEC_RSSLENGTH_SHIFT);
603
604                 /* Invalidate all other GLORT entries */
605                 for (i = 1; i < FM10K_DGLORT_COUNT; i++)
606                         FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(i),
607                                         FM10K_DGLORTMAP_NONE);
608         }
609
610         for (i = 0; i < dev->data->nb_rx_queues; i++) {
611                 struct fm10k_rx_queue *rxq;
612                 rxq = dev->data->rx_queues[i];
613
614                 if (rxq->rx_deferred_start)
615                         continue;
616                 diag = fm10k_dev_rx_queue_start(dev, i);
617                 if (diag != 0) {
618                         int j;
619                         for (j = 0; j < i; ++j)
620                                 rx_queue_clean(dev->data->rx_queues[j]);
621                         return diag;
622                 }
623         }
624
625         for (i = 0; i < dev->data->nb_tx_queues; i++) {
626                 struct fm10k_tx_queue *txq;
627                 txq = dev->data->tx_queues[i];
628
629                 if (txq->tx_deferred_start)
630                         continue;
631                 diag = fm10k_dev_tx_queue_start(dev, i);
632                 if (diag != 0) {
633                         int j;
634                         for (j = 0; j < i; ++j)
                                tx_queue_clean(dev->data->tx_queues[j]);
                        for (j = 0; j < dev->data->nb_rx_queues; ++j)
635                                 rx_queue_clean(dev->data->rx_queues[j]);
636                         return diag;
637                 }
638         }
639
640         return 0;
641 }
642
643 static void
644 fm10k_dev_stop(struct rte_eth_dev *dev)
645 {
646         int i;
647
648         PMD_INIT_FUNC_TRACE();
649
650         for (i = 0; i < dev->data->nb_tx_queues; i++)
651                 fm10k_dev_tx_queue_stop(dev, i);
652
653         for (i = 0; i < dev->data->nb_rx_queues; i++)
654                 fm10k_dev_rx_queue_stop(dev, i);
655 }
656
657 static void
658 fm10k_dev_close(struct rte_eth_dev *dev)
659 {
660         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
661
662         PMD_INIT_FUNC_TRACE();
663
664         /* Stop mailbox service first */
665         fm10k_close_mbx_service(hw);
666         fm10k_dev_stop(dev);
667         fm10k_stop_hw(hw);
668 }
669
670 static int
671 fm10k_link_update(struct rte_eth_dev *dev,
672         __rte_unused int wait_to_complete)
673 {
674         PMD_INIT_FUNC_TRACE();
675
676         /* The host-interface link is always up.  The speed is ~50Gbps per Gen3
677          * x8 PCIe interface. For now, we leave the speed undefined since there
678          * is no 50Gbps Ethernet. */
679         dev->data->dev_link.link_speed  = 0;
680         dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
681         dev->data->dev_link.link_status = 1;
682
683         return 0;
684 }
685
686 static void
687 fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
688 {
689         uint64_t ipackets, opackets, ibytes, obytes;
690         struct fm10k_hw *hw =
691                 FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
692         struct fm10k_hw_stats *hw_stats =
693                 FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
694         int i;
695
696         PMD_INIT_FUNC_TRACE();
697
698         fm10k_update_hw_stats(hw, hw_stats);
699
700         ipackets = opackets = ibytes = obytes = 0;
701         for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
702                 (i < FM10K_MAX_QUEUES_PF); ++i) {
703                 stats->q_ipackets[i] = hw_stats->q[i].rx_packets.count;
704                 stats->q_opackets[i] = hw_stats->q[i].tx_packets.count;
705                 stats->q_ibytes[i]   = hw_stats->q[i].rx_bytes.count;
706                 stats->q_obytes[i]   = hw_stats->q[i].tx_bytes.count;
707                 ipackets += stats->q_ipackets[i];
708                 opackets += stats->q_opackets[i];
709                 ibytes   += stats->q_ibytes[i];
710                 obytes   += stats->q_obytes[i];
711         }
712         stats->ipackets = ipackets;
713         stats->opackets = opackets;
714         stats->ibytes = ibytes;
715         stats->obytes = obytes;
716 }
717
718 static void
719 fm10k_stats_reset(struct rte_eth_dev *dev)
720 {
721         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
722         struct fm10k_hw_stats *hw_stats =
723                 FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
724
725         PMD_INIT_FUNC_TRACE();
726
727         memset(hw_stats, 0, sizeof(*hw_stats));
728         fm10k_rebind_hw_stats(hw, hw_stats);
729 }
730
731 static void
732 fm10k_dev_infos_get(struct rte_eth_dev *dev,
733         struct rte_eth_dev_info *dev_info)
734 {
735         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
736
737         PMD_INIT_FUNC_TRACE();
738
739         dev_info->min_rx_bufsize     = FM10K_MIN_RX_BUF_SIZE;
740         dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
741         dev_info->max_rx_queues      = hw->mac.max_queues;
742         dev_info->max_tx_queues      = hw->mac.max_queues;
743         dev_info->max_mac_addrs      = 1;
744         dev_info->max_hash_mac_addrs = 0;
745         dev_info->max_vfs            = FM10K_MAX_VF_NUM;
746         dev_info->max_vmdq_pools     = ETH_64_POOLS;
747         dev_info->rx_offload_capa =
748                 DEV_RX_OFFLOAD_IPV4_CKSUM |
749                 DEV_RX_OFFLOAD_UDP_CKSUM  |
750                 DEV_RX_OFFLOAD_TCP_CKSUM;
751         dev_info->tx_offload_capa    = 0;
752         dev_info->reta_size = FM10K_MAX_RSS_INDICES;
753
754         dev_info->default_rxconf = (struct rte_eth_rxconf) {
755                 .rx_thresh = {
756                         .pthresh = FM10K_DEFAULT_RX_PTHRESH,
757                         .hthresh = FM10K_DEFAULT_RX_HTHRESH,
758                         .wthresh = FM10K_DEFAULT_RX_WTHRESH,
759                 },
760                 .rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(0),
761                 .rx_drop_en = 0,
762         };
763
764         dev_info->default_txconf = (struct rte_eth_txconf) {
765                 .tx_thresh = {
766                         .pthresh = FM10K_DEFAULT_TX_PTHRESH,
767                         .hthresh = FM10K_DEFAULT_TX_HTHRESH,
768                         .wthresh = FM10K_DEFAULT_TX_WTHRESH,
769                 },
770                 .tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(0),
771                 .tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(0),
772                 .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
773                                 ETH_TXQ_FLAGS_NOOFFLOADS,
774         };
775
776 }
777
778 static inline int
779 check_nb_desc(uint16_t min, uint16_t max, uint16_t mult, uint16_t request)
780 {
781         if ((request < min) || (request > max) || ((request % mult) != 0))
782                 return -1;
783         else
784                 return 0;
785 }
786
787 /*
788  * Create a memzone for hardware descriptor rings. Malloc cannot be used since
789  * the physical address is required. If the memzone is already created, then
790  * this function returns a pointer to the existing memzone.
791  */
792 static inline const struct rte_memzone *
793 allocate_hw_ring(const char *driver_name, const char *ring_name,
794         uint8_t port_id, uint16_t queue_id, int socket_id,
795         uint32_t size, uint32_t align)
796 {
797         char name[RTE_MEMZONE_NAMESIZE];
798         const struct rte_memzone *mz;
799
800         snprintf(name, sizeof(name), "%s_%s_%d_%d_%d",
801                  driver_name, ring_name, port_id, queue_id, socket_id);
802
803         /* return the memzone if it already exists */
804         mz = rte_memzone_lookup(name);
805         if (mz)
806                 return mz;
807
808 #ifdef RTE_LIBRTE_XEN_DOM0
809         return rte_memzone_reserve_bounded(name, size, socket_id, 0, align,
810                                            RTE_PGSIZE_2M);
811 #else
812         return rte_memzone_reserve_aligned(name, size, socket_id, 0, align);
813 #endif
814 }
815
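/* Note the direction of the divisibility test below: the requested threshold
 * must evenly divide 'div' (typically the ring size), not the reverse.
 */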
816 static inline int
817 check_thresh(uint16_t min, uint16_t max, uint16_t div, uint16_t request)
818 {
819         if ((request < min) || (request > max) || ((div % request) != 0))
820                 return -1;
821         else
822                 return 0;
823 }
824
825 static inline int
826 handle_rxconf(struct fm10k_rx_queue *q, const struct rte_eth_rxconf *conf)
827 {
828         uint16_t rx_free_thresh;
829
830         if (conf->rx_free_thresh == 0)
831                 rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(q);
832         else
833                 rx_free_thresh = conf->rx_free_thresh;
834
835         /* make sure the requested threshold satisfies the constraints */
836         if (check_thresh(FM10K_RX_FREE_THRESH_MIN(q),
837                         FM10K_RX_FREE_THRESH_MAX(q),
838                         FM10K_RX_FREE_THRESH_DIV(q),
839                         rx_free_thresh)) {
840                 PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be "
841                         "less than or equal to %u, "
842                         "greater than or equal to %u, "
843                         "and a divisor of %u",
844                         rx_free_thresh, FM10K_RX_FREE_THRESH_MAX(q),
845                         FM10K_RX_FREE_THRESH_MIN(q),
846                         FM10K_RX_FREE_THRESH_DIV(q));
847                 return (-EINVAL);
848         }
849
850         q->alloc_thresh = rx_free_thresh;
851         q->drop_en = conf->rx_drop_en;
852         q->rx_deferred_start = conf->rx_deferred_start;
853
854         return 0;
855 }
856
857 /*
858  * Hardware requires specific alignment for Rx packet buffers. At
859  * least one of the following two conditions must be satisfied.
860  *  1. Address is 512B aligned
861  *  2. Address is 8B aligned and buffer does not cross 4K boundary.
862  *
863  * As such, the driver may need to adjust the DMA address within the
864  * buffer by up to 512B. The mempool element size is checked here
865  * to make sure a maximally sized Ethernet frame can still be wholly
866  * contained within the buffer after 512B alignment.
867  *
868  * return 1 if the element size is valid, otherwise return 0.
869  */
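/* For example (assuming the default 128B RTE_PKTMBUF_HEADROOM): an element
 * with 2176B of room past the mbuf header leaves 2176 - 128 - 512 = 1536B
 * after worst-case alignment, which still covers ETHER_MAX_VLAN_FRAME_LEN
 * (1522B), so such a mempool passes the check.
 */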
870 static int
871 mempool_element_size_valid(struct rte_mempool *mp)
872 {
873         uint32_t min_size;
874
875         /* elt_size includes mbuf header and headroom */
876         min_size = mp->elt_size - sizeof(struct rte_mbuf) -
877                         RTE_PKTMBUF_HEADROOM;
878
879         /* account for up to 512B of alignment */
880         min_size -= FM10K_RX_BUFF_ALIGN;
881
882         /* sanity check for overflow */
883         if (min_size > mp->elt_size)
884                 return 0;
885
886         if (min_size < ETHER_MAX_VLAN_FRAME_LEN)
887                 return 0;
888
889         /* size is valid */
890         return 1;
891 }
892
893 static int
894 fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
895         uint16_t nb_desc, unsigned int socket_id,
896         const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
897 {
898         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
899         struct fm10k_rx_queue *q;
900         const struct rte_memzone *mz;
901
902         PMD_INIT_FUNC_TRACE();
903
904         /* make sure the mempool element size can account for alignment. */
905         if (!mempool_element_size_valid(mp)) {
906                 PMD_INIT_LOG(ERR, "Error : Mempool element size is too small");
907                 return (-EINVAL);
908         }
909
910         /* make sure a valid number of descriptors have been requested */
911         if (check_nb_desc(FM10K_MIN_RX_DESC, FM10K_MAX_RX_DESC,
912                                 FM10K_MULT_RX_DESC, nb_desc)) {
913                 PMD_INIT_LOG(ERR, "Number of Rx descriptors (%u) must be "
914                         "less than or equal to %"PRIu32", "
915                         "greater than or equal to %u, "
916                         "and a multiple of %u",
917                         nb_desc, (uint32_t)FM10K_MAX_RX_DESC, FM10K_MIN_RX_DESC,
918                         FM10K_MULT_RX_DESC);
919                 return (-EINVAL);
920         }
921
922         /*
923          * if this queue existed already, free the associated memory. The
924          * queue cannot be reused in case we need to allocate memory on
925          * different socket than was previously used.
926          */
927         if (dev->data->rx_queues[queue_id] != NULL) {
928                 rx_queue_free(dev->data->rx_queues[queue_id]);
929                 dev->data->rx_queues[queue_id] = NULL;
930         }
931
932         /* allocate memory for the queue structure */
933         q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
934                                 socket_id);
935         if (q == NULL) {
936                 PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
937                 return (-ENOMEM);
938         }
939
940         /* setup queue */
941         q->mp = mp;
942         q->nb_desc = nb_desc;
943         q->port_id = dev->data->port_id;
944         q->queue_id = queue_id;
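        /* Cache the MMIO address of this queue's tail register (RDT) so the
         * receive path can bump the tail without recomputing the offset.
         */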
945         q->tail_ptr = (volatile uint32_t *)
946                 &((uint32_t *)hw->hw_addr)[FM10K_RDT(queue_id)];
947         if (handle_rxconf(q, conf)) {
948                 rte_free(q);
                return (-EINVAL);
        }
949
950         /* allocate memory for the software ring */
951         q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
952                                         nb_desc * sizeof(struct rte_mbuf *),
953                                         RTE_CACHE_LINE_SIZE, socket_id);
954         if (q->sw_ring == NULL) {
955                 PMD_INIT_LOG(ERR, "Cannot allocate software ring");
956                 rte_free(q);
957                 return (-ENOMEM);
958         }
959
960         /*
961          * allocate memory for the hardware descriptor ring. A memzone large
962          * enough to hold the maximum ring size is requested to allow for
963          * resizing in later calls to the queue setup function.
964          */
965         mz = allocate_hw_ring(dev->driver->pci_drv.name, "rx_ring",
966                                 dev->data->port_id, queue_id, socket_id,
967                                 FM10K_MAX_RX_RING_SZ, FM10K_ALIGN_RX_DESC);
968         if (mz == NULL) {
969                 PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
970                 rte_free(q->sw_ring);
971                 rte_free(q);
972                 return (-ENOMEM);
973         }
974         q->hw_ring = mz->addr;
975         q->hw_ring_phys_addr = mz->phys_addr;
976
977         dev->data->rx_queues[queue_id] = q;
978         return 0;
979 }
980
981 static void
982 fm10k_rx_queue_release(void *queue)
983 {
984         PMD_INIT_FUNC_TRACE();
985
986         rx_queue_free(queue);
987 }
988
989 static inline int
990 handle_txconf(struct fm10k_tx_queue *q, const struct rte_eth_txconf *conf)
991 {
992         uint16_t tx_free_thresh;
993         uint16_t tx_rs_thresh;
994
995         /* The constraint macros require that tx_free_thresh be configured
996          * before tx_rs_thresh */
997         if (conf->tx_free_thresh == 0)
998                 tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(q);
999         else
1000                 tx_free_thresh = conf->tx_free_thresh;
1001
1002         /* make sure the requested threshold satisfies the constraints */
1003         if (check_thresh(FM10K_TX_FREE_THRESH_MIN(q),
1004                         FM10K_TX_FREE_THRESH_MAX(q),
1005                         FM10K_TX_FREE_THRESH_DIV(q),
1006                         tx_free_thresh)) {
1007                 PMD_INIT_LOG(ERR, "tx_free_thresh (%u) must be "
1008                         "less than or equal to %u, "
1009                         "greater than or equal to %u, "
1010                         "and a divisor of %u",
1011                         tx_free_thresh, FM10K_TX_FREE_THRESH_MAX(q),
1012                         FM10K_TX_FREE_THRESH_MIN(q),
1013                         FM10K_TX_FREE_THRESH_DIV(q));
1014                 return (-EINVAL);
1015         }
1016
1017         q->free_thresh = tx_free_thresh;
1018
1019         if (conf->tx_rs_thresh == 0)
1020                 tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(q);
1021         else
1022                 tx_rs_thresh = conf->tx_rs_thresh;
1023
1024         q->tx_deferred_start = conf->tx_deferred_start;
1025
1026         /* make sure the requested threshold satisfies the constraints */
1027         if (check_thresh(FM10K_TX_RS_THRESH_MIN(q),
1028                         FM10K_TX_RS_THRESH_MAX(q),
1029                         FM10K_TX_RS_THRESH_DIV(q),
1030                         tx_rs_thresh)) {
1031                 PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be "
1032                         "less than or equal to %u, "
1033                         "greater than or equal to %u, "
1034                         "and a divisor of %u",
1035                         tx_rs_thresh, FM10K_TX_RS_THRESH_MAX(q),
1036                         FM10K_TX_RS_THRESH_MIN(q),
1037                         FM10K_TX_RS_THRESH_DIV(q));
1038                 return (-EINVAL);
1039         }
1040
1041         q->rs_thresh = tx_rs_thresh;
1042
1043         return 0;
1044 }
1045
1046 static int
1047 fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
1048         uint16_t nb_desc, unsigned int socket_id,
1049         const struct rte_eth_txconf *conf)
1050 {
1051         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1052         struct fm10k_tx_queue *q;
1053         const struct rte_memzone *mz;
1054
1055         PMD_INIT_FUNC_TRACE();
1056
1057         /* make sure a valid number of descriptors have been requested */
1058         if (check_nb_desc(FM10K_MIN_TX_DESC, FM10K_MAX_TX_DESC,
1059                                 FM10K_MULT_TX_DESC, nb_desc)) {
1060                 PMD_INIT_LOG(ERR, "Number of Tx descriptors (%u) must be "
1061                         "less than or equal to %"PRIu32", "
1062                         "greater than or equal to %u, "
1063                         "and a multiple of %u",
1064                         nb_desc, (uint32_t)FM10K_MAX_TX_DESC, FM10K_MIN_TX_DESC,
1065                         FM10K_MULT_TX_DESC);
1066                 return (-EINVAL);
1067         }
1068
1069         /*
1070          * if this queue existed already, free the associated memory. The
1071          * queue cannot be reused in case we need to allocate memory on
1072          * different socket than was previously used.
1073          */
1074         if (dev->data->tx_queues[queue_id] != NULL) {
1075                 tx_queue_free(dev->data->tx_queues[queue_id]);
1076                 dev->data->tx_queues[queue_id] = NULL;
1077         }
1078
1079         /* allocate memory for the queue structure */
1080         q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
1081                                 socket_id);
1082         if (q == NULL) {
1083                 PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
1084                 return (-ENOMEM);
1085         }
1086
1087         /* setup queue */
1088         q->nb_desc = nb_desc;
1089         q->port_id = dev->data->port_id;
1090         q->queue_id = queue_id;
1091         q->tail_ptr = (volatile uint32_t *)
1092                 &((uint32_t *)hw->hw_addr)[FM10K_TDT(queue_id)];
1093         if (handle_txconf(q, conf)) {
1094                 rte_free(q);
                return (-EINVAL);
        }
1095
1096         /* allocate memory for the software ring */
1097         q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
1098                                         nb_desc * sizeof(struct rte_mbuf *),
1099                                         RTE_CACHE_LINE_SIZE, socket_id);
1100         if (q->sw_ring == NULL) {
1101                 PMD_INIT_LOG(ERR, "Cannot allocate software ring");
1102                 rte_free(q);
1103                 return (-ENOMEM);
1104         }
1105
1106         /*
1107          * allocate memory for the hardware descriptor ring. A memzone large
1108          * enough to hold the maximum ring size is requested to allow for
1109          * resizing in later calls to the queue setup function.
1110          */
1111         mz = allocate_hw_ring(dev->driver->pci_drv.name, "tx_ring",
1112                                 dev->data->port_id, queue_id, socket_id,
1113                                 FM10K_MAX_TX_RING_SZ, FM10K_ALIGN_TX_DESC);
1114         if (mz == NULL) {
1115                 PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
1116                 rte_free(q->sw_ring);
1117                 rte_free(q);
1118                 return (-ENOMEM);
1119         }
1120         q->hw_ring = mz->addr;
1121         q->hw_ring_phys_addr = mz->phys_addr;
1122
1123         /*
1124          * allocate memory for the RS bit tracker. Enough slots to hold the
1125          * descriptor index for each RS bit needing to be set are required.
1126          */
1127         q->rs_tracker.list = rte_zmalloc_socket("fm10k rs tracker",
1128                                 ((nb_desc + 1) / q->rs_thresh) *
1129                                 sizeof(uint16_t),
1130                                 RTE_CACHE_LINE_SIZE, socket_id);
1131         if (q->rs_tracker.list == NULL) {
1132                 PMD_INIT_LOG(ERR, "Cannot allocate RS bit tracker");
1133                 rte_free(q->sw_ring);
1134                 rte_free(q);
1135                 return (-ENOMEM);
1136         }
1137
1138         dev->data->tx_queues[queue_id] = q;
1139         return 0;
1140 }
1141
1142 static void
1143 fm10k_tx_queue_release(void *queue)
1144 {
1145         PMD_INIT_FUNC_TRACE();
1146
1147         tx_queue_free(queue);
1148 }
1149
1150 static int
1151 fm10k_reta_update(struct rte_eth_dev *dev,
1152                         struct rte_eth_rss_reta_entry64 *reta_conf,
1153                         uint16_t reta_size)
1154 {
1155         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1156         uint16_t i, j, idx, shift;
1157         uint8_t mask;
1158         uint32_t reta;
1159
1160         PMD_INIT_FUNC_TRACE();
1161
1162         if (reta_size > FM10K_MAX_RSS_INDICES) {
1163                 PMD_INIT_LOG(ERR, "The size of the configured hash lookup "
1164                         "table (%d) exceeds the number of entries the "
1165                         "hardware supports (%d)", reta_size, FM10K_MAX_RSS_INDICES);
1166                 return -EINVAL;
1167         }
1168
1169         /*
1170          * Update Redirection Table RETA[n], n=0..31. The redirection table has
1171          * 128-entries in 32 registers
1172          */
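        /* Each iteration handles one 32-bit register, i.e. four table
         * entries. When the caller's mask selects only some of the four,
         * the register is read first so untouched bytes are preserved
         * (read-modify-write).
         */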
1173         for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
1174                 idx = i / RTE_RETA_GROUP_SIZE;
1175                 shift = i % RTE_RETA_GROUP_SIZE;
1176                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
1177                                 BIT_MASK_PER_UINT32);
1178                 if (mask == 0)
1179                         continue;
1180
1181                 reta = 0;
1182                 if (mask != BIT_MASK_PER_UINT32)
1183                         reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));
1184
1185                 for (j = 0; j < CHARS_PER_UINT32; j++) {
1186                         if (mask & (0x1 << j)) {
1187                                 if (mask != BIT_MASK_PER_UINT32)
1188                                         reta &= ~(UINT8_MAX << CHAR_BIT * j);
1189                                 reta |= reta_conf[idx].reta[shift + j] <<
1190                                                 (CHAR_BIT * j);
1191                         }
1192                 }
1193                 FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2), reta);
1194         }
1195
1196         return 0;
1197 }
1198
1199 static int
1200 fm10k_reta_query(struct rte_eth_dev *dev,
1201                         struct rte_eth_rss_reta_entry64 *reta_conf,
1202                         uint16_t reta_size)
1203 {
1204         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1205         uint16_t i, j, idx, shift;
1206         uint8_t mask;
1207         uint32_t reta;
1208
1209         PMD_INIT_FUNC_TRACE();
1210
1211         if (reta_size < FM10K_MAX_RSS_INDICES) {
1212                 PMD_INIT_LOG(ERR, "The size of the hash lookup table to "
1213                         "query (%d) is smaller than the number of entries "
1214                         "the hardware holds (%d)", reta_size, FM10K_MAX_RSS_INDICES);
1215                 return -EINVAL;
1216         }
1217
1218         /*
1219          * Read Redirection Table RETA[n], n=0..31. The redirection table has
1220          * 128-entries in 32 registers
1221          */
1222         for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
1223                 idx = i / RTE_RETA_GROUP_SIZE;
1224                 shift = i % RTE_RETA_GROUP_SIZE;
1225                 mask = (uint8_t)((reta_conf[idx].mask >> shift) &
1226                                 BIT_MASK_PER_UINT32);
1227                 if (mask == 0)
1228                         continue;
1229
1230                 reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));
1231                 for (j = 0; j < CHARS_PER_UINT32; j++) {
1232                         if (mask & (0x1 << j))
1233                                 reta_conf[idx].reta[shift + j] = ((reta >>
1234                                         CHAR_BIT * j) & UINT8_MAX);
1235                 }
1236         }
1237
1238         return 0;
1239 }
1240
1241 static int
1242 fm10k_rss_hash_update(struct rte_eth_dev *dev,
1243         struct rte_eth_rss_conf *rss_conf)
1244 {
1245         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1246         uint32_t *key = (uint32_t *)rss_conf->rss_key;
1247         uint32_t mrqc;
1248         uint64_t hf = rss_conf->rss_hf;
1249         int i;
1250
1251         PMD_INIT_FUNC_TRACE();
1252
1253         if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
1254                 FM10K_RSSRK_ENTRIES_PER_REG)
1255                 return -EINVAL;
1256
1257         if (hf == 0)
1258                 return -EINVAL;
1259
1260         mrqc = 0;
1261         mrqc |= (hf & ETH_RSS_IPV4_TCP)    ? FM10K_MRQC_TCP_IPV4 : 0;
1262         mrqc |= (hf & ETH_RSS_IPV4)        ? FM10K_MRQC_IPV4     : 0;
1263         mrqc |= (hf & ETH_RSS_IPV6)        ? FM10K_MRQC_IPV6     : 0;
1264         mrqc |= (hf & ETH_RSS_IPV6_EX)     ? FM10K_MRQC_IPV6     : 0;
1265         mrqc |= (hf & ETH_RSS_IPV6_TCP)    ? FM10K_MRQC_TCP_IPV6 : 0;
1266         mrqc |= (hf & ETH_RSS_IPV6_TCP_EX) ? FM10K_MRQC_TCP_IPV6 : 0;
1267         mrqc |= (hf & ETH_RSS_IPV4_UDP)    ? FM10K_MRQC_UDP_IPV4 : 0;
1268         mrqc |= (hf & ETH_RSS_IPV6_UDP)    ? FM10K_MRQC_UDP_IPV6 : 0;
1269         mrqc |= (hf & ETH_RSS_IPV6_UDP_EX) ? FM10K_MRQC_UDP_IPV6 : 0;
1270
1271         /* If the flags don't map to any supported hash function, bail out */
1272         if (mrqc == 0)
1273                 return -EINVAL;
1274
1275         if (key != NULL)
1276                 for (i = 0; i < FM10K_RSSRK_SIZE; ++i)
1277                         FM10K_WRITE_REG(hw, FM10K_RSSRK(0, i), key[i]);
1278
1279         FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);
1280
1281         return 0;
1282 }
1283
1284 static int
1285 fm10k_rss_hash_conf_get(struct rte_eth_dev *dev,
1286         struct rte_eth_rss_conf *rss_conf)
1287 {
1288         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1289         uint32_t *key = (uint32_t *)rss_conf->rss_key;
1290         uint32_t mrqc;
1291         uint64_t hf;
1292         int i;
1293
1294         PMD_INIT_FUNC_TRACE();
1295
1296         if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
1297                                 FM10K_RSSRK_ENTRIES_PER_REG)
1298                 return -EINVAL;
1299
1300         if (key != NULL)
1301                 for (i = 0; i < FM10K_RSSRK_SIZE; ++i)
1302                         key[i] = FM10K_READ_REG(hw, FM10K_RSSRK(0, i));
1303
1304         mrqc = FM10K_READ_REG(hw, FM10K_MRQC(0));
1305         hf = 0;
1306         hf |= (mrqc & FM10K_MRQC_TCP_IPV4) ? ETH_RSS_IPV4_TCP    : 0;
1307         hf |= (mrqc & FM10K_MRQC_IPV4)     ? ETH_RSS_IPV4        : 0;
1308         hf |= (mrqc & FM10K_MRQC_IPV6)     ? ETH_RSS_IPV6        : 0;
1309         hf |= (mrqc & FM10K_MRQC_IPV6)     ? ETH_RSS_IPV6_EX     : 0;
1310         hf |= (mrqc & FM10K_MRQC_TCP_IPV6) ? ETH_RSS_IPV6_TCP    : 0;
1311         hf |= (mrqc & FM10K_MRQC_TCP_IPV6) ? ETH_RSS_IPV6_TCP_EX : 0;
1312         hf |= (mrqc & FM10K_MRQC_UDP_IPV4) ? ETH_RSS_IPV4_UDP    : 0;
1313         hf |= (mrqc & FM10K_MRQC_UDP_IPV6) ? ETH_RSS_IPV6_UDP    : 0;
1314         hf |= (mrqc & FM10K_MRQC_UDP_IPV6) ? ETH_RSS_IPV6_UDP_EX : 0;
1315
1316         rss_conf->rss_hf = hf;
1317
1318         return 0;
1319 }
1320
1321 /* Mailbox message handler in VF */
1322 static const struct fm10k_msg_data fm10k_msgdata_vf[] = {
1323         FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
1324         FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf),
1325         FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
1326         FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
1327 };
1328
1329 /* Mailbox message handler in PF */
1330 static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
1331         FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
1332         FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
1333         FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
1334         FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
1335         FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
1336         FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
1337         FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
1338 };
1339
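/* Both tables end with the TLV error handler, which catches any message ID
 * that none of the preceding entries claimed.
 */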
1340 static int
1341 fm10k_setup_mbx_service(struct fm10k_hw *hw)
1342 {
1343         int err;
1344
1345         /* Initialize mailbox lock */
1346         fm10k_mbx_initlock(hw);
1347
1348         /* Replace default message handler with new ones */
1349         if (hw->mac.type == fm10k_mac_pf)
1350                 err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
1351         else
1352                 err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);
1353
1354         if (err) {
1355                 PMD_INIT_LOG(ERR, "Failed to register mailbox handlers: %d",
1356                                 err);
1357                 return err;
1358         }
1359         /* Connect to SM for PF device or PF for VF device */
1360         return hw->mbx.ops.connect(hw, &hw->mbx);
1361 }
1362
1363 static void
1364 fm10k_close_mbx_service(struct fm10k_hw *hw)
1365 {
1366         /* Disconnect from SM for PF device or PF for VF device */
1367         hw->mbx.ops.disconnect(hw, &hw->mbx);
1368 }
1369
1370 static struct eth_dev_ops fm10k_eth_dev_ops = {
1371         .dev_configure          = fm10k_dev_configure,
1372         .dev_start              = fm10k_dev_start,
1373         .dev_stop               = fm10k_dev_stop,
1374         .dev_close              = fm10k_dev_close,
1375         .stats_get              = fm10k_stats_get,
1376         .stats_reset            = fm10k_stats_reset,
1377         .link_update            = fm10k_link_update,
1378         .dev_infos_get          = fm10k_dev_infos_get,
1379         .rx_queue_start         = fm10k_dev_rx_queue_start,
1380         .rx_queue_stop          = fm10k_dev_rx_queue_stop,
1381         .tx_queue_start         = fm10k_dev_tx_queue_start,
1382         .tx_queue_stop          = fm10k_dev_tx_queue_stop,
1383         .rx_queue_setup         = fm10k_rx_queue_setup,
1384         .rx_queue_release       = fm10k_rx_queue_release,
1385         .tx_queue_setup         = fm10k_tx_queue_setup,
1386         .tx_queue_release       = fm10k_tx_queue_release,
1387         .reta_update            = fm10k_reta_update,
1388         .reta_query             = fm10k_reta_query,
1389         .rss_hash_update        = fm10k_rss_hash_update,
1390         .rss_hash_conf_get      = fm10k_rss_hash_conf_get,
1391 };
1392
1393 static int
1394 eth_fm10k_dev_init(__rte_unused struct eth_driver *eth_drv,
1395         struct rte_eth_dev *dev)
1396 {
1397         struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1398         int diag;
1399
1400         PMD_INIT_FUNC_TRACE();
1401
1402         dev->dev_ops = &fm10k_eth_dev_ops;
1403         dev->rx_pkt_burst = &fm10k_recv_pkts;
1404         dev->tx_pkt_burst = &fm10k_xmit_pkts;
1405
1406         /* only initialize in the primary process */
1407         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1408                 return 0;
1409
1410         /* Vendor and Device ID need to be set before init of shared code */
1411         memset(hw, 0, sizeof(*hw));
1412         hw->device_id = dev->pci_dev->id.device_id;
1413         hw->vendor_id = dev->pci_dev->id.vendor_id;
1414         hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
1415         hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
1416         hw->revision_id = 0;
1417         hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
1418         if (hw->hw_addr == NULL) {
1419                 PMD_INIT_LOG(ERR, "Bad mem resource."
1420                         " Try to blacklist unused devices.");
1421                 return -EIO;
1422         }
1423
1424         /* Store fm10k_adapter pointer */
1425         hw->back = dev->data->dev_private;
1426
1427         /* Initialize the shared code */
1428         diag = fm10k_init_shared_code(hw);
1429         if (diag != FM10K_SUCCESS) {
1430                 PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
1431                 return -EIO;
1432         }
1433
1434         /*
1435          * Initialize bus info. Normally we would call fm10k_get_bus_info(), but
1436          * there is no way to get link status without reading BAR4.  Until this
1437          * works, assume we have maximum bandwidth.
1438          * @todo - fix bus info
1439          */
1440         hw->bus_caps.speed = fm10k_bus_speed_8000;
1441         hw->bus_caps.width = fm10k_bus_width_pcie_x8;
1442         hw->bus_caps.payload = fm10k_bus_payload_512;
1443         hw->bus.speed = fm10k_bus_speed_8000;
1444         hw->bus.width = fm10k_bus_width_pcie_x8;
1445         hw->bus.payload = fm10k_bus_payload_256;
1446
1447         /* Initialize the hw */
1448         diag = fm10k_init_hw(hw);
1449         if (diag != FM10K_SUCCESS) {
1450                 PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
1451                 return -EIO;
1452         }
1453
1454         /* Initialize MAC address(es) */
1455         dev->data->mac_addrs = rte_zmalloc("fm10k", ETHER_ADDR_LEN, 0);
1456         if (dev->data->mac_addrs == NULL) {
1457                 PMD_INIT_LOG(ERR, "Cannot allocate memory for MAC addresses");
1458                 return -ENOMEM;
1459         }
1460
1461         diag = fm10k_read_mac_addr(hw);
1462         if (diag != FM10K_SUCCESS) {
1463                 /*
1464                  * TODO: remove special handling on VF. Need shared code to
1465                  * fix first.
1466                  */
1467                 if (hw->mac.type == fm10k_mac_pf) {
1468                         PMD_INIT_LOG(ERR, "Read MAC addr failed: %d", diag);
1469                         return -EIO;
1470                 } else {
1471                         /* Generate a random addr */
1472                         eth_random_addr(hw->mac.addr);
1473                         memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
1474                 }
1475         }
1476
1477         ether_addr_copy((const struct ether_addr *)hw->mac.addr,
1478                         &dev->data->mac_addrs[0]);
1479
1480         /* Reset the hw statistics */
1481         fm10k_stats_reset(dev);
1482
1483         /* Reset the hw */
1484         diag = fm10k_reset_hw(hw);
1485         if (diag != FM10K_SUCCESS) {
1486                 PMD_INIT_LOG(ERR, "Hardware reset failed: %d", diag);
1487                 return -EIO;
1488         }
1489
1490         /* Setup mailbox service */
1491         diag = fm10k_setup_mbx_service(hw);
1492         if (diag != FM10K_SUCCESS) {
1493                 PMD_INIT_LOG(ERR, "Failed to setup mailbox: %d", diag);
1494                 return -EIO;
1495         }
1496
1497         /*
1498          * The calls below operate on the mailbox, so take the lock to avoid
1499          * racing with the interrupt handler. Operations on the mailbox FIFO
1500          * raise an interrupt to the PF/SM, whose handler responds and raises
1501          * an interrupt back to our side, at which point our handler touches
1502          * the mailbox FIFO as well.
1503          */
1504         fm10k_mbx_lock(hw);
1505         /* Enable port first */
1506         hw->mac.ops.update_lport_state(hw, 0, 0, 1);
1507
1508         /* Update default vlan */
1509         hw->mac.ops.update_vlan(hw, hw->mac.default_vid, 0, true);
1510
1511         /*
1512          * Add the default MAC/VLAN filter. The glort is assigned by the SM
1513          * for the PF and is unused for the VF; the PF assigns each VF its glort.
1514          */
1515         hw->mac.ops.update_uc_addr(hw, hw->mac.dglort_map, hw->mac.addr,
1516                               hw->mac.default_vid, 1, 0);
1517
1518         /* Set multicast mode by default. The application can switch to
1519          * another mode later through the API.
1520          */
1521         hw->mac.ops.update_xcast_mode(hw, hw->mac.dglort_map,
1522                                         FM10K_XCAST_MODE_MULTI);
1523
1524         fm10k_mbx_unlock(hw);
1525
1526         return 0;
1527 }
1528
1529 /*
1530  * The set of PCI devices this driver supports. This driver will enable both PF
1531  * and SRIOV-VF devices.
1532  */
1533 static struct rte_pci_id pci_id_fm10k_map[] = {
1534 #define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
1535 #include "rte_pci_dev_ids.h"
1536         { .vendor_id = 0, /* sentinel */ },
1537 };
1538
1539 static struct eth_driver rte_pmd_fm10k = {
1540         {
1541                 .name = "rte_pmd_fm10k",
1542                 .id_table = pci_id_fm10k_map,
1543                 .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1544         },
1545         .eth_dev_init = eth_fm10k_dev_init,
1546         .dev_private_size = sizeof(struct fm10k_adapter),
1547 };
1548
1549 /*
1550  * Driver initialization routine.
1551  * Invoked once at EAL init time.
1552  * Register itself as the [Poll Mode] Driver of PCI FM10K devices.
1553  */
1554 static int
1555 rte_pmd_fm10k_init(__rte_unused const char *name,
1556         __rte_unused const char *params)
1557 {
1558         PMD_INIT_FUNC_TRACE();
1559         rte_eth_driver_register(&rte_pmd_fm10k);
1560         return 0;
1561 }
1562
1563 static struct rte_driver rte_fm10k_driver = {
1564         .type = PMD_PDEV,
1565         .init = rte_pmd_fm10k_init,
1566 };
1567
1568 PMD_REGISTER_DRIVER(rte_fm10k_driver);