fm10k: add VF support
lib/librte_pmd_fm10k/fm10k_ethdev.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_string_fns.h>
#include <rte_dev.h>
#include <rte_spinlock.h>

#include "fm10k.h"
#include "base/fm10k_api.h"

#define FM10K_RX_BUFF_ALIGN 512
/* Default delay to acquire mailbox lock */
#define FM10K_MBXLOCK_DELAY_US 20
#define UINT64_LOWER_32BITS_MASK 0x00000000ffffffffULL

/* Number of chars per uint32 type */
#define CHARS_PER_UINT32 (sizeof(uint32_t))
#define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)
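
/*
 * For reference: with the standard 4-byte uint32_t, CHARS_PER_UINT32
 * evaluates to 4 and BIT_MASK_PER_UINT32 to (1 << 4) - 1 = 0xF, i.e. one
 * mask bit per byte-sized RETA entry packed into a 32-bit register (see
 * fm10k_reta_update() below).
 */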

static void fm10k_close_mbx_service(struct fm10k_hw *hw);

static void
fm10k_mbx_initlock(struct fm10k_hw *hw)
{
        rte_spinlock_init(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
}

static void
fm10k_mbx_lock(struct fm10k_hw *hw)
{
        while (!rte_spinlock_trylock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back)))
                rte_delay_us(FM10K_MBXLOCK_DELAY_US);
}

static void
fm10k_mbx_unlock(struct fm10k_hw *hw)
{
        rte_spinlock_unlock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
}

/*
 * reset queue to initial state, allocate software buffers used when starting
 * device.
 * return 0 on success
 * return -ENOMEM if buffers cannot be allocated
 * return -EINVAL if buffers do not satisfy alignment condition
 */
static inline int
rx_queue_reset(struct fm10k_rx_queue *q)
{
        uint64_t dma_addr;
        int i, diag;
        PMD_INIT_FUNC_TRACE();

        diag = rte_mempool_get_bulk(q->mp, (void **)q->sw_ring, q->nb_desc);
        if (diag != 0)
                return -ENOMEM;

        for (i = 0; i < q->nb_desc; ++i) {
                fm10k_pktmbuf_reset(q->sw_ring[i], q->port_id);
                if (!fm10k_addr_alignment_valid(q->sw_ring[i])) {
                        rte_mempool_put_bulk(q->mp, (void **)q->sw_ring,
                                                q->nb_desc);
                        return -EINVAL;
                }
                dma_addr = MBUF_DMA_ADDR_DEFAULT(q->sw_ring[i]);
                q->hw_ring[i].q.pkt_addr = dma_addr;
                q->hw_ring[i].q.hdr_addr = dma_addr;
        }

        q->next_dd = 0;
        q->next_alloc = 0;
        q->next_trigger = q->alloc_thresh - 1;
        FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);
        return 0;
}

/*
 * clean queue, descriptor rings, free software buffers used when stopping
 * device.
 */
static inline void
rx_queue_clean(struct fm10k_rx_queue *q)
{
        union fm10k_rx_desc zero = {.q = {0, 0, 0, 0} };
        uint32_t i;
        PMD_INIT_FUNC_TRACE();

        /* zero descriptor rings */
        for (i = 0; i < q->nb_desc; ++i)
                q->hw_ring[i] = zero;

        /* free software buffers */
        for (i = 0; i < q->nb_desc; ++i) {
                if (q->sw_ring[i]) {
                        rte_pktmbuf_free_seg(q->sw_ring[i]);
                        q->sw_ring[i] = NULL;
                }
        }
}

/*
 * free all queue memory used when releasing the queue (i.e. configure)
 */
static inline void
rx_queue_free(struct fm10k_rx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        if (q) {
                PMD_INIT_LOG(DEBUG, "Freeing rx queue %p", q);
                rx_queue_clean(q);
                if (q->sw_ring)
                        rte_free(q->sw_ring);
                rte_free(q);
        }
}

/*
 * disable RX queue, wait until HW finishes the necessary flush operation
 */
static inline int
rx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
{
        uint32_t reg, i;

        reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
        FM10K_WRITE_REG(hw, FM10K_RXQCTL(qnum),
                        reg & ~FM10K_RXQCTL_ENABLE);

        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
                reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
                if (!(reg & FM10K_RXQCTL_ENABLE))
                        break;
        }

        if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
                return -1;

        return 0;
}

/*
 * reset queue to initial state, allocate software buffers used when starting
 * device
 */
static inline void
tx_queue_reset(struct fm10k_tx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        q->last_free = 0;
        q->next_free = 0;
        q->nb_used = 0;
        q->nb_free = q->nb_desc - 1;
        q->free_trigger = q->nb_free - q->free_thresh;
        fifo_reset(&q->rs_tracker, (q->nb_desc + 1) / q->rs_thresh);
        FM10K_PCI_REG_WRITE(q->tail_ptr, 0);
}

/*
 * clean queue, descriptor rings, free software buffers used when stopping
 * device
 */
static inline void
tx_queue_clean(struct fm10k_tx_queue *q)
{
        struct fm10k_tx_desc zero = {0, 0, 0, 0, 0, 0};
        uint32_t i;
        PMD_INIT_FUNC_TRACE();

        /* zero descriptor rings */
        for (i = 0; i < q->nb_desc; ++i)
                q->hw_ring[i] = zero;

        /* free software buffers */
        for (i = 0; i < q->nb_desc; ++i) {
                if (q->sw_ring[i]) {
                        rte_pktmbuf_free_seg(q->sw_ring[i]);
                        q->sw_ring[i] = NULL;
                }
        }
}

/*
 * free all queue memory used when releasing the queue (i.e. configure)
 */
static inline void
tx_queue_free(struct fm10k_tx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        if (q) {
                PMD_INIT_LOG(DEBUG, "Freeing tx queue %p", q);
                tx_queue_clean(q);
                if (q->rs_tracker.list)
                        rte_free(q->rs_tracker.list);
                if (q->sw_ring)
                        rte_free(q->sw_ring);
                rte_free(q);
        }
}

/*
 * disable TX queue, wait until HW finishes the necessary flush operation
 */
static inline int
tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
{
        uint32_t reg, i;

        reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
        FM10K_WRITE_REG(hw, FM10K_TXDCTL(qnum),
                        reg & ~FM10K_TXDCTL_ENABLE);

        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
                reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
                if (!(reg & FM10K_TXDCTL_ENABLE))
                        break;
        }

        if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
                return -1;

        return 0;
}

static int
fm10k_dev_configure(struct rte_eth_dev *dev)
{
        PMD_INIT_FUNC_TRACE();

        if (dev->data->dev_conf.rxmode.hw_strip_crc == 0)
                PMD_INIT_LOG(WARNING, "fm10k always strips CRC");

        return 0;
}

static void
fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
        uint32_t mrqc, *key, i, reta, j;
        uint64_t hf;

#define RSS_KEY_SIZE 40
        static uint8_t rss_intel_key[RSS_KEY_SIZE] = {
                0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
                0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
                0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
                0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
                0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
        };

        if (dev->data->nb_rx_queues == 1 ||
            dev_conf->rxmode.mq_mode != ETH_MQ_RX_RSS ||
            dev_conf->rx_adv_conf.rss_conf.rss_hf == 0)
                return;

        /* use the default key (rss_intel_key) unless the user provided one */
        if (dev_conf->rx_adv_conf.rss_conf.rss_key == NULL)
                key = (uint32_t *)rss_intel_key;
        else
                key = (uint32_t *)dev_conf->rx_adv_conf.rss_conf.rss_key;

        /* Now fill our hash function seeds, 4 bytes at a time */
        for (i = 0; i < RSS_KEY_SIZE / sizeof(*key); ++i)
                FM10K_WRITE_REG(hw, FM10K_RSSRK(0, i), key[i]);

        /*
         * Fill in redirection table
         * The byte-swap is needed because NIC registers are in
         * little-endian order.
         */
        reta = 0;
        for (i = 0, j = 0; i < FM10K_RETA_SIZE; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                reta = (reta << CHAR_BIT) | j;
                if ((i & 3) == 3)
                        FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2),
                                        rte_bswap32(reta));
        }
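
        /*
         * Worked example of the packing above (illustrative values): with
         * three RX queues, RETA entries 0..3 map to queues 0, 1, 2, 0. The
         * loop accumulates reta = 0x00010200 and rte_bswap32() turns that
         * into 0x00020100, so byte n of the little-endian register holds
         * the queue index for entry n.
         */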

        /*
         * Generate RSS hash based on packet types, TCP/UDP
         * port numbers and/or IPv4/v6 src and dst addresses
         */
        hf = dev_conf->rx_adv_conf.rss_conf.rss_hf;
        mrqc = 0;
        mrqc |= (hf & ETH_RSS_IPV4_TCP)    ? FM10K_MRQC_TCP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV4)        ? FM10K_MRQC_IPV4     : 0;
        mrqc |= (hf & ETH_RSS_IPV6)        ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_EX)     ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP)    ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP_EX) ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV4_UDP)    ? FM10K_MRQC_UDP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP)    ? FM10K_MRQC_UDP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP_EX) ? FM10K_MRQC_UDP_IPV6 : 0;

        if (mrqc == 0) {
                PMD_INIT_LOG(ERR, "Specified RSS mode 0x%"PRIx64" is not "
                        "supported", hf);
                return;
        }

        FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);
}
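
/*
 * A minimal sketch of how an application reaches the RSS path above
 * (illustrative only; port_id, the queue counts and the chosen hash types
 * are assumptions, not driver requirements):
 *
 *     struct rte_eth_conf conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *     };
 *     conf.rx_adv_conf.rss_conf.rss_key = NULL;  (NULL selects rss_intel_key)
 *     conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP;
 *     rte_eth_dev_configure(port_id, 4, 4, &conf);
 */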

static int
fm10k_dev_tx_init(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, ret;
        struct fm10k_tx_queue *txq;
        uint64_t base_addr;
        uint32_t size;

        /* Disable TXINT to avoid possible interrupt */
        for (i = 0; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_TXINT(i),
                                3 << FM10K_TXINT_TIMER_SHIFT);

        /* Setup TX queue */
        for (i = 0; i < dev->data->nb_tx_queues; ++i) {
                txq = dev->data->tx_queues[i];
                base_addr = txq->hw_ring_phys_addr;
                size = txq->nb_desc * sizeof(struct fm10k_tx_desc);

                /* disable queue to avoid issues while updating state */
                ret = tx_queue_disable(hw, i);
                if (ret) {
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }

                /* set location and size for descriptor ring */
                FM10K_WRITE_REG(hw, FM10K_TDBAL(i),
                                base_addr & UINT64_LOWER_32BITS_MASK);
                FM10K_WRITE_REG(hw, FM10K_TDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size);
        }
        return 0;
}
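
/*
 * For reference, the TDBAL/TDBAH pair above splits the 64-bit ring address
 * into its low and high 32 bits; e.g. a hypothetical base_addr of
 * 0x0000000123456000 yields TDBAL = 0x23456000 and TDBAH = 0x00000001.
 */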

static int
fm10k_dev_rx_init(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, ret;
        struct fm10k_rx_queue *rxq;
        uint64_t base_addr;
        uint32_t size;
        uint32_t rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
        uint16_t buf_size;
        struct rte_pktmbuf_pool_private *mbp_priv;

        /* Disable RXINT to avoid possible interrupt */
        for (i = 0; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_RXINT(i),
                                3 << FM10K_RXINT_TIMER_SHIFT);

        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; ++i) {
                rxq = dev->data->rx_queues[i];
                base_addr = rxq->hw_ring_phys_addr;
                size = rxq->nb_desc * sizeof(union fm10k_rx_desc);

                /* disable queue to avoid issues while updating state */
                ret = rx_queue_disable(hw, i);
                if (ret) {
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }

                /* Setup the Base and Length of the Rx Descriptor Ring */
                FM10K_WRITE_REG(hw, FM10K_RDBAL(i),
                                base_addr & UINT64_LOWER_32BITS_MASK);
                FM10K_WRITE_REG(hw, FM10K_RDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_RDLEN(i), size);

                /* Configure the Rx buffer size for one buff without split */
                mbp_priv = rte_mempool_get_priv(rxq->mp);
                buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
                                        RTE_PKTMBUF_HEADROOM);
                FM10K_WRITE_REG(hw, FM10K_SRRCTL(i),
                                buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT);

                /* Account for dual VLAN tags when checking the buffer size */
                if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
                                2 * FM10K_VLAN_TAG_SIZE) > buf_size) {
                        dev->data->scattered_rx = 1;
                        dev->rx_pkt_burst = fm10k_recv_scattered_pkts;
                }

                /* Enable drop on empty, it's RO for VF */
                if (hw->mac.type == fm10k_mac_pf && rxq->drop_en)
                        rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;

                FM10K_WRITE_REG(hw, FM10K_RXDCTL(i), rxdctl);
                FM10K_WRITE_FLUSH(hw);
        }

        if (dev->data->dev_conf.rxmode.enable_scatter) {
                dev->rx_pkt_burst = fm10k_recv_scattered_pkts;
                dev->data->scattered_rx = 1;
        }

        /* Configure RSS if applicable */
        fm10k_dev_mq_rx_configure(dev);
        return 0;
}

static int
fm10k_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int err = -1;
        uint32_t reg;
        struct fm10k_rx_queue *rxq;

        PMD_INIT_FUNC_TRACE();

        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];
                err = rx_queue_reset(rxq);
                if (err == -ENOMEM) {
                        PMD_INIT_LOG(ERR, "Failed to alloc memory : %d", err);
                        return err;
                } else if (err == -EINVAL) {
                        PMD_INIT_LOG(ERR, "Invalid buffer address alignment :"
                                " %d", err);
                        return err;
                }

                /* Setup the HW Rx Head and Tail Descriptor Pointers
                 * Note: this must be done AFTER the queue is enabled on real
                 * hardware, but BEFORE the queue is enabled when using the
                 * emulation platform. Do it in both places for now and remove
                 * this comment and the following two register writes when the
                 * emulation platform is no longer being used.
                 */
                FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);

                /* Set PF ownership flag for PF devices */
                reg = FM10K_READ_REG(hw, FM10K_RXQCTL(rx_queue_id));
                if (hw->mac.type == fm10k_mac_pf)
                        reg |= FM10K_RXQCTL_PF;
                reg |= FM10K_RXQCTL_ENABLE;
                /* enable RX queue */
                FM10K_WRITE_REG(hw, FM10K_RXQCTL(rx_queue_id), reg);
                FM10K_WRITE_FLUSH(hw);

                /* Setup the HW Rx Head and Tail Descriptor Pointers
                 * Note: this must be done AFTER the queue is enabled
                 */
                FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);
        }

        return err;
}

static int
fm10k_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        if (rx_queue_id < dev->data->nb_rx_queues) {
                /* Disable RX queue */
                rx_queue_disable(hw, rx_queue_id);

                /* Free mbuf and clean HW ring */
                rx_queue_clean(dev->data->rx_queues[rx_queue_id]);
        }

        return 0;
}

static int
fm10k_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        /** @todo - this should be defined in the shared code */
#define FM10K_TXDCTL_WRITE_BACK_MIN_DELAY       0x00010000
        uint32_t txdctl = FM10K_TXDCTL_WRITE_BACK_MIN_DELAY;
        int err = 0;

        PMD_INIT_FUNC_TRACE();

        if (tx_queue_id < dev->data->nb_tx_queues) {
                tx_queue_reset(dev->data->tx_queues[tx_queue_id]);

                /* reset head and tail pointers */
                FM10K_WRITE_REG(hw, FM10K_TDH(tx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_TDT(tx_queue_id), 0);

                /* enable TX queue */
                FM10K_WRITE_REG(hw, FM10K_TXDCTL(tx_queue_id),
                                        FM10K_TXDCTL_ENABLE | txdctl);
                FM10K_WRITE_FLUSH(hw);
        } else
                err = -1;

        return err;
}

static int
fm10k_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        if (tx_queue_id < dev->data->nb_tx_queues) {
                tx_queue_disable(hw, tx_queue_id);
                tx_queue_clean(dev->data->tx_queues[tx_queue_id]);
        }

        return 0;
}

/* fls = find last set bit = 32 minus the number of leading zeros */
#ifndef fls
#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
#endif
#define BSIZEPKT_ROUNDUP ((1 << FM10K_SRRCTL_BSIZEPKT_SHIFT) - 1)
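
/*
 * For reference: fls(0) = 0, fls(1) = 1, fls(3) = 2, fls(4) = 3.
 * fm10k_dev_start() uses fls(nb_rx_queues - 1) to tell the hardware how
 * many RETA bits the RSS field spans, e.g. 4 RX queues -> fls(3) = 2 bits.
 */
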
static int
fm10k_dev_start(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, diag;

        PMD_INIT_FUNC_TRACE();

        /* stop, init, then start the hw */
        diag = fm10k_stop_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware stop failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_init_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_start_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware start failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_dev_tx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "TX init failed: %d", diag);
                return diag;
        }

        diag = fm10k_dev_rx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "RX init failed: %d", diag);
                return diag;
        }

        if (hw->mac.type == fm10k_mac_pf) {
                /* Establish only VSI 0 as valid */
                FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);

                /* Configure RSS bits used in RETA table */
                FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0),
                                fls(dev->data->nb_rx_queues - 1) <<
                                FM10K_DGLORTDEC_RSSLENGTH_SHIFT);

                /* Invalidate all other GLORT entries */
                for (i = 1; i < FM10K_DGLORT_COUNT; i++)
                        FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(i),
                                        FM10K_DGLORTMAP_NONE);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct fm10k_rx_queue *rxq;
                rxq = dev->data->rx_queues[i];

                if (rxq->rx_deferred_start)
                        continue;
                diag = fm10k_dev_rx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
                        for (j = 0; j < i; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct fm10k_tx_queue *txq;
                txq = dev->data->tx_queues[i];

                if (txq->tx_deferred_start)
                        continue;
                diag = fm10k_dev_tx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
                        for (j = 0; j < dev->data->nb_rx_queues; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }

        return 0;
}

static void
fm10k_dev_stop(struct rte_eth_dev *dev)
{
        int i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                fm10k_dev_tx_queue_stop(dev, i);

        for (i = 0; i < dev->data->nb_rx_queues; i++)
                fm10k_dev_rx_queue_stop(dev, i);
}

static void
fm10k_dev_close(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        /* Stop mailbox service first */
        fm10k_close_mbx_service(hw);
        fm10k_dev_stop(dev);
        fm10k_stop_hw(hw);
}

static int
fm10k_link_update(struct rte_eth_dev *dev,
        __rte_unused int wait_to_complete)
{
        PMD_INIT_FUNC_TRACE();

        /* The host-interface link is always up.  The speed is ~50Gbps per Gen3
         * x8 PCIe interface. For now, we leave the speed undefined since there
         * is no 50Gbps Ethernet. */
        dev->data->dev_link.link_speed  = 0;
        dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
        dev->data->dev_link.link_status = 1;

        return 0;
}

static void
fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        uint64_t ipackets, opackets, ibytes, obytes;
        struct fm10k_hw *hw =
                FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
                FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
        int i;

        PMD_INIT_FUNC_TRACE();

        fm10k_update_hw_stats(hw, hw_stats);

        ipackets = opackets = ibytes = obytes = 0;
        for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
                (i < FM10K_MAX_QUEUES_PF); ++i) {
                stats->q_ipackets[i] = hw_stats->q[i].rx_packets.count;
                stats->q_opackets[i] = hw_stats->q[i].tx_packets.count;
                stats->q_ibytes[i]   = hw_stats->q[i].rx_bytes.count;
                stats->q_obytes[i]   = hw_stats->q[i].tx_bytes.count;
                ipackets += stats->q_ipackets[i];
                opackets += stats->q_opackets[i];
                ibytes   += stats->q_ibytes[i];
                obytes   += stats->q_obytes[i];
        }
        stats->ipackets = ipackets;
        stats->opackets = opackets;
        stats->ibytes = ibytes;
        stats->obytes = obytes;
}

static void
fm10k_stats_reset(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
                FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        memset(hw_stats, 0, sizeof(*hw_stats));
        fm10k_rebind_hw_stats(hw, hw_stats);
}

static void
fm10k_dev_infos_get(struct rte_eth_dev *dev,
        struct rte_eth_dev_info *dev_info)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        dev_info->min_rx_bufsize     = FM10K_MIN_RX_BUF_SIZE;
        dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
        dev_info->max_rx_queues      = hw->mac.max_queues;
        dev_info->max_tx_queues      = hw->mac.max_queues;
        dev_info->max_mac_addrs      = 1;
        dev_info->max_hash_mac_addrs = 0;
        dev_info->max_vfs            = FM10K_MAX_VF_NUM;
        dev_info->max_vmdq_pools     = ETH_64_POOLS;
        dev_info->rx_offload_capa =
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM  |
                DEV_RX_OFFLOAD_TCP_CKSUM;
        dev_info->tx_offload_capa    = 0;
        dev_info->reta_size = FM10K_MAX_RSS_INDICES;

        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_thresh = {
                        .pthresh = FM10K_DEFAULT_RX_PTHRESH,
                        .hthresh = FM10K_DEFAULT_RX_HTHRESH,
                        .wthresh = FM10K_DEFAULT_RX_WTHRESH,
                },
                .rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(0),
                .rx_drop_en = 0,
        };

        dev_info->default_txconf = (struct rte_eth_txconf) {
                .tx_thresh = {
                        .pthresh = FM10K_DEFAULT_TX_PTHRESH,
                        .hthresh = FM10K_DEFAULT_TX_HTHRESH,
                        .wthresh = FM10K_DEFAULT_TX_WTHRESH,
                },
                .tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(0),
                .tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(0),
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
                                ETH_TXQ_FLAGS_NOOFFLOADS,
        };
}

static int
fm10k_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        /* @todo - add support for the VF */
        if (hw->mac.type != fm10k_mac_pf)
                return -ENOTSUP;

        return fm10k_update_vlan(hw, vlan_id, 0, on);
}

static inline int
check_nb_desc(uint16_t min, uint16_t max, uint16_t mult, uint16_t request)
{
        if ((request < min) || (request > max) || ((request % mult) != 0))
                return -1;
        else
                return 0;
}

/*
 * Create a memzone for hardware descriptor rings. Malloc cannot be used since
 * the physical address is required. If the memzone is already created, then
 * this function returns a pointer to the existing memzone.
 */
static inline const struct rte_memzone *
allocate_hw_ring(const char *driver_name, const char *ring_name,
        uint8_t port_id, uint16_t queue_id, int socket_id,
        uint32_t size, uint32_t align)
{
        char name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(name, sizeof(name), "%s_%s_%d_%d_%d",
                 driver_name, ring_name, port_id, queue_id, socket_id);

        /* return the memzone if it already exists */
        mz = rte_memzone_lookup(name);
        if (mz)
                return mz;

#ifdef RTE_LIBRTE_XEN_DOM0
        return rte_memzone_reserve_bounded(name, size, socket_id, 0, align,
                                           RTE_PGSIZE_2M);
#else
        return rte_memzone_reserve_aligned(name, size, socket_id, 0, align);
#endif
}
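
/*
 * For reference, the memzone name generated above has the form
 * "<driver>_<ring>_<port>_<queue>_<socket>", e.g.
 * "rte_pmd_fm10k_rx_ring_0_0_0" for RX queue 0 of port 0 on socket 0.
 */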

static inline int
check_thresh(uint16_t min, uint16_t max, uint16_t div, uint16_t request)
{
        if ((request < min) || (request > max) || ((div % request) != 0))
                return -1;
        else
                return 0;
}
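
/*
 * For reference: check_thresh(32, 512, 512, 64) accepts the request since
 * 64 lies within [32, 512] and 512 % 64 == 0, while a request of 48 would
 * be rejected because 512 % 48 != 0. The values are illustrative only.
 */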

static inline int
handle_rxconf(struct fm10k_rx_queue *q, const struct rte_eth_rxconf *conf)
{
        uint16_t rx_free_thresh;

        if (conf->rx_free_thresh == 0)
                rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(q);
        else
                rx_free_thresh = conf->rx_free_thresh;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_RX_FREE_THRESH_MIN(q),
                        FM10K_RX_FREE_THRESH_MAX(q),
                        FM10K_RX_FREE_THRESH_DIV(q),
                        rx_free_thresh)) {
                PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        rx_free_thresh, FM10K_RX_FREE_THRESH_MAX(q),
                        FM10K_RX_FREE_THRESH_MIN(q),
                        FM10K_RX_FREE_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->alloc_thresh = rx_free_thresh;
        q->drop_en = conf->rx_drop_en;
        q->rx_deferred_start = conf->rx_deferred_start;

        return 0;
}

/*
 * Hardware requires specific alignment for Rx packet buffers. At
 * least one of the following two conditions must be satisfied.
 *  1. Address is 512B aligned
 *  2. Address is 8B aligned and buffer does not cross 4K boundary.
 *
 * As such, the driver may need to adjust the DMA address within the
 * buffer by up to 512B. The mempool element size is checked here
 * to make sure a maximally sized Ethernet frame can still be wholly
 * contained within the buffer after 512B alignment.
 *
 * return 1 if the element size is valid, otherwise return 0.
 */
static int
mempool_element_size_valid(struct rte_mempool *mp)
{
        uint32_t min_size;

        /* elt_size includes mbuf header and headroom */
        min_size = mp->elt_size - sizeof(struct rte_mbuf) -
                        RTE_PKTMBUF_HEADROOM;

        /* account for up to 512B of alignment */
        min_size -= FM10K_RX_BUFF_ALIGN;

        /* sanity check for overflow */
        if (min_size > mp->elt_size)
                return 0;

        if (min_size < ETHER_MAX_VLAN_FRAME_LEN)
                return 0;

        /* size is valid */
        return 1;
}
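
/*
 * Worked example (illustrative; the struct sizes are assumptions that vary
 * by build): with elt_size = 2432, sizeof(struct rte_mbuf) = 128 and
 * RTE_PKTMBUF_HEADROOM = 128, min_size = 2432 - 128 - 128 - 512 = 1664,
 * which is >= ETHER_MAX_VLAN_FRAME_LEN (1522), so the element is accepted.
 */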

static int
fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        uint16_t nb_desc, unsigned int socket_id,
        const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_rx_queue *q;
        const struct rte_memzone *mz;

        PMD_INIT_FUNC_TRACE();

        /* make sure the mempool element size can account for alignment. */
        if (!mempool_element_size_valid(mp)) {
                PMD_INIT_LOG(ERR, "Error : Mempool element size is too small");
                return (-EINVAL);
        }

        /* make sure a valid number of descriptors have been requested */
        if (check_nb_desc(FM10K_MIN_RX_DESC, FM10K_MAX_RX_DESC,
                                FM10K_MULT_RX_DESC, nb_desc)) {
                PMD_INIT_LOG(ERR, "Number of Rx descriptors (%u) must be "
                        "less than or equal to %"PRIu32", "
                        "greater than or equal to %u, "
                        "and a multiple of %u",
                        nb_desc, (uint32_t)FM10K_MAX_RX_DESC, FM10K_MIN_RX_DESC,
                        FM10K_MULT_RX_DESC);
                return (-EINVAL);
        }

        /*
         * if this queue existed already, free the associated memory. The
         * queue cannot be reused in case we need to allocate memory on
         * a different socket than was previously used.
         */
        if (dev->data->rx_queues[queue_id] != NULL) {
                rx_queue_free(dev->data->rx_queues[queue_id]);
                dev->data->rx_queues[queue_id] = NULL;
        }

        /* allocate memory for the queue structure */
        q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
                                socket_id);
        if (q == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
                return (-ENOMEM);
        }

        /* setup queue */
        q->mp = mp;
        q->nb_desc = nb_desc;
        q->port_id = dev->data->port_id;
        q->queue_id = queue_id;
        q->tail_ptr = (volatile uint32_t *)
                &((uint32_t *)hw->hw_addr)[FM10K_RDT(queue_id)];
        if (handle_rxconf(q, conf)) {
                rte_free(q);
                return (-EINVAL);
        }

        /* allocate memory for the software ring */
        q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
                                        nb_desc * sizeof(struct rte_mbuf *),
                                        RTE_CACHE_LINE_SIZE, socket_id);
        if (q->sw_ring == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate software ring");
                rte_free(q);
                return (-ENOMEM);
        }

        /*
         * allocate memory for the hardware descriptor ring. A memzone large
         * enough to hold the maximum ring size is requested to allow for
         * resizing in later calls to the queue setup function.
         */
        mz = allocate_hw_ring(dev->driver->pci_drv.name, "rx_ring",
                                dev->data->port_id, queue_id, socket_id,
                                FM10K_MAX_RX_RING_SZ, FM10K_ALIGN_RX_DESC);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
        q->hw_ring_phys_addr = mz->phys_addr;

        dev->data->rx_queues[queue_id] = q;
        return 0;
}
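
/*
 * A minimal sketch of the matching application call (illustrative; port_id,
 * the descriptor count and pool are assumptions):
 *
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *                                  rte_socket_id(), NULL, pool);
 *
 * Passing NULL for the rxconf argument asks the ethdev layer for defaults,
 * which this driver reports via default_rxconf in fm10k_dev_infos_get().
 */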

static void
fm10k_rx_queue_release(void *queue)
{
        PMD_INIT_FUNC_TRACE();

        rx_queue_free(queue);
}

static inline int
handle_txconf(struct fm10k_tx_queue *q, const struct rte_eth_txconf *conf)
{
        uint16_t tx_free_thresh;
        uint16_t tx_rs_thresh;

        /* the constraint macros require tx_free_thresh to be configured
         * before tx_rs_thresh */
        if (conf->tx_free_thresh == 0)
                tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(q);
        else
                tx_free_thresh = conf->tx_free_thresh;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_TX_FREE_THRESH_MIN(q),
                        FM10K_TX_FREE_THRESH_MAX(q),
                        FM10K_TX_FREE_THRESH_DIV(q),
                        tx_free_thresh)) {
                PMD_INIT_LOG(ERR, "tx_free_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        tx_free_thresh, FM10K_TX_FREE_THRESH_MAX(q),
                        FM10K_TX_FREE_THRESH_MIN(q),
                        FM10K_TX_FREE_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->free_thresh = tx_free_thresh;

        if (conf->tx_rs_thresh == 0)
                tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(q);
        else
                tx_rs_thresh = conf->tx_rs_thresh;

        q->tx_deferred_start = conf->tx_deferred_start;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_TX_RS_THRESH_MIN(q),
                        FM10K_TX_RS_THRESH_MAX(q),
                        FM10K_TX_RS_THRESH_DIV(q),
                        tx_rs_thresh)) {
                PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        tx_rs_thresh, FM10K_TX_RS_THRESH_MAX(q),
                        FM10K_TX_RS_THRESH_MIN(q),
                        FM10K_TX_RS_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->rs_thresh = tx_rs_thresh;

        return 0;
}

static int
fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        uint16_t nb_desc, unsigned int socket_id,
        const struct rte_eth_txconf *conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_tx_queue *q;
        const struct rte_memzone *mz;

        PMD_INIT_FUNC_TRACE();

        /* make sure a valid number of descriptors have been requested */
        if (check_nb_desc(FM10K_MIN_TX_DESC, FM10K_MAX_TX_DESC,
                                FM10K_MULT_TX_DESC, nb_desc)) {
                PMD_INIT_LOG(ERR, "Number of Tx descriptors (%u) must be "
                        "less than or equal to %"PRIu32", "
                        "greater than or equal to %u, "
                        "and a multiple of %u",
                        nb_desc, (uint32_t)FM10K_MAX_TX_DESC, FM10K_MIN_TX_DESC,
                        FM10K_MULT_TX_DESC);
                return (-EINVAL);
        }

        /*
         * if this queue existed already, free the associated memory. The
         * queue cannot be reused in case we need to allocate memory on
         * a different socket than was previously used.
         */
        if (dev->data->tx_queues[queue_id] != NULL) {
                tx_queue_free(dev->data->tx_queues[queue_id]);
                dev->data->tx_queues[queue_id] = NULL;
        }

        /* allocate memory for the queue structure */
        q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
                                socket_id);
        if (q == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
                return (-ENOMEM);
        }

        /* setup queue */
        q->nb_desc = nb_desc;
        q->port_id = dev->data->port_id;
        q->queue_id = queue_id;
        q->tail_ptr = (volatile uint32_t *)
                &((uint32_t *)hw->hw_addr)[FM10K_TDT(queue_id)];
        if (handle_txconf(q, conf)) {
                rte_free(q);
                return (-EINVAL);
        }

        /* allocate memory for the software ring */
        q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
                                        nb_desc * sizeof(struct rte_mbuf *),
                                        RTE_CACHE_LINE_SIZE, socket_id);
        if (q->sw_ring == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate software ring");
                rte_free(q);
                return (-ENOMEM);
        }

        /*
         * allocate memory for the hardware descriptor ring. A memzone large
         * enough to hold the maximum ring size is requested to allow for
         * resizing in later calls to the queue setup function.
         */
        mz = allocate_hw_ring(dev->driver->pci_drv.name, "tx_ring",
                                dev->data->port_id, queue_id, socket_id,
                                FM10K_MAX_TX_RING_SZ, FM10K_ALIGN_TX_DESC);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
        q->hw_ring_phys_addr = mz->phys_addr;

        /*
         * allocate memory for the RS bit tracker. Enough slots to hold the
         * descriptor index for each RS bit needing to be set are required.
         */
        q->rs_tracker.list = rte_zmalloc_socket("fm10k rs tracker",
                                ((nb_desc + 1) / q->rs_thresh) *
                                sizeof(uint16_t),
                                RTE_CACHE_LINE_SIZE, socket_id);
        if (q->rs_tracker.list == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate RS bit tracker");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }

        dev->data->tx_queues[queue_id] = q;
        return 0;
}

static void
fm10k_tx_queue_release(void *queue)
{
        PMD_INIT_FUNC_TRACE();

        tx_queue_free(queue);
}

static int
fm10k_reta_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t i, j, idx, shift;
        uint8_t mask;
        uint32_t reta;

        PMD_INIT_FUNC_TRACE();

        if (reta_size > FM10K_MAX_RSS_INDICES) {
                PMD_INIT_LOG(ERR, "The size of the hash lookup table "
                        "configured (%d) exceeds the number of entries "
                        "the hardware supports (%d)",
                        reta_size, FM10K_MAX_RSS_INDICES);
                return -EINVAL;
        }

        /*
         * Update Redirection Table RETA[n], n=0..31. The redirection table
         * has 128 entries in 32 registers.
         */
        for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) &
                                BIT_MASK_PER_UINT32);
                if (mask == 0)
                        continue;

                reta = 0;
                if (mask != BIT_MASK_PER_UINT32)
                        reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));

                for (j = 0; j < CHARS_PER_UINT32; j++) {
                        if (mask & (0x1 << j)) {
                                if (mask != BIT_MASK_PER_UINT32)
                                        reta &= ~(UINT8_MAX << CHAR_BIT * j);
                                reta |= reta_conf[idx].reta[shift + j] <<
                                                (CHAR_BIT * j);
                        }
                }
                FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2), reta);
        }

        return 0;
}
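
/*
 * A minimal sketch of driving this from an application (illustrative;
 * port_id and the queue assignments are assumptions). The 128
 * FM10K_MAX_RSS_INDICES entries span two rte_eth_rss_reta_entry64 groups
 * of 64:
 *
 *     struct rte_eth_rss_reta_entry64 reta_conf[2];
 *     memset(reta_conf, 0, sizeof(reta_conf));
 *     reta_conf[0].mask = 0xF;
 *     reta_conf[0].reta[0] = 0;
 *     reta_conf[0].reta[1] = 1;
 *     reta_conf[0].reta[2] = 2;
 *     reta_conf[0].reta[3] = 3;
 *     rte_eth_dev_rss_reta_update(port_id, reta_conf, FM10K_MAX_RSS_INDICES);
 */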

static int
fm10k_reta_query(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t i, j, idx, shift;
        uint8_t mask;
        uint32_t reta;

        PMD_INIT_FUNC_TRACE();

        if (reta_size < FM10K_MAX_RSS_INDICES) {
                PMD_INIT_LOG(ERR, "The size of the hash lookup table "
                        "configured (%d) is smaller than the number of "
                        "entries the hardware supports (%d)",
                        reta_size, FM10K_MAX_RSS_INDICES);
                return -EINVAL;
        }

        /*
         * Read Redirection Table RETA[n], n=0..31. The redirection table
         * has 128 entries in 32 registers.
         */
        for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) &
                                BIT_MASK_PER_UINT32);
                if (mask == 0)
                        continue;

                reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));
                for (j = 0; j < CHARS_PER_UINT32; j++) {
                        if (mask & (0x1 << j))
                                reta_conf[idx].reta[shift + j] = ((reta >>
                                        CHAR_BIT * j) & UINT8_MAX);
                }
        }

        return 0;
}

static int
fm10k_rss_hash_update(struct rte_eth_dev *dev,
        struct rte_eth_rss_conf *rss_conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t *key = (uint32_t *)rss_conf->rss_key;
        uint32_t mrqc;
        uint64_t hf = rss_conf->rss_hf;
        int i;

        PMD_INIT_FUNC_TRACE();

        if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
                FM10K_RSSRK_ENTRIES_PER_REG)
                return -EINVAL;

        if (hf == 0)
                return -EINVAL;

        mrqc = 0;
        mrqc |= (hf & ETH_RSS_IPV4_TCP)    ? FM10K_MRQC_TCP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV4)        ? FM10K_MRQC_IPV4     : 0;
        mrqc |= (hf & ETH_RSS_IPV6)        ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_EX)     ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP)    ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP_EX) ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV4_UDP)    ? FM10K_MRQC_UDP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP)    ? FM10K_MRQC_UDP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP_EX) ? FM10K_MRQC_UDP_IPV6 : 0;

        /* If the requested hash types map to nothing supported, bail out */
        if (mrqc == 0)
                return -EINVAL;

        if (key != NULL)
                for (i = 0; i < FM10K_RSSRK_SIZE; ++i)
                        FM10K_WRITE_REG(hw, FM10K_RSSRK(0, i), key[i]);

        FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);

        return 0;
}
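
/*
 * A minimal sketch of changing the hash configuration at runtime
 * (illustrative; port_id is an assumption, and the 40-byte key matches the
 * RSS_KEY_SIZE used elsewhere in this file):
 *
 *     uint8_t key[40] = { 0 };  (the application's key material goes here)
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = key,
 *             .rss_key_len = sizeof(key),
 *             .rss_hf = ETH_RSS_IPV6 | ETH_RSS_IPV6_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 */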

static int
fm10k_rss_hash_conf_get(struct rte_eth_dev *dev,
        struct rte_eth_rss_conf *rss_conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t *key = (uint32_t *)rss_conf->rss_key;
        uint32_t mrqc;
        uint64_t hf;
        int i;

        PMD_INIT_FUNC_TRACE();

        if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
                                FM10K_RSSRK_ENTRIES_PER_REG)
                return -EINVAL;

        if (key != NULL)
                for (i = 0; i < FM10K_RSSRK_SIZE; ++i)
                        key[i] = FM10K_READ_REG(hw, FM10K_RSSRK(0, i));

        mrqc = FM10K_READ_REG(hw, FM10K_MRQC(0));
        hf = 0;
        hf |= (mrqc & FM10K_MRQC_TCP_IPV4) ? ETH_RSS_IPV4_TCP    : 0;
        hf |= (mrqc & FM10K_MRQC_IPV4)     ? ETH_RSS_IPV4        : 0;
        hf |= (mrqc & FM10K_MRQC_IPV6)     ? ETH_RSS_IPV6        : 0;
        hf |= (mrqc & FM10K_MRQC_IPV6)     ? ETH_RSS_IPV6_EX     : 0;
        hf |= (mrqc & FM10K_MRQC_TCP_IPV6) ? ETH_RSS_IPV6_TCP    : 0;
        hf |= (mrqc & FM10K_MRQC_TCP_IPV6) ? ETH_RSS_IPV6_TCP_EX : 0;
        hf |= (mrqc & FM10K_MRQC_UDP_IPV4) ? ETH_RSS_IPV4_UDP    : 0;
        hf |= (mrqc & FM10K_MRQC_UDP_IPV6) ? ETH_RSS_IPV6_UDP    : 0;
        hf |= (mrqc & FM10K_MRQC_UDP_IPV6) ? ETH_RSS_IPV6_UDP_EX : 0;

        rss_conf->rss_hf = hf;

        return 0;
}

/* Mailbox message handlers for the VF */
static const struct fm10k_msg_data fm10k_msgdata_vf[] = {
        FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
        FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf),
        FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
};

/* Mailbox message handlers for the PF */
static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
        FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
        FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
        FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
        FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
        FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
        FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
};

static int
fm10k_setup_mbx_service(struct fm10k_hw *hw)
{
        int err;

        /* Initialize mailbox lock */
        fm10k_mbx_initlock(hw);

        /* Replace default message handlers with new ones */
        if (hw->mac.type == fm10k_mac_pf)
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
        else
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);

        if (err) {
                PMD_INIT_LOG(ERR, "Failed to register mailbox handlers, err: %d",
                                err);
                return err;
        }
        /* Connect to SM for PF device or PF for VF device */
        return hw->mbx.ops.connect(hw, &hw->mbx);
}

static void
fm10k_close_mbx_service(struct fm10k_hw *hw)
{
        /* Disconnect from SM for PF device or PF for VF device */
        hw->mbx.ops.disconnect(hw, &hw->mbx);
}

static struct eth_dev_ops fm10k_eth_dev_ops = {
        .dev_configure          = fm10k_dev_configure,
        .dev_start              = fm10k_dev_start,
        .dev_stop               = fm10k_dev_stop,
        .dev_close              = fm10k_dev_close,
        .stats_get              = fm10k_stats_get,
        .stats_reset            = fm10k_stats_reset,
        .link_update            = fm10k_link_update,
        .dev_infos_get          = fm10k_dev_infos_get,
        .vlan_filter_set        = fm10k_vlan_filter_set,
        .rx_queue_start         = fm10k_dev_rx_queue_start,
        .rx_queue_stop          = fm10k_dev_rx_queue_stop,
        .tx_queue_start         = fm10k_dev_tx_queue_start,
        .tx_queue_stop          = fm10k_dev_tx_queue_stop,
        .rx_queue_setup         = fm10k_rx_queue_setup,
        .rx_queue_release       = fm10k_rx_queue_release,
        .tx_queue_setup         = fm10k_tx_queue_setup,
        .tx_queue_release       = fm10k_tx_queue_release,
        .reta_update            = fm10k_reta_update,
        .reta_query             = fm10k_reta_query,
        .rss_hash_update        = fm10k_rss_hash_update,
        .rss_hash_conf_get      = fm10k_rss_hash_conf_get,
};

static int
eth_fm10k_dev_init(__rte_unused struct eth_driver *eth_drv,
        struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int diag;

        PMD_INIT_FUNC_TRACE();

        dev->dev_ops = &fm10k_eth_dev_ops;
        dev->rx_pkt_burst = &fm10k_recv_pkts;
        dev->tx_pkt_burst = &fm10k_xmit_pkts;

        if (dev->data->scattered_rx)
                dev->rx_pkt_burst = &fm10k_recv_scattered_pkts;

        /* only initialize in the primary process */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        /* Vendor and Device ID need to be set before init of shared code */
        memset(hw, 0, sizeof(*hw));
        hw->device_id = dev->pci_dev->id.device_id;
        hw->vendor_id = dev->pci_dev->id.vendor_id;
        hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
        hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
        hw->revision_id = 0;
        hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
        if (hw->hw_addr == NULL) {
                PMD_INIT_LOG(ERR, "Bad mem resource."
                        " Try to blacklist unused devices.");
                return -EIO;
        }

        /* Store fm10k_adapter pointer */
        hw->back = dev->data->dev_private;

        /* Initialize the shared code */
        diag = fm10k_init_shared_code(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
                return -EIO;
        }

        /*
         * Initialize bus info. Normally we would call fm10k_get_bus_info(),
         * but there is no way to get link status without reading BAR4.
         * Until this works, assume we have maximum bandwidth.
         * @todo - fix bus info
         */
        hw->bus_caps.speed = fm10k_bus_speed_8000;
        hw->bus_caps.width = fm10k_bus_width_pcie_x8;
        hw->bus_caps.payload = fm10k_bus_payload_512;
        hw->bus.speed = fm10k_bus_speed_8000;
        hw->bus.width = fm10k_bus_width_pcie_x8;
        hw->bus.payload = fm10k_bus_payload_256;

        /* Initialize the hw */
        diag = fm10k_init_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
                return -EIO;
        }

        /* Initialize MAC address(es) */
        dev->data->mac_addrs = rte_zmalloc("fm10k", ETHER_ADDR_LEN, 0);
        if (dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate memory for MAC addresses");
                return -ENOMEM;
        }

        diag = fm10k_read_mac_addr(hw);
        if (diag != FM10K_SUCCESS) {
                /*
                 * TODO: remove special handling on VF. Need shared code to
                 * fix first.
                 */
                if (hw->mac.type == fm10k_mac_pf) {
                        PMD_INIT_LOG(ERR, "Read MAC addr failed: %d", diag);
                        return -EIO;
                } else {
                        /* Generate a random addr */
                        eth_random_addr(hw->mac.addr);
                        memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
                }
        }

        ether_addr_copy((const struct ether_addr *)hw->mac.addr,
                        &dev->data->mac_addrs[0]);

        /* Reset the hw statistics */
        fm10k_stats_reset(dev);

        /* Reset the hw */
        diag = fm10k_reset_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware reset failed: %d", diag);
                return -EIO;
        }

        /* Setup mailbox service */
        diag = fm10k_setup_mbx_service(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Failed to setup mailbox: %d", diag);
                return -EIO;
        }

        /*
         * The calls below trigger operations on the mailbox, so acquire the
         * lock to avoid racing with the interrupt handler: writes to the
         * mailbox FIFO raise an interrupt to the PF/SM, whose handler in
         * turn raises an interrupt back to our side, at which point our
         * mailbox FIFO is touched again.
         */
        fm10k_mbx_lock(hw);
        /* Enable port first */
        hw->mac.ops.update_lport_state(hw, 0, 0, 1);

        /* Update default vlan */
        hw->mac.ops.update_vlan(hw, hw->mac.default_vid, 0, true);

        /*
         * Add default mac/vlan filter. The glort is assigned by the SM for
         * the PF and is unused for the VF; the PF will assign the correct
         * glort for the VF.
         */
        hw->mac.ops.update_uc_addr(hw, hw->mac.dglort_map, hw->mac.addr,
                              hw->mac.default_vid, 1, 0);

        /*
         * Set multicast xcast mode by default. The application can switch
         * to another mode later through the corresponding API.
         */
        hw->mac.ops.update_xcast_mode(hw, hw->mac.dglort_map,
                                        FM10K_XCAST_MODE_MULTI);

        fm10k_mbx_unlock(hw);

        return 0;
}

/*
 * The set of PCI devices this driver supports. This driver will enable both PF
 * and SRIOV-VF devices.
 */
static struct rte_pci_id pci_id_fm10k_map[] = {
#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
#include "rte_pci_dev_ids.h"
        { .vendor_id = 0, /* sentinel */ },
};

static struct eth_driver rte_pmd_fm10k = {
        {
                .name = "rte_pmd_fm10k",
                .id_table = pci_id_fm10k_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
        },
        .eth_dev_init = eth_fm10k_dev_init,
        .dev_private_size = sizeof(struct fm10k_adapter),
};

/*
 * Driver initialization routine.
 * Invoked once at EAL init time.
 * Register itself as the [Poll Mode] Driver of PCI FM10K devices.
 */
static int
rte_pmd_fm10k_init(__rte_unused const char *name,
        __rte_unused const char *params)
{
        PMD_INIT_FUNC_TRACE();
        rte_eth_driver_register(&rte_pmd_fm10k);
        return 0;
}

static struct rte_driver rte_fm10k_driver = {
        .type = PMD_PDEV,
        .init = rte_pmd_fm10k_init,
};

PMD_REGISTER_DRIVER(rte_fm10k_driver);