fm10k: add scatter receive
[dpdk.git] lib/librte_pmd_fm10k/fm10k_ethdev.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_string_fns.h>
#include <rte_dev.h>
#include <rte_spinlock.h>

#include "fm10k.h"
#include "base/fm10k_api.h"

#define FM10K_RX_BUFF_ALIGN 512
/* Default delay to acquire mailbox lock */
#define FM10K_MBXLOCK_DELAY_US 20
#define UINT64_LOWER_32BITS_MASK 0x00000000ffffffffULL

/* Number of chars per uint32 type */
#define CHARS_PER_UINT32 (sizeof(uint32_t))
#define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1)

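/*
 * Illustrative note (editor's sketch, not upstream code): on all supported
 * platforms sizeof(uint32_t) is 4, so CHARS_PER_UINT32 evaluates to 4 and
 * BIT_MASK_PER_UINT32 to (1 << 4) - 1 = 0xF. The four mask bits correspond
 * to the four byte lanes of one 32-bit RETA register, e.g.:
 *
 *     mask = (reta_conf[idx].mask >> shift) & BIT_MASK_PER_UINT32;
 *     // mask == 0xF: all four table entries in this register change
 */
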
static void fm10k_close_mbx_service(struct fm10k_hw *hw);

static void
fm10k_mbx_initlock(struct fm10k_hw *hw)
{
        rte_spinlock_init(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
}

static void
fm10k_mbx_lock(struct fm10k_hw *hw)
{
        while (!rte_spinlock_trylock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back)))
                rte_delay_us(FM10K_MBXLOCK_DELAY_US);
}

static void
fm10k_mbx_unlock(struct fm10k_hw *hw)
{
        rte_spinlock_unlock(FM10K_DEV_PRIVATE_TO_MBXLOCK(hw->back));
}

/*
 * reset queue to initial state, allocate software buffers used when starting
 * device.
 * return 0 on success
 * return -ENOMEM if buffers cannot be allocated
 * return -EINVAL if buffers do not satisfy alignment condition
 */
static inline int
rx_queue_reset(struct fm10k_rx_queue *q)
{
        uint64_t dma_addr;
        int i, diag;
        PMD_INIT_FUNC_TRACE();

        diag = rte_mempool_get_bulk(q->mp, (void **)q->sw_ring, q->nb_desc);
        if (diag != 0)
                return -ENOMEM;

        for (i = 0; i < q->nb_desc; ++i) {
                fm10k_pktmbuf_reset(q->sw_ring[i], q->port_id);
                if (!fm10k_addr_alignment_valid(q->sw_ring[i])) {
                        rte_mempool_put_bulk(q->mp, (void **)q->sw_ring,
                                                q->nb_desc);
                        return -EINVAL;
                }
                dma_addr = MBUF_DMA_ADDR_DEFAULT(q->sw_ring[i]);
                q->hw_ring[i].q.pkt_addr = dma_addr;
                q->hw_ring[i].q.hdr_addr = dma_addr;
        }

        q->next_dd = 0;
        q->next_alloc = 0;
        q->next_trigger = q->alloc_thresh - 1;
        FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);
        return 0;
}
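
/*
 * Editor's sketch of the post-reset ring state (assumed nb_desc = 512,
 * alloc_thresh = 64; not upstream code): every descriptor owns a fresh
 * mbuf, next_dd = next_alloc = 0, next_trigger = 63, and the tail pointer
 * is written with 511 so the entire ring is handed to hardware for receive.
 */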

/*
 * clean queue, descriptor rings, free software buffers used when stopping
 * device.
 */
static inline void
rx_queue_clean(struct fm10k_rx_queue *q)
{
        union fm10k_rx_desc zero = {.q = {0, 0, 0, 0} };
        uint32_t i;
        PMD_INIT_FUNC_TRACE();

        /* zero descriptor rings */
        for (i = 0; i < q->nb_desc; ++i)
                q->hw_ring[i] = zero;

        /* free software buffers */
        for (i = 0; i < q->nb_desc; ++i) {
                if (q->sw_ring[i]) {
                        rte_pktmbuf_free_seg(q->sw_ring[i]);
                        q->sw_ring[i] = NULL;
                }
        }
}

/*
 * free all queue memory; used when the queue is released, e.g. on
 * reconfigure
 */
static inline void
rx_queue_free(struct fm10k_rx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        if (q) {
                PMD_INIT_LOG(DEBUG, "Freeing rx queue %p", q);
                rx_queue_clean(q);
                if (q->sw_ring)
                        rte_free(q->sw_ring);
                rte_free(q);
        }
}

/*
 * disable RX queue, wait until HW finishes the necessary flush operation
 */
static inline int
rx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
{
        uint32_t reg, i;

        reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
        FM10K_WRITE_REG(hw, FM10K_RXQCTL(qnum),
                        reg & ~FM10K_RXQCTL_ENABLE);

        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
                /* poll the queue being disabled, not loop index 'i' */
                reg = FM10K_READ_REG(hw, FM10K_RXQCTL(qnum));
                if (!(reg & FM10K_RXQCTL_ENABLE))
                        break;
        }

        if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
                return -1;

        return 0;
}
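
/*
 * Editor's note (illustrative, not upstream code): the flush wait above
 * polls once per microsecond, so FM10K_QUEUE_DISABLE_TIMEOUT iterations
 * bound the wait at roughly 100us. Callers treat a timeout as fatal, e.g.:
 *
 *     if (rx_queue_disable(hw, i) < 0)
 *             PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
 */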

/*
 * reset queue to initial state. Unlike the RX path, no software buffers
 * are allocated here; TX mbufs are attached as packets are transmitted.
 */
static inline void
tx_queue_reset(struct fm10k_tx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        q->last_free = 0;
        q->next_free = 0;
        q->nb_used = 0;
        q->nb_free = q->nb_desc - 1;
        q->free_trigger = q->nb_free - q->free_thresh;
        fifo_reset(&q->rs_tracker, (q->nb_desc + 1) / q->rs_thresh);
        FM10K_PCI_REG_WRITE(q->tail_ptr, 0);
}
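
/*
 * Editor's worked example (assumed values, not upstream code): with
 * nb_desc = 512 and free_thresh = 64, reset leaves nb_free = 511 and
 * free_trigger = 511 - 64 = 447; free_trigger is the fill level at which
 * the transmit path starts reclaiming completed descriptors. The RS
 * tracker FIFO is sized to (512 + 1) / rs_thresh slots, one slot per
 * descriptor that carries an RS bit.
 */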

/*
 * clean queue, descriptor rings, free software buffers used when stopping
 * device
 */
static inline void
tx_queue_clean(struct fm10k_tx_queue *q)
{
        struct fm10k_tx_desc zero = {0, 0, 0, 0, 0, 0};
        uint32_t i;
        PMD_INIT_FUNC_TRACE();

        /* zero descriptor rings */
        for (i = 0; i < q->nb_desc; ++i)
                q->hw_ring[i] = zero;

        /* free software buffers */
        for (i = 0; i < q->nb_desc; ++i) {
                if (q->sw_ring[i]) {
                        rte_pktmbuf_free_seg(q->sw_ring[i]);
                        q->sw_ring[i] = NULL;
                }
        }
}

/*
 * free all queue memory; used when the queue is released, e.g. on
 * reconfigure
 */
static inline void
tx_queue_free(struct fm10k_tx_queue *q)
{
        PMD_INIT_FUNC_TRACE();
        if (q) {
                PMD_INIT_LOG(DEBUG, "Freeing tx queue %p", q);
                tx_queue_clean(q);
                if (q->rs_tracker.list)
                        rte_free(q->rs_tracker.list);
                if (q->sw_ring)
                        rte_free(q->sw_ring);
                rte_free(q);
        }
}

/*
 * disable TX queue, wait until HW finishes the necessary flush operation
 */
static inline int
tx_queue_disable(struct fm10k_hw *hw, uint16_t qnum)
{
        uint32_t reg, i;

        reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
        FM10K_WRITE_REG(hw, FM10K_TXDCTL(qnum),
                        reg & ~FM10K_TXDCTL_ENABLE);

        /* Wait 100us at most */
        for (i = 0; i < FM10K_QUEUE_DISABLE_TIMEOUT; i++) {
                rte_delay_us(1);
                /* poll the queue being disabled, not loop index 'i' */
                reg = FM10K_READ_REG(hw, FM10K_TXDCTL(qnum));
                if (!(reg & FM10K_TXDCTL_ENABLE))
                        break;
        }

        if (i == FM10K_QUEUE_DISABLE_TIMEOUT)
                return -1;

        return 0;
}

static int
fm10k_dev_configure(struct rte_eth_dev *dev)
{
        PMD_INIT_FUNC_TRACE();

        if (dev->data->dev_conf.rxmode.hw_strip_crc == 0)
                PMD_INIT_LOG(WARNING, "fm10k always strips CRC");

        return 0;
}

static void
fm10k_dev_mq_rx_configure(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
        uint32_t mrqc, *key, i, reta, j;
        uint64_t hf;

#define RSS_KEY_SIZE 40
        static uint8_t rss_intel_key[RSS_KEY_SIZE] = {
                0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
                0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
                0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
                0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
                0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
        };

        if (dev->data->nb_rx_queues == 1 ||
            dev_conf->rxmode.mq_mode != ETH_MQ_RX_RSS ||
            dev_conf->rx_adv_conf.rss_conf.rss_hf == 0)
                return;

        /* the hash key is rss_intel_key (default) or user-provided (rss_key) */
        if (dev_conf->rx_adv_conf.rss_conf.rss_key == NULL)
                key = (uint32_t *)rss_intel_key;
        else
                key = (uint32_t *)dev_conf->rx_adv_conf.rss_conf.rss_key;

        /* Now fill our hash function seeds, 4 bytes at a time */
        for (i = 0; i < RSS_KEY_SIZE / sizeof(*key); ++i)
                FM10K_WRITE_REG(hw, FM10K_RSSRK(0, i), key[i]);

        /*
         * Fill in redirection table
         * The byte-swap is needed because NIC registers are in
         * little-endian order.
         */
        reta = 0;
        for (i = 0, j = 0; i < FM10K_RETA_SIZE; i++, j++) {
                if (j == dev->data->nb_rx_queues)
                        j = 0;
                reta = (reta << CHAR_BIT) | j;
                if ((i & 3) == 3)
                        FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2),
                                        rte_bswap32(reta));
        }

        /*
         * Generate RSS hash based on packet types, TCP/UDP
         * port numbers and/or IPv4/v6 src and dst addresses
         */
        hf = dev_conf->rx_adv_conf.rss_conf.rss_hf;
        mrqc = 0;
        mrqc |= (hf & ETH_RSS_IPV4_TCP)    ? FM10K_MRQC_TCP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV4)        ? FM10K_MRQC_IPV4     : 0;
        mrqc |= (hf & ETH_RSS_IPV6)        ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_EX)     ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP)    ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP_EX) ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV4_UDP)    ? FM10K_MRQC_UDP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP)    ? FM10K_MRQC_UDP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP_EX) ? FM10K_MRQC_UDP_IPV6 : 0;

        if (mrqc == 0) {
                PMD_INIT_LOG(ERR, "Specified RSS mode 0x%"PRIx64" is not "
                        "supported", hf);
                return;
        }

        FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);
}
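
/*
 * Editor's worked example (illustrative, not upstream code): with four RX
 * queues, the first pass of the RETA loop packs j = 0, 1, 2, 3 into
 * reta = 0x00010203; rte_bswap32() turns that into 0x03020100, so the
 * little-endian register holds queue 0 in its lowest byte. Likewise,
 * rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP maps to
 * mrqc = FM10K_MRQC_IPV4 | FM10K_MRQC_TCP_IPV4.
 */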

static int
fm10k_dev_tx_init(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, ret;
        struct fm10k_tx_queue *txq;
        uint64_t base_addr;
        uint32_t size;

        /* Disable TXINT to avoid possible interrupt */
        for (i = 0; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_TXINT(i),
                                3 << FM10K_TXINT_TIMER_SHIFT);

        /* Setup TX queue */
        for (i = 0; i < dev->data->nb_tx_queues; ++i) {
                txq = dev->data->tx_queues[i];
                base_addr = txq->hw_ring_phys_addr;
                size = txq->nb_desc * sizeof(struct fm10k_tx_desc);

                /* disable queue to avoid issues while updating state */
                ret = tx_queue_disable(hw, i);
                if (ret) {
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }

                /* set location and size for descriptor ring */
                FM10K_WRITE_REG(hw, FM10K_TDBAL(i),
                                base_addr & UINT64_LOWER_32BITS_MASK);
                FM10K_WRITE_REG(hw, FM10K_TDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size);
        }
        return 0;
}

static int
fm10k_dev_rx_init(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, ret;
        struct fm10k_rx_queue *rxq;
        uint64_t base_addr;
        uint32_t size;
        uint32_t rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
        uint16_t buf_size;
        struct rte_pktmbuf_pool_private *mbp_priv;

        /* Disable RXINT to avoid possible interrupt */
        for (i = 0; i < hw->mac.max_queues; i++)
                FM10K_WRITE_REG(hw, FM10K_RXINT(i),
                                3 << FM10K_RXINT_TIMER_SHIFT);

        /* Setup RX queues */
        for (i = 0; i < dev->data->nb_rx_queues; ++i) {
                rxq = dev->data->rx_queues[i];
                base_addr = rxq->hw_ring_phys_addr;
                size = rxq->nb_desc * sizeof(union fm10k_rx_desc);

                /* disable queue to avoid issues while updating state */
                ret = rx_queue_disable(hw, i);
                if (ret) {
                        PMD_INIT_LOG(ERR, "failed to disable queue %d", i);
                        return -1;
                }

                /* Setup the Base and Length of the Rx Descriptor Ring */
                FM10K_WRITE_REG(hw, FM10K_RDBAL(i),
                                base_addr & UINT64_LOWER_32BITS_MASK);
                FM10K_WRITE_REG(hw, FM10K_RDBAH(i),
                                base_addr >> (CHAR_BIT * sizeof(uint32_t)));
                FM10K_WRITE_REG(hw, FM10K_RDLEN(i), size);

                /* Configure the Rx buffer size for one buff without split */
                mbp_priv = rte_mempool_get_priv(rxq->mp);
                buf_size = (uint16_t) (mbp_priv->mbuf_data_room_size -
                                        RTE_PKTMBUF_HEADROOM);
                FM10K_WRITE_REG(hw, FM10K_SRRCTL(i),
                                buf_size >> FM10K_SRRCTL_BSIZEPKT_SHIFT);

                /* Add dual VLAN tag length so QinQ frames still fit */
                if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
                                2 * FM10K_VLAN_TAG_SIZE) > buf_size) {
                        dev->data->scattered_rx = 1;
                        dev->rx_pkt_burst = fm10k_recv_scattered_pkts;
                }

                /* Enable drop on empty; the bit is read-only for VF */
                if (hw->mac.type == fm10k_mac_pf && rxq->drop_en)
                        rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;

                FM10K_WRITE_REG(hw, FM10K_RXDCTL(i), rxdctl);
                FM10K_WRITE_FLUSH(hw);
        }

        if (dev->data->dev_conf.rxmode.enable_scatter) {
                dev->rx_pkt_burst = fm10k_recv_scattered_pkts;
                dev->data->scattered_rx = 1;
        }

        /* Configure RSS if applicable */
        fm10k_dev_mq_rx_configure(dev);
        return 0;
}
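
/*
 * Editor's worked example of the scatter decision above (assumed numbers,
 * not upstream code): a mempool whose data room is 2176 bytes leaves
 * buf_size = 2176 - RTE_PKTMBUF_HEADROOM (128) = 2048 bytes per mbuf.
 * With max_rx_pkt_len = 1518, 1518 plus two VLAN tags (2 * 4 = 8 bytes)
 * is 1526 <= 2048, so single-mbuf receive is kept. A 9000-byte jumbo
 * max_rx_pkt_len exceeds buf_size, so scattered_rx is set and
 * fm10k_recv_scattered_pkts chains multiple mbufs per frame.
 */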

static int
fm10k_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int err = -1;
        uint32_t reg;
        struct fm10k_rx_queue *rxq;

        PMD_INIT_FUNC_TRACE();

        if (rx_queue_id < dev->data->nb_rx_queues) {
                rxq = dev->data->rx_queues[rx_queue_id];
                err = rx_queue_reset(rxq);
                if (err == -ENOMEM) {
                        PMD_INIT_LOG(ERR, "Failed to alloc memory : %d", err);
                        return err;
                } else if (err == -EINVAL) {
                        PMD_INIT_LOG(ERR, "Invalid buffer address alignment :"
                                " %d", err);
                        return err;
                }

                /* Setup the HW Rx Head and Tail Descriptor Pointers
                 * Note: this must be done AFTER the queue is enabled on real
                 * hardware, but BEFORE the queue is enabled when using the
                 * emulation platform. Do it in both places for now and remove
                 * this comment and the following two register writes when the
                 * emulation platform is no longer being used.
                 */
                FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);

                /* Set PF ownership flag for PF devices */
                reg = FM10K_READ_REG(hw, FM10K_RXQCTL(rx_queue_id));
                if (hw->mac.type == fm10k_mac_pf)
                        reg |= FM10K_RXQCTL_PF;
                reg |= FM10K_RXQCTL_ENABLE;
                /* enable RX queue */
                FM10K_WRITE_REG(hw, FM10K_RXQCTL(rx_queue_id), reg);
                FM10K_WRITE_FLUSH(hw);

                /* Setup the HW Rx Head and Tail Descriptor Pointers
                 * Note: this must be done AFTER the queue is enabled
                 */
                FM10K_WRITE_REG(hw, FM10K_RDH(rx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_RDT(rx_queue_id), rxq->nb_desc - 1);
        }

        return err;
}

static int
fm10k_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        if (rx_queue_id < dev->data->nb_rx_queues) {
                /* Disable RX queue */
                rx_queue_disable(hw, rx_queue_id);

                /* Free mbuf and clean HW ring */
                rx_queue_clean(dev->data->rx_queues[rx_queue_id]);
        }

        return 0;
}

static int
fm10k_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        /** @todo - this should be defined in the shared code */
#define FM10K_TXDCTL_WRITE_BACK_MIN_DELAY       0x00010000
        uint32_t txdctl = FM10K_TXDCTL_WRITE_BACK_MIN_DELAY;
        int err = 0;

        PMD_INIT_FUNC_TRACE();

        if (tx_queue_id < dev->data->nb_tx_queues) {
                tx_queue_reset(dev->data->tx_queues[tx_queue_id]);

                /* reset head and tail pointers */
                FM10K_WRITE_REG(hw, FM10K_TDH(tx_queue_id), 0);
                FM10K_WRITE_REG(hw, FM10K_TDT(tx_queue_id), 0);

                /* enable TX queue */
                FM10K_WRITE_REG(hw, FM10K_TXDCTL(tx_queue_id),
                                        FM10K_TXDCTL_ENABLE | txdctl);
                FM10K_WRITE_FLUSH(hw);
        } else
                err = -1;

        return err;
}

static int
fm10k_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        if (tx_queue_id < dev->data->nb_tx_queues) {
                tx_queue_disable(hw, tx_queue_id);
                tx_queue_clean(dev->data->tx_queues[tx_queue_id]);
        }

        return 0;
}

/* fls = find last set bit = 32 minus the number of leading zeros */
#ifndef fls
#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x))))
#endif
#define BSIZEPKT_ROUNDUP ((1 << FM10K_SRRCTL_BSIZEPKT_SHIFT) - 1)
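
/*
 * Editor's worked example (illustrative, not upstream code):
 * fls(0) = 0, fls(1) = 32 - 31 = 1, fls(3) = 2, fls(8) = 4.
 * fm10k_dev_start() below uses fls(nb_rx_queues - 1) to size the RSS field
 * in DGLORTDEC, e.g. 4 RX queues -> fls(3) = 2 bits of RSS index.
 */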
static int
fm10k_dev_start(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int i, diag;

        PMD_INIT_FUNC_TRACE();

        /* stop, init, then start the hw */
        diag = fm10k_stop_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware stop failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_init_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_start_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware start failed: %d", diag);
                return -EIO;
        }

        diag = fm10k_dev_tx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "TX init failed: %d", diag);
                return diag;
        }

        diag = fm10k_dev_rx_init(dev);
        if (diag) {
                PMD_INIT_LOG(ERR, "RX init failed: %d", diag);
                return diag;
        }

        if (hw->mac.type == fm10k_mac_pf) {
                /* Establish only VSI 0 as valid */
                FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(0), FM10K_DGLORTMAP_ANY);

                /* Configure RSS bits used in RETA table */
                FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0),
                                fls(dev->data->nb_rx_queues - 1) <<
                                FM10K_DGLORTDEC_RSSLENGTH_SHIFT);

                /* Invalidate all other GLORT entries */
                for (i = 1; i < FM10K_DGLORT_COUNT; i++)
                        FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(i),
                                        FM10K_DGLORTMAP_NONE);
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                struct fm10k_rx_queue *rxq;
                rxq = dev->data->rx_queues[i];

                if (rxq->rx_deferred_start)
                        continue;
                diag = fm10k_dev_rx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
                        for (j = 0; j < i; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct fm10k_tx_queue *txq;
                txq = dev->data->tx_queues[i];

                if (txq->tx_deferred_start)
                        continue;
                diag = fm10k_dev_tx_queue_start(dev, i);
                if (diag != 0) {
                        int j;
                        for (j = 0; j < dev->data->nb_rx_queues; ++j)
                                rx_queue_clean(dev->data->rx_queues[j]);
                        return diag;
                }
        }

        return 0;
}

static void
fm10k_dev_stop(struct rte_eth_dev *dev)
{
        int i;

        PMD_INIT_FUNC_TRACE();

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                fm10k_dev_tx_queue_stop(dev, i);

        for (i = 0; i < dev->data->nb_rx_queues; i++)
                fm10k_dev_rx_queue_stop(dev, i);
}

static void
fm10k_dev_close(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        /* Stop mailbox service first */
        fm10k_close_mbx_service(hw);
        fm10k_dev_stop(dev);
        fm10k_stop_hw(hw);
}

static int
fm10k_link_update(struct rte_eth_dev *dev,
        __rte_unused int wait_to_complete)
{
        PMD_INIT_FUNC_TRACE();

        /* The host-interface link is always up.  The speed is ~50Gbps per Gen3
         * x8 PCIe interface. For now, we leave the speed undefined since there
         * is no 50Gbps Ethernet. */
        dev->data->dev_link.link_speed  = 0;
        dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
        dev->data->dev_link.link_status = 1;

        return 0;
}

static void
fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        uint64_t ipackets, opackets, ibytes, obytes;
        struct fm10k_hw *hw =
                FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
                FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);
        int i;

        PMD_INIT_FUNC_TRACE();

        fm10k_update_hw_stats(hw, hw_stats);

        ipackets = opackets = ibytes = obytes = 0;
        for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
                (i < FM10K_MAX_QUEUES_PF); ++i) {
                stats->q_ipackets[i] = hw_stats->q[i].rx_packets.count;
                stats->q_opackets[i] = hw_stats->q[i].tx_packets.count;
                stats->q_ibytes[i]   = hw_stats->q[i].rx_bytes.count;
                stats->q_obytes[i]   = hw_stats->q[i].tx_bytes.count;
                ipackets += stats->q_ipackets[i];
                opackets += stats->q_opackets[i];
                ibytes   += stats->q_ibytes[i];
                obytes   += stats->q_obytes[i];
        }
        stats->ipackets = ipackets;
        stats->opackets = opackets;
        stats->ibytes = ibytes;
        stats->obytes = obytes;
}

static void
fm10k_stats_reset(struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
                FM10K_DEV_PRIVATE_TO_STATS(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        memset(hw_stats, 0, sizeof(*hw_stats));
        fm10k_rebind_hw_stats(hw, hw_stats);
}

static void
fm10k_dev_infos_get(struct rte_eth_dev *dev,
        struct rte_eth_dev_info *dev_info)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);

        PMD_INIT_FUNC_TRACE();

        dev_info->min_rx_bufsize     = FM10K_MIN_RX_BUF_SIZE;
        dev_info->max_rx_pktlen      = FM10K_MAX_PKT_SIZE;
        dev_info->max_rx_queues      = hw->mac.max_queues;
        dev_info->max_tx_queues      = hw->mac.max_queues;
        dev_info->max_mac_addrs      = 1;
        dev_info->max_hash_mac_addrs = 0;
        dev_info->max_vfs            = FM10K_MAX_VF_NUM;
        dev_info->max_vmdq_pools     = ETH_64_POOLS;
        dev_info->rx_offload_capa =
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM  |
                DEV_RX_OFFLOAD_TCP_CKSUM;
        dev_info->tx_offload_capa    = 0;
        dev_info->reta_size = FM10K_MAX_RSS_INDICES;

        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_thresh = {
                        .pthresh = FM10K_DEFAULT_RX_PTHRESH,
                        .hthresh = FM10K_DEFAULT_RX_HTHRESH,
                        .wthresh = FM10K_DEFAULT_RX_WTHRESH,
                },
                .rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(0),
                .rx_drop_en = 0,
        };

        dev_info->default_txconf = (struct rte_eth_txconf) {
                .tx_thresh = {
                        .pthresh = FM10K_DEFAULT_TX_PTHRESH,
                        .hthresh = FM10K_DEFAULT_TX_HTHRESH,
                        .wthresh = FM10K_DEFAULT_TX_WTHRESH,
                },
                .tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(0),
                .tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(0),
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
                                ETH_TXQ_FLAGS_NOOFFLOADS,
        };
}

static inline int
check_nb_desc(uint16_t min, uint16_t max, uint16_t mult, uint16_t request)
{
        if ((request < min) || (request > max) || ((request % mult) != 0))
                return -1;
        else
                return 0;
}

/*
 * Create a memzone for hardware descriptor rings. Malloc cannot be used since
 * the physical address is required. If the memzone is already created, then
 * this function returns a pointer to the existing memzone.
 */
static inline const struct rte_memzone *
allocate_hw_ring(const char *driver_name, const char *ring_name,
        uint8_t port_id, uint16_t queue_id, int socket_id,
        uint32_t size, uint32_t align)
{
        char name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;

        snprintf(name, sizeof(name), "%s_%s_%d_%d_%d",
                 driver_name, ring_name, port_id, queue_id, socket_id);

        /* return the memzone if it already exists */
        mz = rte_memzone_lookup(name);
        if (mz)
                return mz;

#ifdef RTE_LIBRTE_XEN_DOM0
        return rte_memzone_reserve_bounded(name, size, socket_id, 0, align,
                                           RTE_PGSIZE_2M);
#else
        return rte_memzone_reserve_aligned(name, size, socket_id, 0, align);
#endif
}
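
/*
 * Editor's usage note (illustrative, not upstream code): the name encodes
 * driver, ring, port, queue and socket, so queue 0 of port 0 on socket 0
 * becomes e.g. "rte_pmd_fm10k_rx_ring_0_0_0". Because the zone is looked
 * up before being reserved, calling the queue setup function again for the
 * same queue reuses the existing max-sized zone instead of leaking a new
 * one.
 */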

static inline int
check_thresh(uint16_t min, uint16_t max, uint16_t div, uint16_t request)
{
        if ((request < min) || (request > max) || ((div % request) != 0))
                return -1;
        else
                return 0;
}
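
/*
 * Editor's worked example (assumed values, not upstream code): if
 * FM10K_RX_FREE_THRESH_DIV(q) is the ring size, say 512, a request of 32
 * passes (512 % 32 == 0) while 100 fails (512 % 100 != 0). Note the test
 * is inverted relative to check_nb_desc(): here the *request* must evenly
 * divide 'div', keeping the ring a whole number of threshold-sized chunks.
 */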

static inline int
handle_rxconf(struct fm10k_rx_queue *q, const struct rte_eth_rxconf *conf)
{
        uint16_t rx_free_thresh;

        if (conf->rx_free_thresh == 0)
                rx_free_thresh = FM10K_RX_FREE_THRESH_DEFAULT(q);
        else
                rx_free_thresh = conf->rx_free_thresh;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_RX_FREE_THRESH_MIN(q),
                        FM10K_RX_FREE_THRESH_MAX(q),
                        FM10K_RX_FREE_THRESH_DIV(q),
                        rx_free_thresh)) {
                PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        rx_free_thresh, FM10K_RX_FREE_THRESH_MAX(q),
                        FM10K_RX_FREE_THRESH_MIN(q),
                        FM10K_RX_FREE_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->alloc_thresh = rx_free_thresh;
        q->drop_en = conf->rx_drop_en;
        q->rx_deferred_start = conf->rx_deferred_start;

        return 0;
}

/*
 * Hardware requires specific alignment for Rx packet buffers. At
 * least one of the following two conditions must be satisfied.
 *  1. Address is 512B aligned
 *  2. Address is 8B aligned and buffer does not cross 4K boundary.
 *
 * As such, the driver may need to adjust the DMA address within the
 * buffer by up to 512B. The mempool element size is checked here
 * to make sure a maximally sized Ethernet frame can still be wholly
 * contained within the buffer after 512B alignment.
 *
 * return 1 if the element size is valid, otherwise return 0.
 */
static int
mempool_element_size_valid(struct rte_mempool *mp)
{
        uint32_t min_size;

        /* elt_size includes mbuf header and headroom */
        min_size = mp->elt_size - sizeof(struct rte_mbuf) -
                        RTE_PKTMBUF_HEADROOM;

        /* account for up to 512B of alignment */
        min_size -= FM10K_RX_BUFF_ALIGN;

        /* sanity check for overflow */
        if (min_size > mp->elt_size)
                return 0;

        if (min_size < ETHER_MAX_VLAN_FRAME_LEN)
                return 0;

        /* size is valid */
        return 1;
}
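
/*
 * Editor's worked example (assumed sizes, not upstream code): for a mempool
 * with elt_size = sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM + 2048,
 * min_size works out to 2048 - FM10K_RX_BUFF_ALIGN = 1536 bytes, which is
 * >= ETHER_MAX_VLAN_FRAME_LEN (1522), so the pool is accepted. The unsigned
 * subtraction also explains the overflow check: a tiny element would wrap
 * min_size around to a huge value, which 'min_size > mp->elt_size' catches.
 */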

static int
fm10k_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        uint16_t nb_desc, unsigned int socket_id,
        const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_rx_queue *q;
        const struct rte_memzone *mz;

        PMD_INIT_FUNC_TRACE();

        /* make sure the mempool element size can account for alignment. */
        if (!mempool_element_size_valid(mp)) {
                PMD_INIT_LOG(ERR, "Error : Mempool element size is too small");
                return (-EINVAL);
        }

        /* make sure a valid number of descriptors have been requested */
        if (check_nb_desc(FM10K_MIN_RX_DESC, FM10K_MAX_RX_DESC,
                                FM10K_MULT_RX_DESC, nb_desc)) {
                PMD_INIT_LOG(ERR, "Number of Rx descriptors (%u) must be "
                        "less than or equal to %"PRIu32", "
                        "greater than or equal to %u, "
                        "and a multiple of %u",
                        nb_desc, (uint32_t)FM10K_MAX_RX_DESC, FM10K_MIN_RX_DESC,
                        FM10K_MULT_RX_DESC);
                return (-EINVAL);
        }

        /*
         * if this queue existed already, free the associated memory. The
         * queue cannot be reused in case we need to allocate memory on
         * different socket than was previously used.
         */
        if (dev->data->rx_queues[queue_id] != NULL) {
                rx_queue_free(dev->data->rx_queues[queue_id]);
                dev->data->rx_queues[queue_id] = NULL;
        }

        /* allocate memory for the queue structure */
        q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
                                socket_id);
        if (q == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
                return (-ENOMEM);
        }

        /* setup queue */
        q->mp = mp;
        q->nb_desc = nb_desc;
        q->port_id = dev->data->port_id;
        q->queue_id = queue_id;
        q->tail_ptr = (volatile uint32_t *)
                &((uint32_t *)hw->hw_addr)[FM10K_RDT(queue_id)];
        if (handle_rxconf(q, conf)) {
                /* free the just-allocated queue to avoid leaking it */
                rte_free(q);
                return (-EINVAL);
        }

        /* allocate memory for the software ring */
        q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
                                        nb_desc * sizeof(struct rte_mbuf *),
                                        RTE_CACHE_LINE_SIZE, socket_id);
        if (q->sw_ring == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate software ring");
                rte_free(q);
                return (-ENOMEM);
        }

        /*
         * allocate memory for the hardware descriptor ring. A memzone large
         * enough to hold the maximum ring size is requested to allow for
         * resizing in later calls to the queue setup function.
         */
        mz = allocate_hw_ring(dev->driver->pci_drv.name, "rx_ring",
                                dev->data->port_id, queue_id, socket_id,
                                FM10K_MAX_RX_RING_SZ, FM10K_ALIGN_RX_DESC);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
        q->hw_ring_phys_addr = mz->phys_addr;

        dev->data->rx_queues[queue_id] = q;
        return 0;
}

static void
fm10k_rx_queue_release(void *queue)
{
        PMD_INIT_FUNC_TRACE();

        rx_queue_free(queue);
}

static inline int
handle_txconf(struct fm10k_tx_queue *q, const struct rte_eth_txconf *conf)
{
        uint16_t tx_free_thresh;
        uint16_t tx_rs_thresh;

        /* constraint macros require that tx_free_thresh is configured
         * before tx_rs_thresh */
        if (conf->tx_free_thresh == 0)
                tx_free_thresh = FM10K_TX_FREE_THRESH_DEFAULT(q);
        else
                tx_free_thresh = conf->tx_free_thresh;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_TX_FREE_THRESH_MIN(q),
                        FM10K_TX_FREE_THRESH_MAX(q),
                        FM10K_TX_FREE_THRESH_DIV(q),
                        tx_free_thresh)) {
                PMD_INIT_LOG(ERR, "tx_free_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        tx_free_thresh, FM10K_TX_FREE_THRESH_MAX(q),
                        FM10K_TX_FREE_THRESH_MIN(q),
                        FM10K_TX_FREE_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->free_thresh = tx_free_thresh;

        if (conf->tx_rs_thresh == 0)
                tx_rs_thresh = FM10K_TX_RS_THRESH_DEFAULT(q);
        else
                tx_rs_thresh = conf->tx_rs_thresh;

        q->tx_deferred_start = conf->tx_deferred_start;

        /* make sure the requested threshold satisfies the constraints */
        if (check_thresh(FM10K_TX_RS_THRESH_MIN(q),
                        FM10K_TX_RS_THRESH_MAX(q),
                        FM10K_TX_RS_THRESH_DIV(q),
                        tx_rs_thresh)) {
                PMD_INIT_LOG(ERR, "tx_rs_thresh (%u) must be "
                        "less than or equal to %u, "
                        "greater than or equal to %u, "
                        "and a divisor of %u",
                        tx_rs_thresh, FM10K_TX_RS_THRESH_MAX(q),
                        FM10K_TX_RS_THRESH_MIN(q),
                        FM10K_TX_RS_THRESH_DIV(q));
                return (-EINVAL);
        }

        q->rs_thresh = tx_rs_thresh;

        return 0;
}

static int
fm10k_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_id,
        uint16_t nb_desc, unsigned int socket_id,
        const struct rte_eth_txconf *conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_tx_queue *q;
        const struct rte_memzone *mz;

        PMD_INIT_FUNC_TRACE();

        /* make sure a valid number of descriptors have been requested */
        if (check_nb_desc(FM10K_MIN_TX_DESC, FM10K_MAX_TX_DESC,
                                FM10K_MULT_TX_DESC, nb_desc)) {
                PMD_INIT_LOG(ERR, "Number of Tx descriptors (%u) must be "
                        "less than or equal to %"PRIu32", "
                        "greater than or equal to %u, "
                        "and a multiple of %u",
                        nb_desc, (uint32_t)FM10K_MAX_TX_DESC, FM10K_MIN_TX_DESC,
                        FM10K_MULT_TX_DESC);
                return (-EINVAL);
        }

        /*
         * if this queue existed already, free the associated memory. The
         * queue cannot be reused in case we need to allocate memory on
         * different socket than was previously used.
         */
        if (dev->data->tx_queues[queue_id] != NULL) {
                tx_queue_free(dev->data->tx_queues[queue_id]);
                dev->data->tx_queues[queue_id] = NULL;
        }

        /* allocate memory for the queue structure */
        q = rte_zmalloc_socket("fm10k", sizeof(*q), RTE_CACHE_LINE_SIZE,
                                socket_id);
        if (q == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
                return (-ENOMEM);
        }

        /* setup queue */
        q->nb_desc = nb_desc;
        q->port_id = dev->data->port_id;
        q->queue_id = queue_id;
        q->tail_ptr = (volatile uint32_t *)
                &((uint32_t *)hw->hw_addr)[FM10K_TDT(queue_id)];
        if (handle_txconf(q, conf)) {
                /* free the just-allocated queue to avoid leaking it */
                rte_free(q);
                return (-EINVAL);
        }

        /* allocate memory for the software ring */
        q->sw_ring = rte_zmalloc_socket("fm10k sw ring",
                                        nb_desc * sizeof(struct rte_mbuf *),
                                        RTE_CACHE_LINE_SIZE, socket_id);
        if (q->sw_ring == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate software ring");
                rte_free(q);
                return (-ENOMEM);
        }

        /*
         * allocate memory for the hardware descriptor ring. A memzone large
         * enough to hold the maximum ring size is requested to allow for
         * resizing in later calls to the queue setup function.
         */
        mz = allocate_hw_ring(dev->driver->pci_drv.name, "tx_ring",
                                dev->data->port_id, queue_id, socket_id,
                                FM10K_MAX_TX_RING_SZ, FM10K_ALIGN_TX_DESC);
        if (mz == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate hardware ring");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }
        q->hw_ring = mz->addr;
        q->hw_ring_phys_addr = mz->phys_addr;

        /*
         * allocate memory for the RS bit tracker. Enough slots to hold the
         * descriptor index for each RS bit needing to be set are required.
         */
        q->rs_tracker.list = rte_zmalloc_socket("fm10k rs tracker",
                                ((nb_desc + 1) / q->rs_thresh) *
                                sizeof(uint16_t),
                                RTE_CACHE_LINE_SIZE, socket_id);
        if (q->rs_tracker.list == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate RS bit tracker");
                rte_free(q->sw_ring);
                rte_free(q);
                return (-ENOMEM);
        }

        dev->data->tx_queues[queue_id] = q;
        return 0;
}

static void
fm10k_tx_queue_release(void *queue)
{
        PMD_INIT_FUNC_TRACE();

        tx_queue_free(queue);
}

static int
fm10k_reta_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t i, j, idx, shift;
        uint8_t mask;
        uint32_t reta;

        PMD_INIT_FUNC_TRACE();

        if (reta_size > FM10K_MAX_RSS_INDICES) {
                PMD_INIT_LOG(ERR, "The size of the hash lookup table "
                        "configured (%d) exceeds the number of entries "
                        "the hardware supports (%d)",
                        reta_size, FM10K_MAX_RSS_INDICES);
                return -EINVAL;
        }

        /*
         * Update Redirection Table RETA[n], n=0..31. The redirection table
         * has 128 entries in 32 registers
         */
        for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) &
                                BIT_MASK_PER_UINT32);
                if (mask == 0)
                        continue;

                reta = 0;
                if (mask != BIT_MASK_PER_UINT32)
                        reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));

                for (j = 0; j < CHARS_PER_UINT32; j++) {
                        if (mask & (0x1 << j)) {
                                if (mask != 0xF)
                                        reta &= ~(UINT8_MAX << CHAR_BIT * j);
                                reta |= reta_conf[idx].reta[shift + j] <<
                                                (CHAR_BIT * j);
                        }
                }
                FM10K_WRITE_REG(hw, FM10K_RETA(0, i >> 2), reta);
        }

        return 0;
}
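
/*
 * Editor's worked example (illustrative, not upstream code): to redirect
 * table entry 5 to queue 2, the caller sets reta_conf[0].mask = 1ULL << 5
 * and reta_conf[0].reta[5] = 2. The loop then hits i = 4 (shift = 4) with
 * mask = 0x2; since mask != 0xF the current RETA(0, 1) register is read,
 * byte 1 is cleared, and (2 << 8) is OR-ed in before writing back, leaving
 * the other three entries in that register untouched.
 */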

static int
fm10k_reta_query(struct rte_eth_dev *dev,
                        struct rte_eth_rss_reta_entry64 *reta_conf,
                        uint16_t reta_size)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t i, j, idx, shift;
        uint8_t mask;
        uint32_t reta;

        PMD_INIT_FUNC_TRACE();

        if (reta_size < FM10K_MAX_RSS_INDICES) {
                PMD_INIT_LOG(ERR, "The size of the hash lookup table "
                        "configured (%d) is smaller than the number of "
                        "entries the hardware supports (%d)",
                        reta_size, FM10K_MAX_RSS_INDICES);
                return -EINVAL;
        }

        /*
         * Read Redirection Table RETA[n], n=0..31. The redirection table
         * has 128 entries in 32 registers
         */
        for (i = 0; i < FM10K_MAX_RSS_INDICES; i += CHARS_PER_UINT32) {
                idx = i / RTE_RETA_GROUP_SIZE;
                shift = i % RTE_RETA_GROUP_SIZE;
                mask = (uint8_t)((reta_conf[idx].mask >> shift) &
                                BIT_MASK_PER_UINT32);
                if (mask == 0)
                        continue;

                reta = FM10K_READ_REG(hw, FM10K_RETA(0, i >> 2));
                for (j = 0; j < CHARS_PER_UINT32; j++) {
                        if (mask & (0x1 << j))
                                reta_conf[idx].reta[shift + j] = ((reta >>
                                        CHAR_BIT * j) & UINT8_MAX);
                }
        }

        return 0;
}

static int
fm10k_rss_hash_update(struct rte_eth_dev *dev,
        struct rte_eth_rss_conf *rss_conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t *key = (uint32_t *)rss_conf->rss_key;
        uint32_t mrqc;
        uint64_t hf = rss_conf->rss_hf;
        int i;

        PMD_INIT_FUNC_TRACE();

        if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
                FM10K_RSSRK_ENTRIES_PER_REG)
                return -EINVAL;

        if (hf == 0)
                return -EINVAL;

        mrqc = 0;
        mrqc |= (hf & ETH_RSS_IPV4_TCP)    ? FM10K_MRQC_TCP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV4)        ? FM10K_MRQC_IPV4     : 0;
        mrqc |= (hf & ETH_RSS_IPV6)        ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_EX)     ? FM10K_MRQC_IPV6     : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP)    ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_TCP_EX) ? FM10K_MRQC_TCP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV4_UDP)    ? FM10K_MRQC_UDP_IPV4 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP)    ? FM10K_MRQC_UDP_IPV6 : 0;
        mrqc |= (hf & ETH_RSS_IPV6_UDP_EX) ? FM10K_MRQC_UDP_IPV6 : 0;

        /* If the mapping doesn't fit any supported, return */
        if (mrqc == 0)
                return -EINVAL;

        if (key != NULL)
                for (i = 0; i < FM10K_RSSRK_SIZE; ++i)
                        FM10K_WRITE_REG(hw, FM10K_RSSRK(0, i), key[i]);

        FM10K_WRITE_REG(hw, FM10K_MRQC(0), mrqc);

        return 0;
}
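
/*
 * Editor's usage sketch (illustrative, not upstream code): an application
 * reaches this callback through the generic ethdev API, e.g.
 *
 *     uint8_t key[40] = { 0 };  // 40 bytes, i.e. FM10K_RSSRK_SIZE *
 *                               // FM10K_RSSRK_ENTRIES_PER_REG
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = key,
 *             .rss_key_len = sizeof(key),
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV4_TCP,
 *     };
 *     rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * Passing rss_key = NULL keeps the current key and only rewrites MRQC.
 */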

static int
fm10k_rss_hash_conf_get(struct rte_eth_dev *dev,
        struct rte_eth_rss_conf *rss_conf)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint32_t *key = (uint32_t *)rss_conf->rss_key;
        uint32_t mrqc;
        uint64_t hf;
        int i;

        PMD_INIT_FUNC_TRACE();

        if (rss_conf->rss_key_len < FM10K_RSSRK_SIZE *
                                FM10K_RSSRK_ENTRIES_PER_REG)
                return -EINVAL;

        if (key != NULL)
                for (i = 0; i < FM10K_RSSRK_SIZE; ++i)
                        key[i] = FM10K_READ_REG(hw, FM10K_RSSRK(0, i));

        mrqc = FM10K_READ_REG(hw, FM10K_MRQC(0));
        hf = 0;
        hf |= (mrqc & FM10K_MRQC_TCP_IPV4) ? ETH_RSS_IPV4_TCP    : 0;
        hf |= (mrqc & FM10K_MRQC_IPV4)     ? ETH_RSS_IPV4        : 0;
        hf |= (mrqc & FM10K_MRQC_IPV6)     ? ETH_RSS_IPV6        : 0;
        hf |= (mrqc & FM10K_MRQC_IPV6)     ? ETH_RSS_IPV6_EX     : 0;
        hf |= (mrqc & FM10K_MRQC_TCP_IPV6) ? ETH_RSS_IPV6_TCP    : 0;
        hf |= (mrqc & FM10K_MRQC_TCP_IPV6) ? ETH_RSS_IPV6_TCP_EX : 0;
        hf |= (mrqc & FM10K_MRQC_UDP_IPV4) ? ETH_RSS_IPV4_UDP    : 0;
        hf |= (mrqc & FM10K_MRQC_UDP_IPV6) ? ETH_RSS_IPV6_UDP    : 0;
        hf |= (mrqc & FM10K_MRQC_UDP_IPV6) ? ETH_RSS_IPV6_UDP_EX : 0;

        rss_conf->rss_hf = hf;

        return 0;
}

/* Mailbox message handlers for VF */
static const struct fm10k_msg_data fm10k_msgdata_vf[] = {
        FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
        FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf),
        FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
};

/* Mailbox message handlers for PF */
static const struct fm10k_msg_data fm10k_msgdata_pf[] = {
        FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
        FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
        FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
        FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
        FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
        FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
        FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
};

static int
fm10k_setup_mbx_service(struct fm10k_hw *hw)
{
        int err;

        /* Initialize mailbox lock */
        fm10k_mbx_initlock(hw);

        /* Replace default message handler with new ones */
        if (hw->mac.type == fm10k_mac_pf)
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_pf);
        else
                err = hw->mbx.ops.register_handlers(&hw->mbx, fm10k_msgdata_vf);

        if (err) {
                PMD_INIT_LOG(ERR, "Failed to register mailbox handler, err: %d",
                                err);
                return err;
        }
        /* Connect to SM for PF device or PF for VF device */
        return hw->mbx.ops.connect(hw, &hw->mbx);
}

static void
fm10k_close_mbx_service(struct fm10k_hw *hw)
{
        /* Disconnect from SM for PF device or PF for VF device */
        hw->mbx.ops.disconnect(hw, &hw->mbx);
}

static struct eth_dev_ops fm10k_eth_dev_ops = {
        .dev_configure          = fm10k_dev_configure,
        .dev_start              = fm10k_dev_start,
        .dev_stop               = fm10k_dev_stop,
        .dev_close              = fm10k_dev_close,
        .stats_get              = fm10k_stats_get,
        .stats_reset            = fm10k_stats_reset,
        .link_update            = fm10k_link_update,
        .dev_infos_get          = fm10k_dev_infos_get,
        .rx_queue_start         = fm10k_dev_rx_queue_start,
        .rx_queue_stop          = fm10k_dev_rx_queue_stop,
        .tx_queue_start         = fm10k_dev_tx_queue_start,
        .tx_queue_stop          = fm10k_dev_tx_queue_stop,
        .rx_queue_setup         = fm10k_rx_queue_setup,
        .rx_queue_release       = fm10k_rx_queue_release,
        .tx_queue_setup         = fm10k_tx_queue_setup,
        .tx_queue_release       = fm10k_tx_queue_release,
        .reta_update            = fm10k_reta_update,
        .reta_query             = fm10k_reta_query,
        .rss_hash_update        = fm10k_rss_hash_update,
        .rss_hash_conf_get      = fm10k_rss_hash_conf_get,
};

static int
eth_fm10k_dev_init(__rte_unused struct eth_driver *eth_drv,
        struct rte_eth_dev *dev)
{
        struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        int diag;

        PMD_INIT_FUNC_TRACE();

        dev->dev_ops = &fm10k_eth_dev_ops;
        dev->rx_pkt_burst = &fm10k_recv_pkts;
        dev->tx_pkt_burst = &fm10k_xmit_pkts;

        if (dev->data->scattered_rx)
                dev->rx_pkt_burst = &fm10k_recv_scattered_pkts;

        /* only initialize in the primary process */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        /* Vendor and Device ID need to be set before init of shared code */
        memset(hw, 0, sizeof(*hw));
        hw->device_id = dev->pci_dev->id.device_id;
        hw->vendor_id = dev->pci_dev->id.vendor_id;
        hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id;
        hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id;
        hw->revision_id = 0;
        hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr;
        if (hw->hw_addr == NULL) {
                PMD_INIT_LOG(ERR, "Bad mem resource."
                        " Try to blacklist unused devices.");
                return -EIO;
        }

        /* Store fm10k_adapter pointer */
        hw->back = dev->data->dev_private;

        /* Initialize the shared code */
        diag = fm10k_init_shared_code(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Shared code init failed: %d", diag);
                return -EIO;
        }

        /*
         * Initialize bus info. Normally we would call fm10k_get_bus_info(),
         * but there is no way to get link status without reading BAR4. Until
         * this works, assume we have maximum bandwidth.
         * @todo - fix bus info
         */
        hw->bus_caps.speed = fm10k_bus_speed_8000;
        hw->bus_caps.width = fm10k_bus_width_pcie_x8;
        hw->bus_caps.payload = fm10k_bus_payload_512;
        hw->bus.speed = fm10k_bus_speed_8000;
        hw->bus.width = fm10k_bus_width_pcie_x8;
        hw->bus.payload = fm10k_bus_payload_256;

        /* Initialize the hw */
        diag = fm10k_init_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware init failed: %d", diag);
                return -EIO;
        }

        /* Initialize MAC address(es) */
        dev->data->mac_addrs = rte_zmalloc("fm10k", ETHER_ADDR_LEN, 0);
        if (dev->data->mac_addrs == NULL) {
                PMD_INIT_LOG(ERR, "Cannot allocate memory for MAC addresses");
                return -ENOMEM;
        }

        diag = fm10k_read_mac_addr(hw);
        if (diag != FM10K_SUCCESS) {
                /*
                 * TODO: remove special handling on VF. Need shared code to
                 * fix first.
                 */
                if (hw->mac.type == fm10k_mac_pf) {
                        PMD_INIT_LOG(ERR, "Read MAC addr failed: %d", diag);
                        return -EIO;
                } else {
                        /* Generate a random addr */
                        eth_random_addr(hw->mac.addr);
                        memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
                }
        }

        ether_addr_copy((const struct ether_addr *)hw->mac.addr,
                        &dev->data->mac_addrs[0]);

        /* Reset the hw statistics */
        fm10k_stats_reset(dev);

        /* Reset the hw */
        diag = fm10k_reset_hw(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Hardware reset failed: %d", diag);
                return -EIO;
        }

        /* Setup mailbox service */
        diag = fm10k_setup_mbx_service(hw);
        if (diag != FM10K_SUCCESS) {
                PMD_INIT_LOG(ERR, "Failed to setup mailbox: %d", diag);
                return -EIO;
        }

        /*
         * The calls below trigger operations on the mailbox, so acquire the
         * lock to avoid racing with the interrupt handler. Operations on the
         * mailbox FIFO raise an interrupt to the PF/SM, whose handler in turn
         * interrupts our side; only then is the mailbox FIFO touched.
         */
        fm10k_mbx_lock(hw);
        /* Enable port first */
        hw->mac.ops.update_lport_state(hw, 0, 0, 1);

        /* Update default vlan */
        hw->mac.ops.update_vlan(hw, hw->mac.default_vid, 0, true);

        /*
         * Add default mac/vlan filter. glort is assigned by SM for PF, while
         * it is unused for VF. PF will assign the correct glort for VF.
         */
        hw->mac.ops.update_uc_addr(hw, hw->mac.dglort_map, hw->mac.addr,
                              hw->mac.default_vid, 1, 0);

        /* Set the xcast mode (MULTI by default). The app can change the
         * mode later through other API functions.
         */
        hw->mac.ops.update_xcast_mode(hw, hw->mac.dglort_map,
                                        FM10K_XCAST_MODE_MULTI);

        fm10k_mbx_unlock(hw);

        return 0;
}

/*
 * The set of PCI devices this driver supports. This driver will enable both PF
 * and SRIOV-VF devices.
 */
static struct rte_pci_id pci_id_fm10k_map[] = {
#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) { RTE_PCI_DEVICE(vend, dev) },
#include "rte_pci_dev_ids.h"
        { .vendor_id = 0, /* sentinel */ },
};

static struct eth_driver rte_pmd_fm10k = {
        {
                .name = "rte_pmd_fm10k",
                .id_table = pci_id_fm10k_map,
                .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
        },
        .eth_dev_init = eth_fm10k_dev_init,
        .dev_private_size = sizeof(struct fm10k_adapter),
};

/*
 * Driver initialization routine.
 * Invoked once at EAL init time.
 * Register itself as the [Poll Mode] Driver of PCI FM10K devices.
 */
static int
rte_pmd_fm10k_init(__rte_unused const char *name,
        __rte_unused const char *params)
{
        PMD_INIT_FUNC_TRACE();
        rte_eth_driver_register(&rte_pmd_fm10k);
        return 0;
}

static struct rte_driver rte_fm10k_driver = {
        .type = PMD_PDEV,
        .init = rte_pmd_fm10k_init,
};

PMD_REGISTER_DRIVER(rte_fm10k_driver);