net/sfc: support Rx descriptor status API
drivers/net/sfc/sfc_rx.c
1 /*-
2  *   BSD LICENSE
3  *
4  * Copyright (c) 2016-2017 Solarflare Communications Inc.
5  * All rights reserved.
6  *
7  * This software was jointly developed between OKTET Labs (under contract
8  * for Solarflare) and Solarflare Communications, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright notice,
14  *    this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright notice,
16  *    this list of conditions and the following disclaimer in the documentation
17  *    and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 #include <rte_mempool.h>
33
34 #include "efx.h"
35
36 #include "sfc.h"
37 #include "sfc_debug.h"
38 #include "sfc_log.h"
39 #include "sfc_ev.h"
40 #include "sfc_rx.h"
41 #include "sfc_kvargs.h"
42 #include "sfc_tweak.h"
43
44 /*
45  * Maximum number of Rx queue flush attempts in the case of failure or
46  * flush timeout
47  */
48 #define SFC_RX_QFLUSH_ATTEMPTS          (3)
49
50 /*
51  * Time to wait between event queue polling attempts when waiting for Rx
52  * queue flush done or failed events.
53  */
54 #define SFC_RX_QFLUSH_POLL_WAIT_MS      (1)
55
56 /*
57  * Maximum number of event queue polling attempts when waiting for Rx queue
58  * flush done or failed events. It defines Rx queue flush attempt timeout
59  * together with SFC_RX_QFLUSH_POLL_WAIT_MS.
60  */
61 #define SFC_RX_QFLUSH_POLL_ATTEMPTS     (2000)
62
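/*
 * Rx queue flush notifications: mark the flush as completed or failed and
 * clear the in-progress flag; expected to be invoked from event processing
 * when the corresponding flush event is received.
 */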
63 void
64 sfc_rx_qflush_done(struct sfc_rxq *rxq)
65 {
66         rxq->state |= SFC_RXQ_FLUSHED;
67         rxq->state &= ~SFC_RXQ_FLUSHING;
68 }
69
70 void
71 sfc_rx_qflush_failed(struct sfc_rxq *rxq)
72 {
73         rxq->state |= SFC_RXQ_FLUSH_FAILED;
74         rxq->state &= ~SFC_RXQ_FLUSHING;
75 }
76
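/*
 * Refill the Rx ring from the mempool in bulks of SFC_RX_REFILL_BULK mbufs.
 * Nothing is done if the free space is below the refill threshold. On
 * mempool exhaustion the allocation failure counter is bumped and whatever
 * has been posted so far is pushed to the NIC.
 */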
77 static void
78 sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq)
79 {
80         unsigned int free_space;
81         unsigned int bulks;
82         void *objs[SFC_RX_REFILL_BULK];
83         efsys_dma_addr_t addr[RTE_DIM(objs)];
84         unsigned int added = rxq->added;
85         unsigned int id;
86         unsigned int i;
87         struct sfc_efx_rx_sw_desc *rxd;
88         struct rte_mbuf *m;
89         uint16_t port_id = rxq->dp.dpq.port_id;
90
91         free_space = EFX_RXQ_LIMIT(rxq->ptr_mask + 1) -
92                 (added - rxq->completed);
93
94         if (free_space < rxq->refill_threshold)
95                 return;
96
97         bulks = free_space / RTE_DIM(objs);
98         /* refill_threshold guarantees that bulks is positive */
99         SFC_ASSERT(bulks > 0);
100
101         id = added & rxq->ptr_mask;
102         do {
103                 if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
104                                                   RTE_DIM(objs)) < 0)) {
105                         /*
106                          * Strictly speaking, it is not safe to increment
107                          * the counter from different contexts, but all PMDs do it.
108                          */
109                         rxq->evq->sa->eth_dev->data->rx_mbuf_alloc_failed +=
110                                 RTE_DIM(objs);
111                         /* Return if we have posted nothing yet */
112                         if (added == rxq->added)
113                                 return;
114                         /* Push posted */
115                         break;
116                 }
117
118                 for (i = 0; i < RTE_DIM(objs);
119                      ++i, id = (id + 1) & rxq->ptr_mask) {
120                         m = objs[i];
121
122                         rxd = &rxq->sw_desc[id];
123                         rxd->mbuf = m;
124
125                         SFC_ASSERT(rte_mbuf_refcnt_read(m) == 1);
126                         m->data_off = RTE_PKTMBUF_HEADROOM;
127                         SFC_ASSERT(m->next == NULL);
128                         SFC_ASSERT(m->nb_segs == 1);
129                         m->port = port_id;
130
131                         addr[i] = rte_pktmbuf_mtophys(m);
132                 }
133
134                 efx_rx_qpost(rxq->common, addr, rxq->buf_size,
135                              RTE_DIM(objs), rxq->completed, added);
136                 added += RTE_DIM(objs);
137         } while (--bulks > 0);
138
139         SFC_ASSERT(added != rxq->added);
140         rxq->added = added;
141         efx_rx_qpush(rxq->common, added, &rxq->pushed);
142 }
143
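/*
 * Convert libefx Rx descriptor checksum flags into mbuf offload flags
 * (IPv4 and TCP/UDP checksum good, bad or unknown).
 */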
144 static uint64_t
145 sfc_efx_rx_desc_flags_to_offload_flags(const unsigned int desc_flags)
146 {
147         uint64_t mbuf_flags = 0;
148
149         switch (desc_flags & (EFX_PKT_IPV4 | EFX_CKSUM_IPV4)) {
150         case (EFX_PKT_IPV4 | EFX_CKSUM_IPV4):
151                 mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
152                 break;
153         case EFX_PKT_IPV4:
154                 mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
155                 break;
156         default:
157                 RTE_BUILD_BUG_ON(PKT_RX_IP_CKSUM_UNKNOWN != 0);
158                 SFC_ASSERT((mbuf_flags & PKT_RX_IP_CKSUM_MASK) ==
159                            PKT_RX_IP_CKSUM_UNKNOWN);
160                 break;
161         }
162
163         switch ((desc_flags &
164                  (EFX_PKT_TCP | EFX_PKT_UDP | EFX_CKSUM_TCPUDP))) {
165         case (EFX_PKT_TCP | EFX_CKSUM_TCPUDP):
166         case (EFX_PKT_UDP | EFX_CKSUM_TCPUDP):
167                 mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
168                 break;
169         case EFX_PKT_TCP:
170         case EFX_PKT_UDP:
171                 mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
172                 break;
173         default:
174                 RTE_BUILD_BUG_ON(PKT_RX_L4_CKSUM_UNKNOWN != 0);
175                 SFC_ASSERT((mbuf_flags & PKT_RX_L4_CKSUM_MASK) ==
176                            PKT_RX_L4_CKSUM_UNKNOWN);
177                 break;
178         }
179
180         return mbuf_flags;
181 }
182
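/* Derive the mbuf packet type from libefx Rx descriptor flags */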
183 static uint32_t
184 sfc_efx_rx_desc_flags_to_packet_type(const unsigned int desc_flags)
185 {
186         return RTE_PTYPE_L2_ETHER |
187                 ((desc_flags & EFX_PKT_IPV4) ?
188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0) |
189                 ((desc_flags & EFX_PKT_IPV6) ?
190                         RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0) |
191                 ((desc_flags & EFX_PKT_TCP) ? RTE_PTYPE_L4_TCP : 0) |
192                 ((desc_flags & EFX_PKT_UDP) ? RTE_PTYPE_L4_UDP : 0);
193 }
194
195 static const uint32_t *
196 sfc_efx_supported_ptypes_get(void)
197 {
198         static const uint32_t ptypes[] = {
199                 RTE_PTYPE_L2_ETHER,
200                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
201                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
202                 RTE_PTYPE_L4_TCP,
203                 RTE_PTYPE_L4_UDP,
204                 RTE_PTYPE_UNKNOWN
205         };
206
207         return ptypes;
208 }
209
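/*
 * If the queue delivers the RSS hash, extract the Toeplitz hash from the
 * pseudo-header Rx prefix of IPv4/IPv6 packets and store it in the mbuf.
 */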
210 static void
211 sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags,
212                         struct rte_mbuf *m)
213 {
214 #if EFSYS_OPT_RX_SCALE
215         uint8_t *mbuf_data;
216
217
218         if ((rxq->flags & SFC_EFX_RXQ_FLAG_RSS_HASH) == 0)
219                 return;
220
221         mbuf_data = rte_pktmbuf_mtod(m, uint8_t *);
222
223         if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) {
224                 m->hash.rss = efx_pseudo_hdr_hash_get(rxq->common,
225                                                       EFX_RX_HASHALG_TOEPLITZ,
226                                                       mbuf_data);
227
228                 m->ol_flags |= PKT_RX_RSS_HASH;
229         }
230 #endif
231 }
232
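/*
 * libefx datapath receive burst: poll the event queue to update completion
 * state, hand out completed descriptors as mbufs (chaining fragments of
 * scattered packets, dropping descriptors marked for discard, filling in
 * offload flags, packet type and RSS hash) and, finally, refill the ring.
 */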
233 static uint16_t
234 sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
235 {
236         struct sfc_dp_rxq *dp_rxq = rx_queue;
237         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
238         unsigned int completed;
239         unsigned int prefix_size = rxq->prefix_size;
240         unsigned int done_pkts = 0;
241         boolean_t discard_next = B_FALSE;
242         struct rte_mbuf *scatter_pkt = NULL;
243
244         if (unlikely((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0))
245                 return 0;
246
247         sfc_ev_qpoll(rxq->evq);
248
249         completed = rxq->completed;
250         while (completed != rxq->pending && done_pkts < nb_pkts) {
251                 unsigned int id;
252                 struct sfc_efx_rx_sw_desc *rxd;
253                 struct rte_mbuf *m;
254                 unsigned int seg_len;
255                 unsigned int desc_flags;
256
257                 id = completed++ & rxq->ptr_mask;
258                 rxd = &rxq->sw_desc[id];
259                 m = rxd->mbuf;
260                 desc_flags = rxd->flags;
261
262                 if (discard_next)
263                         goto discard;
264
265                 if (desc_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
266                         goto discard;
267
268                 if (desc_flags & EFX_PKT_PREFIX_LEN) {
269                         uint16_t tmp_size;
270                         int rc __rte_unused;
271
272                         rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
273                                 rte_pktmbuf_mtod(m, uint8_t *), &tmp_size);
274                         SFC_ASSERT(rc == 0);
275                         seg_len = tmp_size;
276                 } else {
277                         seg_len = rxd->size - prefix_size;
278                 }
279
280                 rte_pktmbuf_data_len(m) = seg_len;
281                 rte_pktmbuf_pkt_len(m) = seg_len;
282
283                 if (scatter_pkt != NULL) {
284                         if (rte_pktmbuf_chain(scatter_pkt, m) != 0) {
285                                 rte_pktmbuf_free(scatter_pkt);
286                                 goto discard;
287                         }
288                         /* The packet to deliver */
289                         m = scatter_pkt;
290                 }
291
292                 if (desc_flags & EFX_PKT_CONT) {
293                         /* The packet is scattered, more fragments to come */
294                         scatter_pkt = m;
295                         /* Further fragments have no prefix */
296                         prefix_size = 0;
297                         continue;
298                 }
299
300                 /* Scattered packet is done */
301                 scatter_pkt = NULL;
302                 /* The first fragment of the packet has prefix */
303                 prefix_size = rxq->prefix_size;
304
305                 m->ol_flags =
306                         sfc_efx_rx_desc_flags_to_offload_flags(desc_flags);
307                 m->packet_type =
308                         sfc_efx_rx_desc_flags_to_packet_type(desc_flags);
309
310                 /*
311                  * Extract RSS hash from the packet prefix and
312                  * set the corresponding field (if needed and possible)
313                  */
314                 sfc_efx_rx_set_rss_hash(rxq, desc_flags, m);
315
316                 m->data_off += prefix_size;
317
318                 *rx_pkts++ = m;
319                 done_pkts++;
320                 continue;
321
322 discard:
323                 discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
324                 rte_mempool_put(rxq->refill_mb_pool, m);
325                 rxd->mbuf = NULL;
326         }
327
328         /* pending is only moved when the entire packet is received */
329         SFC_ASSERT(scatter_pkt == NULL);
330
331         rxq->completed = completed;
332
333         sfc_efx_rx_qrefill(rxq);
334
335         return done_pkts;
336 }
337
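/*
 * Report the number of completed but not yet retrieved Rx descriptors;
 * the event queue is polled first so that the figure is up to date.
 */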
338 static sfc_dp_rx_qdesc_npending_t sfc_efx_rx_qdesc_npending;
339 static unsigned int
340 sfc_efx_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq)
341 {
342         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
343
344         if ((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == 0)
345                 return 0;
346
347         sfc_ev_qpoll(rxq->evq);
348
349         return rxq->pending - rxq->completed;
350 }
351
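/*
 * Back-end of the generic Rx descriptor status API for the libefx datapath:
 * offsets below (pending - completed) are DONE, offsets below
 * (added - completed) are AVAIL, everything else is UNAVAIL.
 *
 * Illustrative application-side usage (hypothetical port/queue/offset):
 *   status = rte_eth_rx_descriptor_status(port_id, rx_queue_id, offset);
 *   status == RTE_ETH_RX_DESC_DONE means a received packet is waiting.
 */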
352 static sfc_dp_rx_qdesc_status_t sfc_efx_rx_qdesc_status;
353 static int
354 sfc_efx_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset)
355 {
356         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
357
358         if (unlikely(offset > rxq->ptr_mask))
359                 return -EINVAL;
360
361         /*
362          * Poll the EvQ to derive an up-to-date 'rxq->pending' figure;
363          * the queue is required to be running, but the check is
364          * omitted because the API design assumes that it is the
365          * caller's duty to satisfy all preconditions
366          */
367         SFC_ASSERT((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) ==
368                    SFC_EFX_RXQ_FLAG_RUNNING);
369         sfc_ev_qpoll(rxq->evq);
370
371         /*
372          * There is a handful of reserved entries in the ring,
373          * but an explicit check that the offset points to a
374          * reserved entry is omitted since the two checks below
375          * rely on figures which take the HW limits into account;
376          * thus, if an entry is reserved, both checks fail and
377          * the UNAVAIL code is returned
378          */
379
380         if (offset < (rxq->pending - rxq->completed))
381                 return RTE_ETH_RX_DESC_DONE;
382
383         if (offset < (rxq->added - rxq->completed))
384                 return RTE_ETH_RX_DESC_AVAIL;
385
386         return RTE_ETH_RX_DESC_UNAVAIL;
387 }
388
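/*
 * Find the control path Rx queue which corresponds to a datapath Rx queue
 * using the port and queue IDs recorded in the datapath queue at creation.
 */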
389 struct sfc_rxq *
390 sfc_rxq_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq)
391 {
392         const struct sfc_dp_queue *dpq = &dp_rxq->dpq;
393         struct rte_eth_dev *eth_dev;
394         struct sfc_adapter *sa;
395         struct sfc_rxq *rxq;
396
397         SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
398         eth_dev = &rte_eth_devices[dpq->port_id];
399
400         sa = eth_dev->data->dev_private;
401
402         SFC_ASSERT(dpq->queue_id < sa->rxq_count);
403         rxq = sa->rxq_info[dpq->queue_id].rxq;
404
405         SFC_ASSERT(rxq != NULL);
406         return rxq;
407 }
408
409 static sfc_dp_rx_qcreate_t sfc_efx_rx_qcreate;
410 static int
411 sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id,
412                    const struct rte_pci_addr *pci_addr, int socket_id,
413                    const struct sfc_dp_rx_qcreate_info *info,
414                    struct sfc_dp_rxq **dp_rxqp)
415 {
416         struct sfc_efx_rxq *rxq;
417         int rc;
418
419         rc = ENOMEM;
420         rxq = rte_zmalloc_socket("sfc-efx-rxq", sizeof(*rxq),
421                                  RTE_CACHE_LINE_SIZE, socket_id);
422         if (rxq == NULL)
423                 goto fail_rxq_alloc;
424
425         sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
426
427         rc = ENOMEM;
428         rxq->sw_desc = rte_calloc_socket("sfc-efx-rxq-sw_desc",
429                                          info->rxq_entries,
430                                          sizeof(*rxq->sw_desc),
431                                          RTE_CACHE_LINE_SIZE, socket_id);
432         if (rxq->sw_desc == NULL)
433                 goto fail_desc_alloc;
434
435         /* efx datapath is bound to efx control path */
436         rxq->evq = sfc_rxq_by_dp_rxq(&rxq->dp)->evq;
437         if (info->flags & SFC_RXQ_FLAG_RSS_HASH)
438                 rxq->flags |= SFC_EFX_RXQ_FLAG_RSS_HASH;
439         rxq->ptr_mask = info->rxq_entries - 1;
440         rxq->batch_max = info->batch_max;
441         rxq->prefix_size = info->prefix_size;
442         rxq->refill_threshold = info->refill_threshold;
443         rxq->buf_size = info->buf_size;
444         rxq->refill_mb_pool = info->refill_mb_pool;
445
446         *dp_rxqp = &rxq->dp;
447         return 0;
448
449 fail_desc_alloc:
450         rte_free(rxq);
451
452 fail_rxq_alloc:
453         return rc;
454 }
455
456 static sfc_dp_rx_qdestroy_t sfc_efx_rx_qdestroy;
457 static void
458 sfc_efx_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
459 {
460         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
461
462         rte_free(rxq->sw_desc);
463         rte_free(rxq);
464 }
465
466 static sfc_dp_rx_qstart_t sfc_efx_rx_qstart;
467 static int
468 sfc_efx_rx_qstart(struct sfc_dp_rxq *dp_rxq,
469                   __rte_unused unsigned int evq_read_ptr)
470 {
471         /* libefx-based datapath is specific to libefx-based PMD */
472         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
473         struct sfc_rxq *crxq = sfc_rxq_by_dp_rxq(dp_rxq);
474
475         rxq->common = crxq->common;
476
477         rxq->pending = rxq->completed = rxq->added = rxq->pushed = 0;
478
479         sfc_efx_rx_qrefill(rxq);
480
481         rxq->flags |= (SFC_EFX_RXQ_FLAG_STARTED | SFC_EFX_RXQ_FLAG_RUNNING);
482
483         return 0;
484 }
485
486 static sfc_dp_rx_qstop_t sfc_efx_rx_qstop;
487 static void
488 sfc_efx_rx_qstop(struct sfc_dp_rxq *dp_rxq,
489                  __rte_unused unsigned int *evq_read_ptr)
490 {
491         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
492
493         rxq->flags &= ~SFC_EFX_RXQ_FLAG_RUNNING;
494
495         /* libefx-based datapath is bound to libefx-based PMD and uses
496          * event queue structure directly, so there is no need to
497          * return the EvQ read pointer.
498          */
499 }
500
501 static sfc_dp_rx_qpurge_t sfc_efx_rx_qpurge;
502 static void
503 sfc_efx_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
504 {
505         struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq);
506         unsigned int i;
507         struct sfc_efx_rx_sw_desc *rxd;
508
509         for (i = rxq->completed; i != rxq->added; ++i) {
510                 rxd = &rxq->sw_desc[i & rxq->ptr_mask];
511                 rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
512                 rxd->mbuf = NULL;
513                 /* Packed stream relies on 0 in inactive SW desc.
514                  * Rx queue stop is not performance critical, so
515                  * there is no harm to do it always.
516                  */
517                 rxd->flags = 0;
518                 rxd->size = 0;
519         }
520
521         rxq->flags &= ~SFC_EFX_RXQ_FLAG_STARTED;
522 }
523
524 struct sfc_dp_rx sfc_efx_rx = {
525         .dp = {
526                 .name           = SFC_KVARG_DATAPATH_EFX,
527                 .type           = SFC_DP_RX,
528                 .hw_fw_caps     = 0,
529         },
530         .features               = SFC_DP_RX_FEAT_SCATTER,
531         .qcreate                = sfc_efx_rx_qcreate,
532         .qdestroy               = sfc_efx_rx_qdestroy,
533         .qstart                 = sfc_efx_rx_qstart,
534         .qstop                  = sfc_efx_rx_qstop,
535         .qpurge                 = sfc_efx_rx_qpurge,
536         .supported_ptypes_get   = sfc_efx_supported_ptypes_get,
537         .qdesc_npending         = sfc_efx_rx_qdesc_npending,
538         .qdesc_status           = sfc_efx_rx_qdesc_status,
539         .pkt_burst              = sfc_efx_recv_pkts,
540 };
541
542 unsigned int
543 sfc_rx_qdesc_npending(struct sfc_adapter *sa, unsigned int sw_index)
544 {
545         struct sfc_rxq *rxq;
546
547         SFC_ASSERT(sw_index < sa->rxq_count);
548         rxq = sa->rxq_info[sw_index].rxq;
549
550         if (rxq == NULL || (rxq->state & SFC_RXQ_STARTED) == 0)
551                 return 0;
552
553         return sa->dp_rx->qdesc_npending(rxq->dp);
554 }
555
556 int
557 sfc_rx_qdesc_done(struct sfc_dp_rxq *dp_rxq, unsigned int offset)
558 {
559         struct sfc_rxq *rxq = sfc_rxq_by_dp_rxq(dp_rxq);
560
561         return offset < rxq->evq->sa->dp_rx->qdesc_npending(dp_rxq);
562 }
563
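/*
 * Flush the Rx queue: request a flush and poll the event queue for the
 * flush done or failed event, retrying up to SFC_RX_QFLUSH_ATTEMPTS times,
 * then purge any buffers still owned by the queue.
 */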
564 static void
565 sfc_rx_qflush(struct sfc_adapter *sa, unsigned int sw_index)
566 {
567         struct sfc_rxq *rxq;
568         unsigned int retry_count;
569         unsigned int wait_count;
570         int rc;
571
572         rxq = sa->rxq_info[sw_index].rxq;
573         SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);
574
575         /*
576          * Retry Rx queue flushing in the case of flush failure or
577          * timeout. In the worst case it can take up to 6 seconds.
578          */
579         for (retry_count = 0;
580              ((rxq->state & SFC_RXQ_FLUSHED) == 0) &&
581              (retry_count < SFC_RX_QFLUSH_ATTEMPTS);
582              ++retry_count) {
583                 rc = efx_rx_qflush(rxq->common);
584                 if (rc != 0) {
585                         rxq->state |= (rc == EALREADY) ?
586                                 SFC_RXQ_FLUSHED : SFC_RXQ_FLUSH_FAILED;
587                         break;
588                 }
589                 rxq->state &= ~SFC_RXQ_FLUSH_FAILED;
590                 rxq->state |= SFC_RXQ_FLUSHING;
591
592                 /*
593                  * Wait for the Rx queue flush done or failed event for at
594                  * least SFC_RX_QFLUSH_POLL_WAIT_MS milliseconds and not more
595                  * than 2 seconds (SFC_RX_QFLUSH_POLL_WAIT_MS multiplied
596                  * by SFC_RX_QFLUSH_POLL_ATTEMPTS).
597                  */
598                 wait_count = 0;
599                 do {
600                         rte_delay_ms(SFC_RX_QFLUSH_POLL_WAIT_MS);
601                         sfc_ev_qpoll(rxq->evq);
602                 } while ((rxq->state & SFC_RXQ_FLUSHING) &&
603                          (wait_count++ < SFC_RX_QFLUSH_POLL_ATTEMPTS));
604
605                 if (rxq->state & SFC_RXQ_FLUSHING)
606                         sfc_err(sa, "RxQ %u flush timed out", sw_index);
607
608                 if (rxq->state & SFC_RXQ_FLUSH_FAILED)
609                         sfc_err(sa, "RxQ %u flush failed", sw_index);
610
611                 if (rxq->state & SFC_RXQ_FLUSHED)
612                         sfc_info(sa, "RxQ %u flushed", sw_index);
613         }
614
615         sa->dp_rx->qpurge(rxq->dp);
616 }
617
618 static int
619 sfc_rx_default_rxq_set_filter(struct sfc_adapter *sa, struct sfc_rxq *rxq)
620 {
621         boolean_t rss = (sa->rss_channels > 0) ? B_TRUE : B_FALSE;
622         struct sfc_port *port = &sa->port;
623         int rc;
624
625         /*
626          * If promiscuous or all-multicast mode has been requested, setting
627          * a filter for the default Rx queue might fail, in particular while
628          * running over a PCI function which is not a member of the
629          * corresponding privilege groups; if this occurs, a few iterations
630          * are made to repeat this step without those flags set
631          */
632 retry:
633         rc = efx_mac_filter_default_rxq_set(sa->nic, rxq->common, rss);
634         if (rc == 0)
635                 return 0;
636         else if (rc != EOPNOTSUPP)
637                 return rc;
638
639         if (port->promisc) {
640                 sfc_warn(sa, "promiscuous mode has been requested, "
641                              "but the HW rejects it");
642                 sfc_warn(sa, "promiscuous mode will be disabled");
643
644                 port->promisc = B_FALSE;
645                 rc = sfc_set_rx_mode(sa);
646                 if (rc != 0)
647                         return rc;
648
649                 goto retry;
650         }
651
652         if (port->allmulti) {
653                 sfc_warn(sa, "all-multicast mode has been requested, "
654                              "but the HW rejects it");
655                 sfc_warn(sa, "all-multicast mode will be disabled");
656
657                 port->allmulti = B_FALSE;
658                 rc = sfc_set_rx_mode(sa);
659                 if (rc != 0)
660                         return rc;
661
662                 goto retry;
663         }
664
665         return rc;
666 }
667
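/*
 * Start the Rx queue: start its event queue, create and enable the HW Rx
 * queue, start the datapath queue and, for the first queue of a
 * non-isolated port, make it the default Rx queue for MAC filters.
 */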
668 int
669 sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
670 {
671         struct sfc_port *port = &sa->port;
672         struct sfc_rxq_info *rxq_info;
673         struct sfc_rxq *rxq;
674         struct sfc_evq *evq;
675         int rc;
676
677         sfc_log_init(sa, "sw_index=%u", sw_index);
678
679         SFC_ASSERT(sw_index < sa->rxq_count);
680
681         rxq_info = &sa->rxq_info[sw_index];
682         rxq = rxq_info->rxq;
683         SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);
684
685         evq = rxq->evq;
686
687         rc = sfc_ev_qstart(evq, sfc_evq_index_by_rxq_sw_index(sa, sw_index));
688         if (rc != 0)
689                 goto fail_ev_qstart;
690
691         rc = efx_rx_qcreate(sa->nic, rxq->hw_index, 0, rxq_info->type,
692                             &rxq->mem, rxq_info->entries,
693                             0 /* not used on EF10 */, evq->common,
694                             &rxq->common);
695         if (rc != 0)
696                 goto fail_rx_qcreate;
697
698         efx_rx_qenable(rxq->common);
699
700         rc = sa->dp_rx->qstart(rxq->dp, evq->read_ptr);
701         if (rc != 0)
702                 goto fail_dp_qstart;
703
704         rxq->state |= SFC_RXQ_STARTED;
705
706         if ((sw_index == 0) && !port->isolated) {
707                 rc = sfc_rx_default_rxq_set_filter(sa, rxq);
708                 if (rc != 0)
709                         goto fail_mac_filter_default_rxq_set;
710         }
711
712         /* It seems to be used by DPDK for debug purposes only ('rte_ether') */
713         sa->eth_dev->data->rx_queue_state[sw_index] =
714                 RTE_ETH_QUEUE_STATE_STARTED;
715
716         return 0;
717
718 fail_mac_filter_default_rxq_set:
719         sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr);
720
721 fail_dp_qstart:
722         sfc_rx_qflush(sa, sw_index);
723
724 fail_rx_qcreate:
725         sfc_ev_qstop(evq);
726
727 fail_ev_qstart:
728         return rc;
729 }
730
731 void
732 sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
733 {
734         struct sfc_rxq_info *rxq_info;
735         struct sfc_rxq *rxq;
736
737         sfc_log_init(sa, "sw_index=%u", sw_index);
738
739         SFC_ASSERT(sw_index < sa->rxq_count);
740
741         rxq_info = &sa->rxq_info[sw_index];
742         rxq = rxq_info->rxq;
743
744         if (rxq->state == SFC_RXQ_INITIALIZED)
745                 return;
746         SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);
747
748         /* It seems to be used by DPDK for debug purposes only ('rte_ether') */
749         sa->eth_dev->data->rx_queue_state[sw_index] =
750                 RTE_ETH_QUEUE_STATE_STOPPED;
751
752         sa->dp_rx->qstop(rxq->dp, &rxq->evq->read_ptr);
753
754         if (sw_index == 0)
755                 efx_mac_filter_default_rxq_clear(sa->nic);
756
757         sfc_rx_qflush(sa, sw_index);
758
759         rxq->state = SFC_RXQ_INITIALIZED;
760
761         efx_rx_qdestroy(rxq->common);
762
763         sfc_ev_qstop(rxq->evq);
764 }
765
766 static int
767 sfc_rx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_rx_desc,
768                    const struct rte_eth_rxconf *rx_conf)
769 {
770         const uint16_t rx_free_thresh_max = EFX_RXQ_LIMIT(nb_rx_desc);
771         int rc = 0;
772
773         if (rx_conf->rx_thresh.pthresh != 0 ||
774             rx_conf->rx_thresh.hthresh != 0 ||
775             rx_conf->rx_thresh.wthresh != 0) {
776                 sfc_err(sa,
777                         "RxQ prefetch/host/writeback thresholds are not supported");
778                 rc = EINVAL;
779         }
780
781         if (rx_conf->rx_free_thresh > rx_free_thresh_max) {
782                 sfc_err(sa,
783                         "RxQ free threshold too large: %u vs maximum %u",
784                         rx_conf->rx_free_thresh, rx_free_thresh_max);
785                 rc = EINVAL;
786         }
787
788         if (rx_conf->rx_drop_en == 0) {
789                 sfc_err(sa, "RxQ drop disable is not supported");
790                 rc = EINVAL;
791         }
792
793         return rc;
794 }
795
796 static unsigned int
797 sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
798 {
799         uint32_t data_off;
800         uint32_t order;
801
802         /* The mbuf object itself is always cache line aligned */
803         order = rte_bsf32(RTE_CACHE_LINE_SIZE);
804
805         /* Data offset from mbuf object start */
806         data_off = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mb_pool) +
807                 RTE_PKTMBUF_HEADROOM;
808
809         order = MIN(order, rte_bsf32(data_off));
810
811         return 1u << order;
812 }
813
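/*
 * Calculate the Rx buffer size which may be used for the given mempool:
 * take the data room, remove the headroom and reserve space for the buffer
 * start alignment and end padding required by the NIC.
 */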
814 static uint16_t
815 sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool)
816 {
817         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
818         const uint32_t nic_align_start = MAX(1, encp->enc_rx_buf_align_start);
819         const uint32_t nic_align_end = MAX(1, encp->enc_rx_buf_align_end);
820         uint16_t buf_size;
821         unsigned int buf_aligned;
822         unsigned int start_alignment;
823         unsigned int end_padding_alignment;
824
825         /* Below it is assumed that both alignments are powers of 2 */
826         SFC_ASSERT(rte_is_power_of_2(nic_align_start));
827         SFC_ASSERT(rte_is_power_of_2(nic_align_end));
828
829         /*
830          * The mbuf is always cache line aligned; double-check
831          * that it meets the Rx buffer start alignment requirements.
832          */
833
834         /* Start from mbuf pool data room size */
835         buf_size = rte_pktmbuf_data_room_size(mb_pool);
836
837         /* Remove headroom */
838         if (buf_size <= RTE_PKTMBUF_HEADROOM) {
839                 sfc_err(sa,
840                         "RxQ mbuf pool %s object data room size %u is smaller than headroom %u",
841                         mb_pool->name, buf_size, RTE_PKTMBUF_HEADROOM);
842                 return 0;
843         }
844         buf_size -= RTE_PKTMBUF_HEADROOM;
845
846         /* Calculate guaranteed data start alignment */
847         buf_aligned = sfc_rx_mbuf_data_alignment(mb_pool);
848
849         /* Reserve space for start alignment */
850         if (buf_aligned < nic_align_start) {
851                 start_alignment = nic_align_start - buf_aligned;
852                 if (buf_size <= start_alignment) {
853                         sfc_err(sa,
854                                 "RxQ mbuf pool %s object data room size %u is insufficient for headroom %u and buffer start alignment %u required by NIC",
855                                 mb_pool->name,
856                                 rte_pktmbuf_data_room_size(mb_pool),
857                                 RTE_PKTMBUF_HEADROOM, start_alignment);
858                         return 0;
859                 }
860                 buf_aligned = nic_align_start;
861                 buf_size -= start_alignment;
862         } else {
863                 start_alignment = 0;
864         }
865
866         /* Make sure that end padding does not write beyond the buffer */
867         if (buf_aligned < nic_align_end) {
868                 /*
869                  * Estimate the space which can be lost. If the guaranteed
870                  * buffer size is odd, the lost space is (nic_align_end - 1).
871                  * A more accurate formula is used below.
872                  */
873                 end_padding_alignment = nic_align_end -
874                         MIN(buf_aligned, 1u << rte_bsf32(buf_size));
875                 if (buf_size <= end_padding_alignment) {
876                         sfc_err(sa,
877                                 "RxQ mbuf pool %s object data room size %u is insufficient for headroom %u, buffer start alignment %u and end padding alignment %u required by NIC",
878                                 mb_pool->name,
879                                 rte_pktmbuf_data_room_size(mb_pool),
880                                 RTE_PKTMBUF_HEADROOM, start_alignment,
881                                 end_padding_alignment);
882                         return 0;
883                 }
884                 buf_size -= end_padding_alignment;
885         } else {
886                 /*
887                  * The start is aligned the same as or better than the end,
888                  * so just align the length.
889                  */
890                 buf_size = P2ALIGN(buf_size, nic_align_end);
891         }
892
893         return buf_size;
894 }
895
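/*
 * Initialize the Rx queue: validate the configuration, calculate the Rx
 * buffer size, set up the event queue, allocate the control path queue and
 * its DMA ring, and create the datapath queue on top of them.
 */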
896 int
897 sfc_rx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
898              uint16_t nb_rx_desc, unsigned int socket_id,
899              const struct rte_eth_rxconf *rx_conf,
900              struct rte_mempool *mb_pool)
901 {
902         const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
903         int rc;
904         uint16_t buf_size;
905         struct sfc_rxq_info *rxq_info;
906         struct sfc_evq *evq;
907         struct sfc_rxq *rxq;
908         struct sfc_dp_rx_qcreate_info info;
909
910         rc = sfc_rx_qcheck_conf(sa, nb_rx_desc, rx_conf);
911         if (rc != 0)
912                 goto fail_bad_conf;
913
914         buf_size = sfc_rx_mb_pool_buf_size(sa, mb_pool);
915         if (buf_size == 0) {
916                 sfc_err(sa, "RxQ %u mbuf pool object size is too small",
917                         sw_index);
918                 rc = EINVAL;
919                 goto fail_bad_conf;
920         }
921
922         if ((buf_size < sa->port.pdu + encp->enc_rx_prefix_size) &&
923             !sa->eth_dev->data->dev_conf.rxmode.enable_scatter) {
924                 sfc_err(sa, "Rx scatter is disabled and RxQ %u mbuf pool "
925                         "object size is too small", sw_index);
926                 sfc_err(sa, "RxQ %u calculated Rx buffer size is %u vs "
927                         "PDU size %u plus Rx prefix %u bytes",
928                         sw_index, buf_size, (unsigned int)sa->port.pdu,
929                         encp->enc_rx_prefix_size);
930                 rc = EINVAL;
931                 goto fail_bad_conf;
932         }
933
934         SFC_ASSERT(sw_index < sa->rxq_count);
935         rxq_info = &sa->rxq_info[sw_index];
936
937         SFC_ASSERT(nb_rx_desc <= rxq_info->max_entries);
938         rxq_info->entries = nb_rx_desc;
939         rxq_info->type =
940                 sa->eth_dev->data->dev_conf.rxmode.enable_scatter ?
941                 EFX_RXQ_TYPE_SCATTER : EFX_RXQ_TYPE_DEFAULT;
942
943         rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_RX, sw_index,
944                           rxq_info->entries, socket_id, &evq);
945         if (rc != 0)
946                 goto fail_ev_qinit;
947
948         rc = ENOMEM;
949         rxq = rte_zmalloc_socket("sfc-rxq", sizeof(*rxq), RTE_CACHE_LINE_SIZE,
950                                  socket_id);
951         if (rxq == NULL)
952                 goto fail_rxq_alloc;
953
954         rxq_info->rxq = rxq;
955
956         rxq->evq = evq;
957         rxq->hw_index = sw_index;
958         rxq->refill_threshold =
959                 RTE_MAX(rx_conf->rx_free_thresh, SFC_RX_REFILL_BULK);
960         rxq->refill_mb_pool = mb_pool;
961
962         rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_RXQ_SIZE(rxq_info->entries),
963                            socket_id, &rxq->mem);
964         if (rc != 0)
965                 goto fail_dma_alloc;
966
967         memset(&info, 0, sizeof(info));
968         info.refill_mb_pool = rxq->refill_mb_pool;
969         info.refill_threshold = rxq->refill_threshold;
970         info.buf_size = buf_size;
971         info.batch_max = encp->enc_rx_batch_max;
972         info.prefix_size = encp->enc_rx_prefix_size;
973
974 #if EFSYS_OPT_RX_SCALE
975         if (sa->hash_support == EFX_RX_HASH_AVAILABLE && sa->rss_channels > 0)
976                 info.flags |= SFC_RXQ_FLAG_RSS_HASH;
977 #endif
978
979         info.rxq_entries = rxq_info->entries;
980         info.rxq_hw_ring = rxq->mem.esm_base;
981         info.evq_entries = rxq_info->entries;
982         info.evq_hw_ring = evq->mem.esm_base;
983         info.hw_index = rxq->hw_index;
984         info.mem_bar = sa->mem_bar.esb_base;
985
986         rc = sa->dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index,
987                                 &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
988                                 socket_id, &info, &rxq->dp);
989         if (rc != 0)
990                 goto fail_dp_rx_qcreate;
991
992         evq->dp_rxq = rxq->dp;
993
994         rxq->state = SFC_RXQ_INITIALIZED;
995
996         rxq_info->deferred_start = (rx_conf->rx_deferred_start != 0);
997
998         return 0;
999
1000 fail_dp_rx_qcreate:
1001         sfc_dma_free(sa, &rxq->mem);
1002
1003 fail_dma_alloc:
1004         rxq_info->rxq = NULL;
1005         rte_free(rxq);
1006
1007 fail_rxq_alloc:
1008         sfc_ev_qfini(evq);
1009
1010 fail_ev_qinit:
1011         rxq_info->entries = 0;
1012
1013 fail_bad_conf:
1014         sfc_log_init(sa, "failed %d", rc);
1015         return rc;
1016 }
1017
1018 void
1019 sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
1020 {
1021         struct sfc_rxq_info *rxq_info;
1022         struct sfc_rxq *rxq;
1023
1024         SFC_ASSERT(sw_index < sa->rxq_count);
1025
1026         rxq_info = &sa->rxq_info[sw_index];
1027
1028         rxq = rxq_info->rxq;
1029         SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);
1030
1031         sa->dp_rx->qdestroy(rxq->dp);
1032         rxq->dp = NULL;
1033
1034         rxq_info->rxq = NULL;
1035         rxq_info->entries = 0;
1036
1037         sfc_dma_free(sa, &rxq->mem);
1038
1039         sfc_ev_qfini(rxq->evq);
1040         rxq->evq = NULL;
1041
1042         rte_free(rxq);
1043 }
1044
1045 #if EFSYS_OPT_RX_SCALE
1046 efx_rx_hash_type_t
1047 sfc_rte_to_efx_hash_type(uint64_t rss_hf)
1048 {
1049         efx_rx_hash_type_t efx_hash_types = 0;
1050
1051         if ((rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1052                        ETH_RSS_NONFRAG_IPV4_OTHER)) != 0)
1053                 efx_hash_types |= EFX_RX_HASH_IPV4;
1054
1055         if ((rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) != 0)
1056                 efx_hash_types |= EFX_RX_HASH_TCPIPV4;
1057
1058         if ((rss_hf & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
1059                         ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX)) != 0)
1060                 efx_hash_types |= EFX_RX_HASH_IPV6;
1061
1062         if ((rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX)) != 0)
1063                 efx_hash_types |= EFX_RX_HASH_TCPIPV6;
1064
1065         return efx_hash_types;
1066 }
1067
1068 uint64_t
1069 sfc_efx_to_rte_hash_type(efx_rx_hash_type_t efx_hash_types)
1070 {
1071         uint64_t rss_hf = 0;
1072
1073         if ((efx_hash_types & EFX_RX_HASH_IPV4) != 0)
1074                 rss_hf |= (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1075                            ETH_RSS_NONFRAG_IPV4_OTHER);
1076
1077         if ((efx_hash_types & EFX_RX_HASH_TCPIPV4) != 0)
1078                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
1079
1080         if ((efx_hash_types & EFX_RX_HASH_IPV6) != 0)
1081                 rss_hf |= (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
1082                            ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX);
1083
1084         if ((efx_hash_types & EFX_RX_HASH_TCPIPV6) != 0)
1085                 rss_hf |= (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX);
1086
1087         return rss_hf;
1088 }
1089 #endif
1090
1091 static int
1092 sfc_rx_rss_config(struct sfc_adapter *sa)
1093 {
1094         int rc = 0;
1095
1096 #if EFSYS_OPT_RX_SCALE
1097         if (sa->rss_channels > 0) {
1098                 rc = efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ,
1099                                            sa->rss_hash_types, B_TRUE);
1100                 if (rc != 0)
1101                         goto finish;
1102
1103                 rc = efx_rx_scale_key_set(sa->nic, sa->rss_key,
1104                                           sizeof(sa->rss_key));
1105                 if (rc != 0)
1106                         goto finish;
1107
1108                 rc = efx_rx_scale_tbl_set(sa->nic, sa->rss_tbl,
1109                                           RTE_DIM(sa->rss_tbl));
1110         }
1111
1112 finish:
1113 #endif
1114         return rc;
1115 }
1116
1117 int
1118 sfc_rx_start(struct sfc_adapter *sa)
1119 {
1120         unsigned int sw_index;
1121         int rc;
1122
1123         sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);
1124
1125         rc = efx_rx_init(sa->nic);
1126         if (rc != 0)
1127                 goto fail_rx_init;
1128
1129         rc = sfc_rx_rss_config(sa);
1130         if (rc != 0)
1131                 goto fail_rss_config;
1132
1133         for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
1134                 if ((!sa->rxq_info[sw_index].deferred_start ||
1135                      sa->rxq_info[sw_index].deferred_started)) {
1136                         rc = sfc_rx_qstart(sa, sw_index);
1137                         if (rc != 0)
1138                                 goto fail_rx_qstart;
1139                 }
1140         }
1141
1142         return 0;
1143
1144 fail_rx_qstart:
1145         while (sw_index-- > 0)
1146                 sfc_rx_qstop(sa, sw_index);
1147
1148 fail_rss_config:
1149         efx_rx_fini(sa->nic);
1150
1151 fail_rx_init:
1152         sfc_log_init(sa, "failed %d", rc);
1153         return rc;
1154 }
1155
1156 void
1157 sfc_rx_stop(struct sfc_adapter *sa)
1158 {
1159         unsigned int sw_index;
1160
1161         sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);
1162
1163         sw_index = sa->rxq_count;
1164         while (sw_index-- > 0) {
1165                 if (sa->rxq_info[sw_index].rxq != NULL)
1166                         sfc_rx_qstop(sa, sw_index);
1167         }
1168
1169         efx_rx_fini(sa->nic);
1170 }
1171
1172 static int
1173 sfc_rx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
1174 {
1175         struct sfc_rxq_info *rxq_info = &sa->rxq_info[sw_index];
1176         unsigned int max_entries;
1177
1178         max_entries = EFX_RXQ_MAXNDESCS;
1179         SFC_ASSERT(rte_is_power_of_2(max_entries));
1180
1181         rxq_info->max_entries = max_entries;
1182
1183         return 0;
1184 }
1185
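/*
 * Check device-level Rx mode settings against PMD and chosen datapath
 * capabilities; unsupported features are rejected with EINVAL and FCS
 * stripping is forced on with a warning.
 */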
1186 static int
1187 sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
1188 {
1189         int rc = 0;
1190
1191         switch (rxmode->mq_mode) {
1192         case ETH_MQ_RX_NONE:
1193                 /* No special checks are required */
1194                 break;
1195 #if EFSYS_OPT_RX_SCALE
1196         case ETH_MQ_RX_RSS:
1197                 if (sa->rss_support == EFX_RX_SCALE_UNAVAILABLE) {
1198                         sfc_err(sa, "RSS is not available");
1199                         rc = EINVAL;
1200                 }
1201                 break;
1202 #endif
1203         default:
1204                 sfc_err(sa, "Rx multi-queue mode %u not supported",
1205                         rxmode->mq_mode);
1206                 rc = EINVAL;
1207         }
1208
1209         if (rxmode->header_split) {
1210                 sfc_err(sa, "Header split on Rx not supported");
1211                 rc = EINVAL;
1212         }
1213
1214         if (rxmode->hw_vlan_filter) {
1215                 sfc_err(sa, "HW VLAN filtering not supported");
1216                 rc = EINVAL;
1217         }
1218
1219         if (rxmode->hw_vlan_strip) {
1220                 sfc_err(sa, "HW VLAN stripping not supported");
1221                 rc = EINVAL;
1222         }
1223
1224         if (rxmode->hw_vlan_extend) {
1225                 sfc_err(sa,
1226                         "Q-in-Q HW VLAN stripping not supported");
1227                 rc = EINVAL;
1228         }
1229
1230         if (!rxmode->hw_strip_crc) {
1231                 sfc_warn(sa,
1232                          "FCS stripping control not supported - always stripped");
1233                 rxmode->hw_strip_crc = 1;
1234         }
1235
1236         if (rxmode->enable_scatter &&
1237             (~sa->dp_rx->features & SFC_DP_RX_FEAT_SCATTER)) {
1238                 sfc_err(sa, "Rx scatter not supported by %s datapath",
1239                         sa->dp_rx->dp.name);
1240                 rc = EINVAL;
1241         }
1242
1243         if (rxmode->enable_lro) {
1244                 sfc_err(sa, "LRO not supported");
1245                 rc = EINVAL;
1246         }
1247
1248         return rc;
1249 }
1250
1251 /**
1252  * Destroy excess queues that are no longer needed after reconfiguration
1253  * or complete close.
1254  */
1255 static void
1256 sfc_rx_fini_queues(struct sfc_adapter *sa, unsigned int nb_rx_queues)
1257 {
1258         int sw_index;
1259
1260         SFC_ASSERT(nb_rx_queues <= sa->rxq_count);
1261
1262         sw_index = sa->rxq_count;
1263         while (--sw_index >= (int)nb_rx_queues) {
1264                 if (sa->rxq_info[sw_index].rxq != NULL)
1265                         sfc_rx_qfini(sa, sw_index);
1266         }
1267
1268         sa->rxq_count = nb_rx_queues;
1269 }
1270
1271 /**
1272  * Initialize Rx subsystem.
1273  *
1274  * Called at the device (re)configuration stage when the number of receive
1275  * queues is specified together with other device-level receive configuration.
1276  *
1277  * It should be used to allocate NUMA-unaware resources.
1278  */
1279 int
1280 sfc_rx_configure(struct sfc_adapter *sa)
1281 {
1282         struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
1283         const unsigned int nb_rx_queues = sa->eth_dev->data->nb_rx_queues;
1284         unsigned int sw_index;
1285         int rc;
1286
1287         sfc_log_init(sa, "nb_rx_queues=%u (old %u)",
1288                      nb_rx_queues, sa->rxq_count);
1289
1290         rc = sfc_rx_check_mode(sa, &dev_conf->rxmode);
1291         if (rc != 0)
1292                 goto fail_check_mode;
1293
1294         if (nb_rx_queues == sa->rxq_count)
1295                 goto done;
1296
1297         if (sa->rxq_info == NULL) {
1298                 rc = ENOMEM;
1299                 sa->rxq_info = rte_calloc_socket("sfc-rxqs", nb_rx_queues,
1300                                                  sizeof(sa->rxq_info[0]), 0,
1301                                                  sa->socket_id);
1302                 if (sa->rxq_info == NULL)
1303                         goto fail_rxqs_alloc;
1304         } else {
1305                 struct sfc_rxq_info *new_rxq_info;
1306
1307                 if (nb_rx_queues < sa->rxq_count)
1308                         sfc_rx_fini_queues(sa, nb_rx_queues);
1309
1310                 rc = ENOMEM;
1311                 new_rxq_info =
1312                         rte_realloc(sa->rxq_info,
1313                                     nb_rx_queues * sizeof(sa->rxq_info[0]), 0);
1314                 if (new_rxq_info == NULL && nb_rx_queues > 0)
1315                         goto fail_rxqs_realloc;
1316
1317                 sa->rxq_info = new_rxq_info;
1318                 if (nb_rx_queues > sa->rxq_count)
1319                         memset(&sa->rxq_info[sa->rxq_count], 0,
1320                                (nb_rx_queues - sa->rxq_count) *
1321                                sizeof(sa->rxq_info[0]));
1322         }
1323
1324         while (sa->rxq_count < nb_rx_queues) {
1325                 rc = sfc_rx_qinit_info(sa, sa->rxq_count);
1326                 if (rc != 0)
1327                         goto fail_rx_qinit_info;
1328
1329                 sa->rxq_count++;
1330         }
1331
1332 #if EFSYS_OPT_RX_SCALE
1333         sa->rss_channels = (dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ?
1334                            MIN(sa->rxq_count, EFX_MAXRSS) : 0;
1335
1336         if (sa->rss_channels > 0) {
1337                 for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index)
1338                         sa->rss_tbl[sw_index] = sw_index % sa->rss_channels;
1339         }
1340 #endif
1341
1342 done:
1343         return 0;
1344
1345 fail_rx_qinit_info:
1346 fail_rxqs_realloc:
1347 fail_rxqs_alloc:
1348         sfc_rx_close(sa);
1349
1350 fail_check_mode:
1351         sfc_log_init(sa, "failed %d", rc);
1352         return rc;
1353 }
1354
1355 /**
1356  * Shut down the Rx subsystem.
1357  *
1358  * Called at the device close stage, for example, before device shutdown.
1359  */
1360 void
1361 sfc_rx_close(struct sfc_adapter *sa)
1362 {
1363         sfc_rx_fini_queues(sa, 0);
1364
1365         sa->rss_channels = 0;
1366
1367         rte_free(sa->rxq_info);
1368         sa->rxq_info = NULL;
1369 }