net/sfc: get RxQ descriptor done
[dpdk.git] drivers/net/sfc/sfc_rx.c
/*-
 * Copyright (c) 2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_mempool.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tweak.h"

/*
 * Maximum number of Rx queue flush attempts in the case of failure or
 * flush timeout
 */
#define SFC_RX_QFLUSH_ATTEMPTS          (3)

/*
 * Time to wait between event queue polling attempts when waiting for Rx
 * queue flush done or failed events.
 */
#define SFC_RX_QFLUSH_POLL_WAIT_MS      (1)

/*
 * Maximum number of event queue polling attempts when waiting for Rx queue
 * flush done or failed events. It defines the Rx queue flush attempt timeout
 * together with SFC_RX_QFLUSH_POLL_WAIT_MS.
 */
#define SFC_RX_QFLUSH_POLL_ATTEMPTS     (2000)
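/*
 * One flush attempt may thus poll for up to
 * SFC_RX_QFLUSH_POLL_ATTEMPTS * SFC_RX_QFLUSH_POLL_WAIT_MS = 2000 * 1 ms = 2 s;
 * with SFC_RX_QFLUSH_ATTEMPTS retries the worst-case wait is about 6 s.
 */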

void
sfc_rx_qflush_done(struct sfc_rxq *rxq)
{
        rxq->state |= SFC_RXQ_FLUSHED;
        rxq->state &= ~SFC_RXQ_FLUSHING;
}

void
sfc_rx_qflush_failed(struct sfc_rxq *rxq)
{
        rxq->state |= SFC_RXQ_FLUSH_FAILED;
        rxq->state &= ~SFC_RXQ_FLUSHING;
}

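/*
 * Refill the Rx ring: allocate mbufs from the refill mempool in bulks of
 * SFC_RX_REFILL_BULK, initialize them and post their DMA addresses to the
 * hardware Rx queue, pushing the doorbell if anything was posted. Nothing
 * is done if the free space is below the refill threshold.
 */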
static void
sfc_rx_qrefill(struct sfc_rxq *rxq)
{
        unsigned int free_space;
        unsigned int bulks;
        void *objs[SFC_RX_REFILL_BULK];
        efsys_dma_addr_t addr[RTE_DIM(objs)];
        unsigned int added = rxq->added;
        unsigned int id;
        unsigned int i;
        struct sfc_rx_sw_desc *rxd;
        struct rte_mbuf *m;
        uint8_t port_id = rxq->port_id;

        free_space = EFX_RXQ_LIMIT(rxq->ptr_mask + 1) -
                (added - rxq->completed);

        if (free_space < rxq->refill_threshold)
                return;

        bulks = free_space / RTE_DIM(objs);

        id = added & rxq->ptr_mask;
        while (bulks-- > 0) {
                if (rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
                                         RTE_DIM(objs)) < 0) {
                        /*
                         * It is hardly a safe way to increment the counter
                         * from different contexts, but all PMDs do it.
                         */
                        rxq->evq->sa->eth_dev->data->rx_mbuf_alloc_failed +=
                                RTE_DIM(objs);
                        break;
                }

                for (i = 0; i < RTE_DIM(objs);
                     ++i, id = (id + 1) & rxq->ptr_mask) {
                        m = objs[i];

                        rxd = &rxq->sw_desc[id];
                        rxd->mbuf = m;

                        rte_mbuf_refcnt_set(m, 1);
                        m->data_off = RTE_PKTMBUF_HEADROOM;
                        m->next = NULL;
                        m->nb_segs = 1;
                        m->port = port_id;

                        addr[i] = rte_pktmbuf_mtophys(m);
                }

                efx_rx_qpost(rxq->common, addr, rxq->buf_size,
                             RTE_DIM(objs), rxq->completed, added);
                added += RTE_DIM(objs);
        }

        /* Push doorbell if something is posted */
        if (rxq->added != added) {
                rxq->added = added;
                efx_rx_qpush(rxq->common, added, &rxq->pushed);
        }
}

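/* Translate EFX Rx descriptor checksum flags into mbuf ol_flags */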
static uint64_t
sfc_rx_desc_flags_to_offload_flags(const unsigned int desc_flags)
{
        uint64_t mbuf_flags = 0;

        switch (desc_flags & (EFX_PKT_IPV4 | EFX_CKSUM_IPV4)) {
        case (EFX_PKT_IPV4 | EFX_CKSUM_IPV4):
                mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
                break;
        case EFX_PKT_IPV4:
                mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
                break;
        default:
                RTE_BUILD_BUG_ON(PKT_RX_IP_CKSUM_UNKNOWN != 0);
                SFC_ASSERT((mbuf_flags & PKT_RX_IP_CKSUM_MASK) ==
                           PKT_RX_IP_CKSUM_UNKNOWN);
                break;
        }

        switch ((desc_flags &
                 (EFX_PKT_TCP | EFX_PKT_UDP | EFX_CKSUM_TCPUDP))) {
        case (EFX_PKT_TCP | EFX_CKSUM_TCPUDP):
        case (EFX_PKT_UDP | EFX_CKSUM_TCPUDP):
                mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
                break;
        case EFX_PKT_TCP:
        case EFX_PKT_UDP:
                mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
                break;
        default:
                RTE_BUILD_BUG_ON(PKT_RX_L4_CKSUM_UNKNOWN != 0);
                SFC_ASSERT((mbuf_flags & PKT_RX_L4_CKSUM_MASK) ==
                           PKT_RX_L4_CKSUM_UNKNOWN);
                break;
        }

        return mbuf_flags;
}

static uint32_t
sfc_rx_desc_flags_to_packet_type(const unsigned int desc_flags)
{
        return RTE_PTYPE_L2_ETHER |
                ((desc_flags & EFX_PKT_IPV4) ?
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0) |
                ((desc_flags & EFX_PKT_IPV6) ?
                        RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0) |
                ((desc_flags & EFX_PKT_TCP) ? RTE_PTYPE_L4_TCP : 0) |
                ((desc_flags & EFX_PKT_UDP) ? RTE_PTYPE_L4_UDP : 0);
}

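/*
 * Burst receive: poll the event queue to update completion state, then walk
 * completed descriptors, skip the Rx prefix, fill in mbuf length, offload
 * flags and packet type, and finally refill the ring. Packets marked with
 * address mismatch, discard or continuation flags are dropped and their
 * mbufs are returned to the mempool.
 */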
uint16_t
sfc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct sfc_rxq *rxq = rx_queue;
        unsigned int completed;
        unsigned int prefix_size = rxq->prefix_size;
        unsigned int done_pkts = 0;
        boolean_t discard_next = B_FALSE;

        if (unlikely((rxq->state & SFC_RXQ_RUNNING) == 0))
                return 0;

        sfc_ev_qpoll(rxq->evq);

        completed = rxq->completed;
        while (completed != rxq->pending && done_pkts < nb_pkts) {
                unsigned int id;
                struct sfc_rx_sw_desc *rxd;
                struct rte_mbuf *m;
                unsigned int seg_len;
                unsigned int desc_flags;

                id = completed++ & rxq->ptr_mask;
                rxd = &rxq->sw_desc[id];
                m = rxd->mbuf;
                desc_flags = rxd->flags;

                if (discard_next)
                        goto discard;

                if (desc_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
                        goto discard;

                if (desc_flags & EFX_PKT_CONT)
                        goto discard;

                if (desc_flags & EFX_PKT_PREFIX_LEN) {
                        uint16_t tmp_size;
                        int rc __rte_unused;

                        rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
                                rte_pktmbuf_mtod(m, uint8_t *), &tmp_size);
                        SFC_ASSERT(rc == 0);
                        seg_len = tmp_size;
                } else {
                        seg_len = rxd->size - prefix_size;
                }

                m->data_off += prefix_size;
                rte_pktmbuf_data_len(m) = seg_len;
                rte_pktmbuf_pkt_len(m) = seg_len;

                m->ol_flags = sfc_rx_desc_flags_to_offload_flags(desc_flags);
                m->packet_type = sfc_rx_desc_flags_to_packet_type(desc_flags);

                *rx_pkts++ = m;
                done_pkts++;
                continue;

discard:
                discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
                rte_mempool_put(rxq->refill_mb_pool, m);
                rxd->mbuf = NULL;
        }

        rxq->completed = completed;

        sfc_rx_qrefill(rxq);

        return done_pkts;
}

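/*
 * Return the number of Rx descriptors with received packets which have not
 * yet been processed by the datapath (pending minus completed).
 */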
unsigned int
sfc_rx_qdesc_npending(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq *rxq;

        SFC_ASSERT(sw_index < sa->rxq_count);
        rxq = sa->rxq_info[sw_index].rxq;

        if (rxq == NULL || (rxq->state & SFC_RXQ_RUNNING) == 0)
                return 0;

        sfc_ev_qpoll(rxq->evq);

        return rxq->pending - rxq->completed;
}

int
sfc_rx_qdesc_done(struct sfc_rxq *rxq, unsigned int offset)
{
        if ((rxq->state & SFC_RXQ_RUNNING) == 0)
                return 0;

        sfc_ev_qpoll(rxq->evq);

        return offset < (rxq->pending - rxq->completed);
}

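/* Return all mbufs still held by the Rx ring to the refill mempool */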
static void
sfc_rx_qpurge(struct sfc_rxq *rxq)
{
        unsigned int i;
        struct sfc_rx_sw_desc *rxd;

        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_desc[i & rxq->ptr_mask];
                rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
                rxd->mbuf = NULL;
        }
}

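/*
 * Flush the Rx queue: request a hardware flush and poll the event queue for
 * the flush done or failed event, retrying on failure or timeout, then purge
 * any mbufs left in the ring.
 */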
static void
sfc_rx_qflush(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq *rxq;
        unsigned int retry_count;
        unsigned int wait_count;

        rxq = sa->rxq_info[sw_index].rxq;
        SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);

        /*
         * Retry Rx queue flushing in the case of flush failure or
         * timeout. In the worst case it can delay for 6 seconds.
         */
        for (retry_count = 0;
             ((rxq->state & SFC_RXQ_FLUSHED) == 0) &&
             (retry_count < SFC_RX_QFLUSH_ATTEMPTS);
             ++retry_count) {
                if (efx_rx_qflush(rxq->common) != 0) {
                        rxq->state |= SFC_RXQ_FLUSH_FAILED;
                        break;
                }
                rxq->state &= ~SFC_RXQ_FLUSH_FAILED;
                rxq->state |= SFC_RXQ_FLUSHING;

                /*
                 * Wait for Rx queue flush done or failed event at least
                 * SFC_RX_QFLUSH_POLL_WAIT_MS milliseconds and not more
                 * than 2 seconds (SFC_RX_QFLUSH_POLL_WAIT_MS multiplied
                 * by SFC_RX_QFLUSH_POLL_ATTEMPTS).
                 */
                wait_count = 0;
                do {
                        rte_delay_ms(SFC_RX_QFLUSH_POLL_WAIT_MS);
                        sfc_ev_qpoll(rxq->evq);
                } while ((rxq->state & SFC_RXQ_FLUSHING) &&
                         (wait_count++ < SFC_RX_QFLUSH_POLL_ATTEMPTS));

                if (rxq->state & SFC_RXQ_FLUSHING)
                        sfc_err(sa, "RxQ %u flush timed out", sw_index);

                if (rxq->state & SFC_RXQ_FLUSH_FAILED)
                        sfc_err(sa, "RxQ %u flush failed", sw_index);

                if (rxq->state & SFC_RXQ_FLUSHED)
                        sfc_info(sa, "RxQ %u flushed", sw_index);
        }

        sfc_rx_qpurge(rxq);
}

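/*
 * Start the Rx queue: start its event queue, create and enable the hardware
 * Rx queue, fill it with mbufs and, for queue 0, install it as the default
 * MAC filter destination.
 */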
int
sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info;
        struct sfc_rxq *rxq;
        struct sfc_evq *evq;
        int rc;

        sfc_log_init(sa, "sw_index=%u", sw_index);

        SFC_ASSERT(sw_index < sa->rxq_count);

        rxq_info = &sa->rxq_info[sw_index];
        rxq = rxq_info->rxq;
        SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);

        evq = rxq->evq;

        rc = sfc_ev_qstart(sa, evq->evq_index);
        if (rc != 0)
                goto fail_ev_qstart;

        rc = efx_rx_qcreate(sa->nic, rxq->hw_index, 0, rxq_info->type,
                            &rxq->mem, rxq_info->entries,
                            0 /* not used on EF10 */, evq->common,
                            &rxq->common);
        if (rc != 0)
                goto fail_rx_qcreate;

        efx_rx_qenable(rxq->common);

        rxq->pending = rxq->completed = rxq->added = rxq->pushed = 0;

        rxq->state |= (SFC_RXQ_STARTED | SFC_RXQ_RUNNING);

        sfc_rx_qrefill(rxq);

        if (sw_index == 0) {
                rc = efx_mac_filter_default_rxq_set(sa->nic, rxq->common,
                                                    B_FALSE);
                if (rc != 0)
                        goto fail_mac_filter_default_rxq_set;
        }

        /* It seems to be used by DPDK for debug purposes only ('rte_ether') */
        sa->eth_dev->data->rx_queue_state[sw_index] =
                RTE_ETH_QUEUE_STATE_STARTED;

        return 0;

fail_mac_filter_default_rxq_set:
        sfc_rx_qflush(sa, sw_index);

fail_rx_qcreate:
        sfc_ev_qstop(sa, evq->evq_index);

fail_ev_qstart:
        return rc;
}

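/*
 * Stop the Rx queue: flush it, return mbufs to the mempool, destroy the
 * hardware Rx queue and stop its event queue.
 */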
void
sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info;
        struct sfc_rxq *rxq;

        sfc_log_init(sa, "sw_index=%u", sw_index);

        SFC_ASSERT(sw_index < sa->rxq_count);

        rxq_info = &sa->rxq_info[sw_index];
        rxq = rxq_info->rxq;
        SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);

        /* It seems to be used by DPDK for debug purposes only ('rte_ether') */
        sa->eth_dev->data->rx_queue_state[sw_index] =
                RTE_ETH_QUEUE_STATE_STOPPED;

        rxq->state &= ~SFC_RXQ_RUNNING;

        if (sw_index == 0)
                efx_mac_filter_default_rxq_clear(sa->nic);

        sfc_rx_qflush(sa, sw_index);

        rxq->state = SFC_RXQ_INITIALIZED;

        efx_rx_qdestroy(rxq->common);

        sfc_ev_qstop(sa, rxq->evq->evq_index);
}

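/* Validate per-queue Rx configuration against what the PMD supports */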
static int
sfc_rx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_rx_desc,
                   const struct rte_eth_rxconf *rx_conf)
{
        const uint16_t rx_free_thresh_max = EFX_RXQ_LIMIT(nb_rx_desc);
        int rc = 0;

        if (rx_conf->rx_thresh.pthresh != 0 ||
            rx_conf->rx_thresh.hthresh != 0 ||
            rx_conf->rx_thresh.wthresh != 0) {
                sfc_err(sa,
                        "RxQ prefetch/host/writeback thresholds are not supported");
                rc = EINVAL;
        }

        if (rx_conf->rx_free_thresh > rx_free_thresh_max) {
                sfc_err(sa,
                        "RxQ free threshold too large: %u vs maximum %u",
                        rx_conf->rx_free_thresh, rx_free_thresh_max);
                rc = EINVAL;
        }

        if (rx_conf->rx_drop_en == 0) {
                sfc_err(sa, "RxQ drop disable is not supported");
                rc = EINVAL;
        }

        if (rx_conf->rx_deferred_start != 0) {
                sfc_err(sa, "RxQ deferred start is not supported");
                rc = EINVAL;
        }

        return rc;
}

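/*
 * Guaranteed alignment of the start of mbuf data, derived from the cache
 * line alignment of the mbuf object and the data offset within it.
 */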
static unsigned int
sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
{
        uint32_t data_off;
        uint32_t order;

        /* The mbuf object itself is always cache line aligned */
        order = rte_bsf32(RTE_CACHE_LINE_SIZE);

        /* Data offset from mbuf object start */
        data_off = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mb_pool) +
                RTE_PKTMBUF_HEADROOM;

        order = MIN(order, rte_bsf32(data_off));

        return 1u << (order - 1);
}

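/*
 * Compute the Rx buffer size usable with the given mempool: the mbuf data
 * room minus headroom and the space reserved to satisfy the NIC buffer
 * start and end padding alignment requirements.
 */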
static uint16_t
sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        const uint32_t nic_align_start = MAX(1, encp->enc_rx_buf_align_start);
        const uint32_t nic_align_end = MAX(1, encp->enc_rx_buf_align_end);
        uint16_t buf_size;
        unsigned int buf_aligned;
        unsigned int start_alignment;
        unsigned int end_padding_alignment;

        /* Below it is assumed that both alignments are powers of 2 */
        SFC_ASSERT(rte_is_power_of_2(nic_align_start));
        SFC_ASSERT(rte_is_power_of_2(nic_align_end));

        /*
         * The mbuf is always cache line aligned; double-check
         * that it meets Rx buffer start alignment requirements.
         */

        /* Start from mbuf pool data room size */
        buf_size = rte_pktmbuf_data_room_size(mb_pool);

        /* Remove headroom */
        if (buf_size <= RTE_PKTMBUF_HEADROOM) {
                sfc_err(sa,
                        "RxQ mbuf pool %s object data room size %u is smaller than headroom %u",
                        mb_pool->name, buf_size, RTE_PKTMBUF_HEADROOM);
                return 0;
        }
        buf_size -= RTE_PKTMBUF_HEADROOM;

        /* Calculate guaranteed data start alignment */
        buf_aligned = sfc_rx_mbuf_data_alignment(mb_pool);

        /* Reserve space for start alignment */
        if (buf_aligned < nic_align_start) {
                start_alignment = nic_align_start - buf_aligned;
                if (buf_size <= start_alignment) {
                        sfc_err(sa,
                                "RxQ mbuf pool %s object data room size %u is insufficient for headroom %u and buffer start alignment %u required by NIC",
                                mb_pool->name,
                                rte_pktmbuf_data_room_size(mb_pool),
                                RTE_PKTMBUF_HEADROOM, start_alignment);
                        return 0;
                }
                buf_aligned = nic_align_start;
                buf_size -= start_alignment;
        } else {
                start_alignment = 0;
        }

        /* Make sure that end padding does not write beyond the buffer */
        if (buf_aligned < nic_align_end) {
                /*
                 * Estimate the space which can be lost. If the guaranteed
                 * buffer size is odd, the lost space is (nic_align_end - 1).
                 * A more accurate formula is used below.
                 */
                end_padding_alignment = nic_align_end -
                        MIN(buf_aligned, 1u << (rte_bsf32(buf_size) - 1));
                if (buf_size <= end_padding_alignment) {
                        sfc_err(sa,
                                "RxQ mbuf pool %s object data room size %u is insufficient for headroom %u, buffer start alignment %u and end padding alignment %u required by NIC",
                                mb_pool->name,
                                rte_pktmbuf_data_room_size(mb_pool),
                                RTE_PKTMBUF_HEADROOM, start_alignment,
                                end_padding_alignment);
                        return 0;
                }
                buf_size -= end_padding_alignment;
        } else {
                /*
                 * The start is aligned the same as or better than the end;
                 * just align the length.
                 */
                buf_size = P2ALIGN(buf_size, nic_align_end);
        }

        return buf_size;
}

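/*
 * Set up an Rx queue: validate the configuration, compute the Rx buffer
 * size, initialize the bound event queue and allocate the queue structure,
 * DMA ring memory and software descriptor array.
 */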
int
sfc_rx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
             uint16_t nb_rx_desc, unsigned int socket_id,
             const struct rte_eth_rxconf *rx_conf,
             struct rte_mempool *mb_pool)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        int rc;
        uint16_t buf_size;
        struct sfc_rxq_info *rxq_info;
        unsigned int evq_index;
        struct sfc_evq *evq;
        struct sfc_rxq *rxq;

        rc = sfc_rx_qcheck_conf(sa, nb_rx_desc, rx_conf);
        if (rc != 0)
                goto fail_bad_conf;

        buf_size = sfc_rx_mb_pool_buf_size(sa, mb_pool);
        if (buf_size == 0) {
                sfc_err(sa, "RxQ %u mbuf pool object size is too small",
                        sw_index);
                rc = EINVAL;
                goto fail_bad_conf;
        }

        if ((buf_size < sa->port.pdu + encp->enc_rx_prefix_size) &&
            !sa->eth_dev->data->dev_conf.rxmode.enable_scatter) {
                sfc_err(sa, "Rx scatter is disabled and RxQ %u mbuf pool "
                        "object size is too small", sw_index);
                sfc_err(sa, "RxQ %u calculated Rx buffer size is %u vs "
                        "PDU size %u plus Rx prefix %u bytes",
                        sw_index, buf_size, (unsigned int)sa->port.pdu,
                        encp->enc_rx_prefix_size);
                rc = EINVAL;
                goto fail_bad_conf;
        }

        SFC_ASSERT(sw_index < sa->rxq_count);
        rxq_info = &sa->rxq_info[sw_index];

        SFC_ASSERT(nb_rx_desc <= rxq_info->max_entries);
        rxq_info->entries = nb_rx_desc;
        rxq_info->type = EFX_RXQ_TYPE_DEFAULT;

        evq_index = sfc_evq_index_by_rxq_sw_index(sa, sw_index);

        rc = sfc_ev_qinit(sa, evq_index, rxq_info->entries, socket_id);
        if (rc != 0)
                goto fail_ev_qinit;

        evq = sa->evq_info[evq_index].evq;

        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-rxq", sizeof(*rxq), RTE_CACHE_LINE_SIZE,
                                 socket_id);
        if (rxq == NULL)
                goto fail_rxq_alloc;

        rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_RXQ_SIZE(rxq_info->entries),
                           socket_id, &rxq->mem);
        if (rc != 0)
                goto fail_dma_alloc;

        rc = ENOMEM;
        rxq->sw_desc = rte_calloc_socket("sfc-rxq-sw_desc", rxq_info->entries,
                                         sizeof(*rxq->sw_desc),
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq->sw_desc == NULL)
                goto fail_desc_alloc;

        evq->rxq = rxq;
        rxq->evq = evq;
        rxq->ptr_mask = rxq_info->entries - 1;
        rxq->refill_threshold = rx_conf->rx_free_thresh;
        rxq->refill_mb_pool = mb_pool;
        rxq->buf_size = buf_size;
        rxq->hw_index = sw_index;
        rxq->port_id = sa->eth_dev->data->port_id;

        /* Cache limits required on datapath in RxQ structure */
        rxq->batch_max = encp->enc_rx_batch_max;
        rxq->prefix_size = encp->enc_rx_prefix_size;

        rxq->state = SFC_RXQ_INITIALIZED;

        rxq_info->rxq = rxq;

        return 0;

fail_desc_alloc:
        sfc_dma_free(sa, &rxq->mem);

fail_dma_alloc:
        rte_free(rxq);

fail_rxq_alloc:
        sfc_ev_qfini(sa, evq_index);

fail_ev_qinit:
        rxq_info->entries = 0;

fail_bad_conf:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
}

void
sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info;
        struct sfc_rxq *rxq;

        SFC_ASSERT(sw_index < sa->rxq_count);

        rxq_info = &sa->rxq_info[sw_index];

        rxq = rxq_info->rxq;
        SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);

        rxq_info->rxq = NULL;
        rxq_info->entries = 0;

        rte_free(rxq->sw_desc);
        sfc_dma_free(sa, &rxq->mem);
        rte_free(rxq);
}

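/* Initialize the NIC Rx subsystem and start all configured Rx queues */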
int
sfc_rx_start(struct sfc_adapter *sa)
{
        unsigned int sw_index;
        int rc;

        sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);

        rc = efx_rx_init(sa->nic);
        if (rc != 0)
                goto fail_rx_init;

        for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
                rc = sfc_rx_qstart(sa, sw_index);
                if (rc != 0)
                        goto fail_rx_qstart;
        }

        return 0;

fail_rx_qstart:
        while (sw_index-- > 0)
                sfc_rx_qstop(sa, sw_index);

        efx_rx_fini(sa->nic);

fail_rx_init:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
}

void
sfc_rx_stop(struct sfc_adapter *sa)
{
        unsigned int sw_index;

        sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);

        sw_index = sa->rxq_count;
        while (sw_index-- > 0) {
                if (sa->rxq_info[sw_index].rxq != NULL)
                        sfc_rx_qstop(sa, sw_index);
        }

        efx_rx_fini(sa->nic);
}

static int
sfc_rx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info = &sa->rxq_info[sw_index];
        unsigned int max_entries;

        max_entries = EFX_RXQ_MAXNDESCS;
        SFC_ASSERT(rte_is_power_of_2(max_entries));

        rxq_info->max_entries = max_entries;

        return 0;
}

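/* Validate device-level Rx mode and offload settings against PMD capabilities */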
static int
sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
{
        int rc = 0;

        switch (rxmode->mq_mode) {
        case ETH_MQ_RX_NONE:
                /* No special checks are required */
                break;
        default:
                sfc_err(sa, "Rx multi-queue mode %u not supported",
                        rxmode->mq_mode);
                rc = EINVAL;
        }

        if (rxmode->header_split) {
                sfc_err(sa, "Header split on Rx not supported");
                rc = EINVAL;
        }

        if (rxmode->hw_vlan_filter) {
                sfc_err(sa, "HW VLAN filtering not supported");
                rc = EINVAL;
        }

        if (rxmode->hw_vlan_strip) {
                sfc_err(sa, "HW VLAN stripping not supported");
                rc = EINVAL;
        }

        if (rxmode->hw_vlan_extend) {
                sfc_err(sa,
                        "Q-in-Q HW VLAN stripping not supported");
                rc = EINVAL;
        }

        if (!rxmode->hw_strip_crc) {
                sfc_warn(sa,
                         "FCS stripping control not supported - always stripped");
                rxmode->hw_strip_crc = 1;
        }

        if (rxmode->enable_scatter) {
                sfc_err(sa, "Scatter on Rx not supported");
                rc = EINVAL;
        }

        if (rxmode->enable_lro) {
                sfc_err(sa, "LRO not supported");
                rc = EINVAL;
        }

        return rc;
}

/**
 * Initialize Rx subsystem.
 *
 * Called at device configuration stage when the number of receive queues is
 * specified together with other device-level receive configuration.
 *
 * It should be used to allocate NUMA-unaware resources.
 */
int
sfc_rx_init(struct sfc_adapter *sa)
{
        struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
        unsigned int sw_index;
        int rc;

        rc = sfc_rx_check_mode(sa, &dev_conf->rxmode);
        if (rc != 0)
                goto fail_check_mode;

        sa->rxq_count = sa->eth_dev->data->nb_rx_queues;

        rc = ENOMEM;
        sa->rxq_info = rte_calloc_socket("sfc-rxqs", sa->rxq_count,
                                         sizeof(struct sfc_rxq_info), 0,
                                         sa->socket_id);
        if (sa->rxq_info == NULL)
                goto fail_rxqs_alloc;

        for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
                rc = sfc_rx_qinit_info(sa, sw_index);
                if (rc != 0)
                        goto fail_rx_qinit_info;
        }

        return 0;

fail_rx_qinit_info:
        rte_free(sa->rxq_info);
        sa->rxq_info = NULL;

fail_rxqs_alloc:
        sa->rxq_count = 0;
fail_check_mode:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
}

/**
 * Shutdown Rx subsystem.
 *
 * Called at device close stage, for example, before device
 * reconfiguration or shutdown.
 */
void
sfc_rx_fini(struct sfc_adapter *sa)
{
        unsigned int sw_index;

        sw_index = sa->rxq_count;
        while (sw_index-- > 0) {
                if (sa->rxq_info[sw_index].rxq != NULL)
                        sfc_rx_qfini(sa, sw_index);
        }

        rte_free(sa->rxq_info);
        sa->rxq_info = NULL;
        sa->rxq_count = 0;
}