net/sfc: support Rx free threshold
drivers/net/sfc/sfc_rx.c (dpdk.git)
/*-
 * Copyright (c) 2016 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <rte_mempool.h>

#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_rx.h"
#include "sfc_tweak.h"

/*
 * Maximum number of Rx queue flush attempts in the case of a failure or
 * flush timeout
 */
#define SFC_RX_QFLUSH_ATTEMPTS          (3)

/*
 * Time to wait between event queue polling attempts when waiting for Rx
 * queue flush done or failed events.
 */
#define SFC_RX_QFLUSH_POLL_WAIT_MS      (1)

/*
 * Maximum number of event queue polling attempts when waiting for Rx queue
 * flush done or failed events. It defines Rx queue flush attempt timeout
 * together with SFC_RX_QFLUSH_POLL_WAIT_MS.
 */
#define SFC_RX_QFLUSH_POLL_ATTEMPTS     (2000)
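/*
 * With the values above, one flush attempt polls for at most
 * SFC_RX_QFLUSH_POLL_ATTEMPTS * SFC_RX_QFLUSH_POLL_WAIT_MS = 2000 * 1 ms =
 * 2 seconds, so SFC_RX_QFLUSH_ATTEMPTS attempts bound the total flush wait
 * by roughly 6 seconds.
 */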

void
sfc_rx_qflush_done(struct sfc_rxq *rxq)
{
        rxq->state |= SFC_RXQ_FLUSHED;
        rxq->state &= ~SFC_RXQ_FLUSHING;
}

void
sfc_rx_qflush_failed(struct sfc_rxq *rxq)
{
        rxq->state |= SFC_RXQ_FLUSH_FAILED;
        rxq->state &= ~SFC_RXQ_FLUSHING;
}

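/*
 * Refill the Rx queue from the refill mempool.
 *
 * Buffers are posted in bulks of SFC_RX_REFILL_BULK descriptors and the
 * doorbell is pushed once at the end.  Nothing is done while the number of
 * free descriptors is below the refill threshold configured via the Rx
 * free threshold (rte_eth_rxconf.rx_free_thresh).
 */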
static void
sfc_rx_qrefill(struct sfc_rxq *rxq)
{
        unsigned int free_space;
        unsigned int bulks;
        void *objs[SFC_RX_REFILL_BULK];
        efsys_dma_addr_t addr[RTE_DIM(objs)];
        unsigned int added = rxq->added;
        unsigned int id;
        unsigned int i;
        struct sfc_rx_sw_desc *rxd;
        struct rte_mbuf *m;
        uint8_t port_id = rxq->port_id;

        free_space = EFX_RXQ_LIMIT(rxq->ptr_mask + 1) -
                (added - rxq->completed);

        if (free_space < rxq->refill_threshold)
                return;

        bulks = free_space / RTE_DIM(objs);

        id = added & rxq->ptr_mask;
        while (bulks-- > 0) {
                if (rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
                                         RTE_DIM(objs)) < 0) {
                        /*
                         * It is hardly a safe way to increment the counter
                         * from different contexts, but all PMDs do it.
                         */
                        rxq->evq->sa->eth_dev->data->rx_mbuf_alloc_failed +=
                                RTE_DIM(objs);
                        break;
                }

                for (i = 0; i < RTE_DIM(objs);
                     ++i, id = (id + 1) & rxq->ptr_mask) {
                        m = objs[i];

                        rxd = &rxq->sw_desc[id];
                        rxd->mbuf = m;

                        rte_mbuf_refcnt_set(m, 1);
                        m->data_off = RTE_PKTMBUF_HEADROOM;
                        m->next = NULL;
                        m->nb_segs = 1;
                        m->port = port_id;

                        addr[i] = rte_pktmbuf_mtophys(m);
                }

                efx_rx_qpost(rxq->common, addr, rxq->buf_size,
                             RTE_DIM(objs), rxq->completed, added);
                added += RTE_DIM(objs);
        }

        /* Push doorbell if something is posted */
        if (rxq->added != added) {
                rxq->added = added;
                efx_rx_qpush(rxq->common, added, &rxq->pushed);
        }
}

static uint64_t
sfc_rx_desc_flags_to_offload_flags(const unsigned int desc_flags)
{
        uint64_t mbuf_flags = 0;

        switch (desc_flags & (EFX_PKT_IPV4 | EFX_CKSUM_IPV4)) {
        case (EFX_PKT_IPV4 | EFX_CKSUM_IPV4):
                mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
                break;
        case EFX_PKT_IPV4:
                mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
                break;
        default:
                RTE_BUILD_BUG_ON(PKT_RX_IP_CKSUM_UNKNOWN != 0);
                SFC_ASSERT((mbuf_flags & PKT_RX_IP_CKSUM_MASK) ==
                           PKT_RX_IP_CKSUM_UNKNOWN);
                break;
        }

        switch ((desc_flags &
                 (EFX_PKT_TCP | EFX_PKT_UDP | EFX_CKSUM_TCPUDP))) {
        case (EFX_PKT_TCP | EFX_CKSUM_TCPUDP):
        case (EFX_PKT_UDP | EFX_CKSUM_TCPUDP):
                mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
                break;
        case EFX_PKT_TCP:
        case EFX_PKT_UDP:
                mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
                break;
        default:
                RTE_BUILD_BUG_ON(PKT_RX_L4_CKSUM_UNKNOWN != 0);
                SFC_ASSERT((mbuf_flags & PKT_RX_L4_CKSUM_MASK) ==
                           PKT_RX_L4_CKSUM_UNKNOWN);
                break;
        }

        return mbuf_flags;
}

static uint32_t
sfc_rx_desc_flags_to_packet_type(const unsigned int desc_flags)
{
        return RTE_PTYPE_L2_ETHER |
                ((desc_flags & EFX_PKT_IPV4) ?
                        RTE_PTYPE_L3_IPV4_EXT_UNKNOWN : 0) |
                ((desc_flags & EFX_PKT_IPV6) ?
                        RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 0) |
                ((desc_flags & EFX_PKT_TCP) ? RTE_PTYPE_L4_TCP : 0) |
                ((desc_flags & EFX_PKT_UDP) ? RTE_PTYPE_L4_UDP : 0);
}

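/*
 * Burst receive callback.
 *
 * Polls the event queue to update the completion state, then walks the
 * pending descriptors: packets marked for discard (address mismatch,
 * explicit discard or scatter continuation, which is not supported) are
 * returned to the mbuf pool, while good packets get their length, offload
 * flags and packet type filled in from the Rx prefix and descriptor flags.
 * Finally the queue is refilled.
 */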
uint16_t
sfc_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct sfc_rxq *rxq = rx_queue;
        unsigned int completed;
        unsigned int prefix_size = rxq->prefix_size;
        unsigned int done_pkts = 0;
        boolean_t discard_next = B_FALSE;

        if (unlikely((rxq->state & SFC_RXQ_RUNNING) == 0))
                return 0;

        sfc_ev_qpoll(rxq->evq);

        completed = rxq->completed;
        while (completed != rxq->pending && done_pkts < nb_pkts) {
                unsigned int id;
                struct sfc_rx_sw_desc *rxd;
                struct rte_mbuf *m;
                unsigned int seg_len;
                unsigned int desc_flags;

                id = completed++ & rxq->ptr_mask;
                rxd = &rxq->sw_desc[id];
                m = rxd->mbuf;
                desc_flags = rxd->flags;

                if (discard_next)
                        goto discard;

                if (desc_flags & (EFX_ADDR_MISMATCH | EFX_DISCARD))
                        goto discard;

                if (desc_flags & EFX_PKT_CONT)
                        goto discard;

                if (desc_flags & EFX_PKT_PREFIX_LEN) {
                        uint16_t tmp_size;
                        int rc __rte_unused;

                        rc = efx_pseudo_hdr_pkt_length_get(rxq->common,
                                rte_pktmbuf_mtod(m, uint8_t *), &tmp_size);
                        SFC_ASSERT(rc == 0);
                        seg_len = tmp_size;
                } else {
                        seg_len = rxd->size - prefix_size;
                }

                m->data_off += prefix_size;
                rte_pktmbuf_data_len(m) = seg_len;
                rte_pktmbuf_pkt_len(m) = seg_len;

                m->ol_flags = sfc_rx_desc_flags_to_offload_flags(desc_flags);
                m->packet_type = sfc_rx_desc_flags_to_packet_type(desc_flags);

                *rx_pkts++ = m;
                done_pkts++;
                continue;

discard:
                discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
                rte_mempool_put(rxq->refill_mb_pool, m);
                rxd->mbuf = NULL;
        }

        rxq->completed = completed;

        sfc_rx_qrefill(rxq);

        return done_pkts;
}

static void
sfc_rx_qpurge(struct sfc_rxq *rxq)
{
        unsigned int i;
        struct sfc_rx_sw_desc *rxd;

        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_desc[i & rxq->ptr_mask];
                rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
                rxd->mbuf = NULL;
        }
}

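/*
 * Flush the Rx queue.
 *
 * Requests a hardware flush and polls the event queue for the flush done
 * or failed event, retrying up to SFC_RX_QFLUSH_ATTEMPTS times.  Buffers
 * still owned by the queue are returned to the mbuf pool afterwards.
 */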
static void
sfc_rx_qflush(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq *rxq;
        unsigned int retry_count;
        unsigned int wait_count;

        rxq = sa->rxq_info[sw_index].rxq;
        SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);

        /*
         * Retry Rx queue flushing in the case of a flush failure or
         * timeout. In the worst case it can delay for about 6 seconds.
         */
        for (retry_count = 0;
             ((rxq->state & SFC_RXQ_FLUSHED) == 0) &&
             (retry_count < SFC_RX_QFLUSH_ATTEMPTS);
             ++retry_count) {
                if (efx_rx_qflush(rxq->common) != 0) {
                        rxq->state |= SFC_RXQ_FLUSH_FAILED;
                        break;
                }
                rxq->state &= ~SFC_RXQ_FLUSH_FAILED;
                rxq->state |= SFC_RXQ_FLUSHING;

                /*
                 * Wait for the Rx queue flush done or failed event for at
                 * least SFC_RX_QFLUSH_POLL_WAIT_MS milliseconds and no more
                 * than 2 seconds (SFC_RX_QFLUSH_POLL_WAIT_MS multiplied
                 * by SFC_RX_QFLUSH_POLL_ATTEMPTS).
                 */
                wait_count = 0;
                do {
                        rte_delay_ms(SFC_RX_QFLUSH_POLL_WAIT_MS);
                        sfc_ev_qpoll(rxq->evq);
                } while ((rxq->state & SFC_RXQ_FLUSHING) &&
                         (wait_count++ < SFC_RX_QFLUSH_POLL_ATTEMPTS));

                if (rxq->state & SFC_RXQ_FLUSHING)
                        sfc_err(sa, "RxQ %u flush timed out", sw_index);

                if (rxq->state & SFC_RXQ_FLUSH_FAILED)
                        sfc_err(sa, "RxQ %u flush failed", sw_index);

                if (rxq->state & SFC_RXQ_FLUSHED)
                        sfc_info(sa, "RxQ %u flushed", sw_index);
        }

        sfc_rx_qpurge(rxq);
}

int
sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info;
        struct sfc_rxq *rxq;
        struct sfc_evq *evq;
        int rc;

        sfc_log_init(sa, "sw_index=%u", sw_index);

        SFC_ASSERT(sw_index < sa->rxq_count);

        rxq_info = &sa->rxq_info[sw_index];
        rxq = rxq_info->rxq;
        SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);

        evq = rxq->evq;

        rc = sfc_ev_qstart(sa, evq->evq_index);
        if (rc != 0)
                goto fail_ev_qstart;

        rc = efx_rx_qcreate(sa->nic, rxq->hw_index, 0, rxq_info->type,
                            &rxq->mem, rxq_info->entries,
                            0 /* not used on EF10 */, evq->common,
                            &rxq->common);
        if (rc != 0)
                goto fail_rx_qcreate;

        efx_rx_qenable(rxq->common);

        rxq->pending = rxq->completed = rxq->added = rxq->pushed = 0;

        rxq->state |= (SFC_RXQ_STARTED | SFC_RXQ_RUNNING);

        sfc_rx_qrefill(rxq);

        if (sw_index == 0) {
                rc = efx_mac_filter_default_rxq_set(sa->nic, rxq->common,
                                                    B_FALSE);
                if (rc != 0)
                        goto fail_mac_filter_default_rxq_set;
        }

        /* It seems to be used by DPDK for debug purposes only ('rte_ether') */
        sa->eth_dev->data->rx_queue_state[sw_index] =
                RTE_ETH_QUEUE_STATE_STARTED;

        return 0;

fail_mac_filter_default_rxq_set:
        sfc_rx_qflush(sa, sw_index);

fail_rx_qcreate:
        sfc_ev_qstop(sa, evq->evq_index);

fail_ev_qstart:
        return rc;
}

void
sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info;
        struct sfc_rxq *rxq;

        sfc_log_init(sa, "sw_index=%u", sw_index);

        SFC_ASSERT(sw_index < sa->rxq_count);

        rxq_info = &sa->rxq_info[sw_index];
        rxq = rxq_info->rxq;
        SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);

        /* It seems to be used by DPDK for debug purposes only ('rte_ether') */
        sa->eth_dev->data->rx_queue_state[sw_index] =
                RTE_ETH_QUEUE_STATE_STOPPED;

        rxq->state &= ~SFC_RXQ_RUNNING;

        if (sw_index == 0)
                efx_mac_filter_default_rxq_clear(sa->nic);

        sfc_rx_qflush(sa, sw_index);

        rxq->state = SFC_RXQ_INITIALIZED;

        efx_rx_qdestroy(rxq->common);

        sfc_ev_qstop(sa, rxq->evq->evq_index);
}

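/*
 * Validate queue-level Rx configuration.
 *
 * The Rx free threshold from rte_eth_rxconf becomes the RxQ refill
 * threshold (see sfc_rx_qinit() and sfc_rx_qrefill()).  An illustrative
 * application-side sketch (port, queue, descriptor count and threshold
 * value are arbitrary examples, not requirements of this driver):
 *
 *      struct rte_eth_rxconf rx_conf;
 *
 *      memset(&rx_conf, 0, sizeof(rx_conf));
 *      rx_conf.rx_free_thresh = 64;   // refill once >= 64 descriptors free
 *      rx_conf.rx_drop_en = 1;        // drop disable is rejected below
 *      rte_eth_rx_queue_setup(port_id, rx_queue_id, 512, socket_id,
 *                             &rx_conf, mb_pool);
 */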
static int
sfc_rx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_rx_desc,
                   const struct rte_eth_rxconf *rx_conf)
{
        const uint16_t rx_free_thresh_max = EFX_RXQ_LIMIT(nb_rx_desc);
        int rc = 0;

        if (rx_conf->rx_thresh.pthresh != 0 ||
            rx_conf->rx_thresh.hthresh != 0 ||
            rx_conf->rx_thresh.wthresh != 0) {
                sfc_err(sa,
                        "RxQ prefetch/host/writeback thresholds are not supported");
                rc = EINVAL;
        }

        if (rx_conf->rx_free_thresh > rx_free_thresh_max) {
                sfc_err(sa,
                        "RxQ free threshold too large: %u vs maximum %u",
                        rx_conf->rx_free_thresh, rx_free_thresh_max);
                rc = EINVAL;
        }

        if (rx_conf->rx_drop_en == 0) {
                sfc_err(sa, "RxQ drop disable is not supported");
                rc = EINVAL;
        }

        if (rx_conf->rx_deferred_start != 0) {
                sfc_err(sa, "RxQ deferred start is not supported");
                rc = EINVAL;
        }

        return rc;
}

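/*
 * Compute the data start alignment guaranteed by the mempool layout: the
 * largest power of two dividing the data offset, capped by the cache line
 * size.  For illustration only, with typical x86-64 defaults (64-byte cache
 * lines, sizeof(struct rte_mbuf) == 128, zero private area and 128 bytes of
 * headroom; actual values depend on the build and the mempool):
 *
 *      data_off = 128 + 0 + 128 = 256
 *      order    = MIN(rte_bsf32(64), rte_bsf32(256)) = MIN(6, 8) = 6
 *      result   = 1u << (6 - 1) = 32
 */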
static unsigned int
sfc_rx_mbuf_data_alignment(struct rte_mempool *mb_pool)
{
        uint32_t data_off;
        uint32_t order;

        /* The mbuf object itself is always cache line aligned */
        order = rte_bsf32(RTE_CACHE_LINE_SIZE);

        /* Data offset from mbuf object start */
        data_off = sizeof(struct rte_mbuf) + rte_pktmbuf_priv_size(mb_pool) +
                RTE_PKTMBUF_HEADROOM;

        order = MIN(order, rte_bsf32(data_off));

        return 1u << (order - 1);
}

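/*
 * Compute the Rx buffer size which may be posted from the given mempool,
 * accounting for the headroom and the NIC buffer start/end alignment
 * requirements.  For illustration only, assuming a default-sized pool
 * (2048-byte data room after the 128-byte headroom), the 32-byte guaranteed
 * alignment from the example above, nic_align_start == 1 and
 * nic_align_end == 64 (the real values come from the NIC configuration):
 *
 *      start: 32 >= 1, so no start alignment reservation
 *      end:   32 < 64, so end_padding_alignment =
 *                      64 - MIN(32, 1u << (rte_bsf32(2048) - 1)) = 32
 *      result: 2048 - 32 = 2016 bytes
 */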
static uint16_t
sfc_rx_mb_pool_buf_size(struct sfc_adapter *sa, struct rte_mempool *mb_pool)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        const uint32_t nic_align_start = MAX(1, encp->enc_rx_buf_align_start);
        const uint32_t nic_align_end = MAX(1, encp->enc_rx_buf_align_end);
        uint16_t buf_size;
        unsigned int buf_aligned;
        unsigned int start_alignment;
        unsigned int end_padding_alignment;

        /* Below it is assumed that both alignments are powers of 2 */
        SFC_ASSERT(rte_is_power_of_2(nic_align_start));
        SFC_ASSERT(rte_is_power_of_2(nic_align_end));

        /*
         * The mbuf is always cache line aligned; double-check
         * that it meets the Rx buffer start alignment requirements.
         */

        /* Start from mbuf pool data room size */
        buf_size = rte_pktmbuf_data_room_size(mb_pool);

        /* Remove headroom */
        if (buf_size <= RTE_PKTMBUF_HEADROOM) {
                sfc_err(sa,
                        "RxQ mbuf pool %s object data room size %u is smaller than headroom %u",
                        mb_pool->name, buf_size, RTE_PKTMBUF_HEADROOM);
                return 0;
        }
        buf_size -= RTE_PKTMBUF_HEADROOM;

        /* Calculate guaranteed data start alignment */
        buf_aligned = sfc_rx_mbuf_data_alignment(mb_pool);

        /* Reserve space for start alignment */
        if (buf_aligned < nic_align_start) {
                start_alignment = nic_align_start - buf_aligned;
                if (buf_size <= start_alignment) {
                        sfc_err(sa,
                                "RxQ mbuf pool %s object data room size %u is insufficient for headroom %u and buffer start alignment %u required by NIC",
                                mb_pool->name,
                                rte_pktmbuf_data_room_size(mb_pool),
                                RTE_PKTMBUF_HEADROOM, start_alignment);
                        return 0;
                }
                buf_aligned = nic_align_start;
                buf_size -= start_alignment;
        } else {
                start_alignment = 0;
        }

        /* Make sure that end padding does not write beyond the buffer */
        if (buf_aligned < nic_align_end) {
                /*
                 * Estimate the space which can be lost. If the guaranteed
                 * buffer size is odd, the lost space is (nic_align_end - 1).
                 * A more accurate formula is below.
                 */
                end_padding_alignment = nic_align_end -
                        MIN(buf_aligned, 1u << (rte_bsf32(buf_size) - 1));
                if (buf_size <= end_padding_alignment) {
                        sfc_err(sa,
                                "RxQ mbuf pool %s object data room size %u is insufficient for headroom %u, buffer start alignment %u and end padding alignment %u required by NIC",
                                mb_pool->name,
                                rte_pktmbuf_data_room_size(mb_pool),
                                RTE_PKTMBUF_HEADROOM, start_alignment,
                                end_padding_alignment);
                        return 0;
                }
                buf_size -= end_padding_alignment;
        } else {
                /*
                 * Start is aligned the same or better than end,
                 * just align length.
                 */
                buf_size = P2ALIGN(buf_size, nic_align_end);
        }

        return buf_size;
}

int
sfc_rx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
             uint16_t nb_rx_desc, unsigned int socket_id,
             const struct rte_eth_rxconf *rx_conf,
             struct rte_mempool *mb_pool)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
        int rc;
        uint16_t buf_size;
        struct sfc_rxq_info *rxq_info;
        unsigned int evq_index;
        struct sfc_evq *evq;
        struct sfc_rxq *rxq;

        rc = sfc_rx_qcheck_conf(sa, nb_rx_desc, rx_conf);
        if (rc != 0)
                goto fail_bad_conf;

        buf_size = sfc_rx_mb_pool_buf_size(sa, mb_pool);
        if (buf_size == 0) {
                sfc_err(sa, "RxQ %u mbuf pool object size is too small",
                        sw_index);
                rc = EINVAL;
                goto fail_bad_conf;
        }

        if ((buf_size < sa->port.pdu + encp->enc_rx_prefix_size) &&
            !sa->eth_dev->data->dev_conf.rxmode.enable_scatter) {
                sfc_err(sa, "Rx scatter is disabled and RxQ %u mbuf pool "
                        "object size is too small", sw_index);
                sfc_err(sa, "RxQ %u calculated Rx buffer size is %u vs "
                        "PDU size %u plus Rx prefix %u bytes",
                        sw_index, buf_size, (unsigned int)sa->port.pdu,
                        encp->enc_rx_prefix_size);
                rc = EINVAL;
                goto fail_bad_conf;
        }

        SFC_ASSERT(sw_index < sa->rxq_count);
        rxq_info = &sa->rxq_info[sw_index];

        SFC_ASSERT(nb_rx_desc <= rxq_info->max_entries);
        rxq_info->entries = nb_rx_desc;
        rxq_info->type = EFX_RXQ_TYPE_DEFAULT;

        evq_index = sfc_evq_index_by_rxq_sw_index(sa, sw_index);

        rc = sfc_ev_qinit(sa, evq_index, rxq_info->entries, socket_id);
        if (rc != 0)
                goto fail_ev_qinit;

        evq = sa->evq_info[evq_index].evq;

        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-rxq", sizeof(*rxq), RTE_CACHE_LINE_SIZE,
                                 socket_id);
        if (rxq == NULL)
                goto fail_rxq_alloc;

        rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_RXQ_SIZE(rxq_info->entries),
                           socket_id, &rxq->mem);
        if (rc != 0)
                goto fail_dma_alloc;

        rc = ENOMEM;
        rxq->sw_desc = rte_calloc_socket("sfc-rxq-sw_desc", rxq_info->entries,
                                         sizeof(*rxq->sw_desc),
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq->sw_desc == NULL)
                goto fail_desc_alloc;

        evq->rxq = rxq;
        rxq->evq = evq;
        rxq->ptr_mask = rxq_info->entries - 1;
        rxq->refill_threshold = rx_conf->rx_free_thresh;
        rxq->refill_mb_pool = mb_pool;
        rxq->buf_size = buf_size;
        rxq->hw_index = sw_index;
        rxq->port_id = sa->eth_dev->data->port_id;

        /* Cache limits required on datapath in RxQ structure */
        rxq->batch_max = encp->enc_rx_batch_max;
        rxq->prefix_size = encp->enc_rx_prefix_size;

        rxq->state = SFC_RXQ_INITIALIZED;

        rxq_info->rxq = rxq;

        return 0;

fail_desc_alloc:
        sfc_dma_free(sa, &rxq->mem);

fail_dma_alloc:
        rte_free(rxq);

fail_rxq_alloc:
        sfc_ev_qfini(sa, evq_index);

fail_ev_qinit:
        rxq_info->entries = 0;

fail_bad_conf:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
}

void
sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info;
        struct sfc_rxq *rxq;

        SFC_ASSERT(sw_index < sa->rxq_count);

        rxq_info = &sa->rxq_info[sw_index];

        rxq = rxq_info->rxq;
        SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);

        rxq_info->rxq = NULL;
        rxq_info->entries = 0;

        rte_free(rxq->sw_desc);
        sfc_dma_free(sa, &rxq->mem);
        rte_free(rxq);
}

int
sfc_rx_start(struct sfc_adapter *sa)
{
        unsigned int sw_index;
        int rc;

        sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);

        rc = efx_rx_init(sa->nic);
        if (rc != 0)
                goto fail_rx_init;

        for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
                rc = sfc_rx_qstart(sa, sw_index);
                if (rc != 0)
                        goto fail_rx_qstart;
        }

        return 0;

fail_rx_qstart:
        while (sw_index-- > 0)
                sfc_rx_qstop(sa, sw_index);

        efx_rx_fini(sa->nic);

fail_rx_init:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
}

void
sfc_rx_stop(struct sfc_adapter *sa)
{
        unsigned int sw_index;

        sfc_log_init(sa, "rxq_count=%u", sa->rxq_count);

        sw_index = sa->rxq_count;
        while (sw_index-- > 0) {
                if (sa->rxq_info[sw_index].rxq != NULL)
                        sfc_rx_qstop(sa, sw_index);
        }

        efx_rx_fini(sa->nic);
}

static int
sfc_rx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
        struct sfc_rxq_info *rxq_info = &sa->rxq_info[sw_index];
        unsigned int max_entries;

        max_entries = EFX_RXQ_MAXNDESCS;
        SFC_ASSERT(rte_is_power_of_2(max_entries));

        rxq_info->max_entries = max_entries;

        return 0;
}

static int
sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
{
        int rc = 0;

        switch (rxmode->mq_mode) {
        case ETH_MQ_RX_NONE:
                /* No special checks are required */
                break;
        default:
                sfc_err(sa, "Rx multi-queue mode %u not supported",
                        rxmode->mq_mode);
                rc = EINVAL;
        }

        if (rxmode->header_split) {
                sfc_err(sa, "Header split on Rx not supported");
                rc = EINVAL;
        }

        if (rxmode->hw_vlan_filter) {
                sfc_err(sa, "HW VLAN filtering not supported");
                rc = EINVAL;
        }

        if (rxmode->hw_vlan_strip) {
                sfc_err(sa, "HW VLAN stripping not supported");
                rc = EINVAL;
        }

        if (rxmode->hw_vlan_extend) {
                sfc_err(sa,
                        "Q-in-Q HW VLAN stripping not supported");
                rc = EINVAL;
        }

        if (!rxmode->hw_strip_crc) {
                sfc_warn(sa,
                         "FCS stripping control not supported - always stripped");
                rxmode->hw_strip_crc = 1;
        }

        if (rxmode->enable_scatter) {
                sfc_err(sa, "Scatter on Rx not supported");
                rc = EINVAL;
        }

        if (rxmode->enable_lro) {
                sfc_err(sa, "LRO not supported");
                rc = EINVAL;
        }

        return rc;
}

/**
 * Initialize the Rx subsystem.
 *
 * Called at the device configuration stage when the number of receive
 * queues is specified together with other device-level receive
 * configuration.
 *
 * It should be used to allocate NUMA-unaware resources.
 */
int
sfc_rx_init(struct sfc_adapter *sa)
{
        struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
        unsigned int sw_index;
        int rc;

        rc = sfc_rx_check_mode(sa, &dev_conf->rxmode);
        if (rc != 0)
                goto fail_check_mode;

        sa->rxq_count = sa->eth_dev->data->nb_rx_queues;

        rc = ENOMEM;
        sa->rxq_info = rte_calloc_socket("sfc-rxqs", sa->rxq_count,
                                         sizeof(struct sfc_rxq_info), 0,
                                         sa->socket_id);
        if (sa->rxq_info == NULL)
                goto fail_rxqs_alloc;

        for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
                rc = sfc_rx_qinit_info(sa, sw_index);
                if (rc != 0)
                        goto fail_rx_qinit_info;
        }

        return 0;

fail_rx_qinit_info:
        rte_free(sa->rxq_info);
        sa->rxq_info = NULL;

fail_rxqs_alloc:
        sa->rxq_count = 0;
fail_check_mode:
        sfc_log_init(sa, "failed %d", rc);
        return rc;
}

/**
 * Shut down the Rx subsystem.
 *
 * Called at the device close stage, for example, before device
 * reconfiguration or shutdown.
 */
void
sfc_rx_fini(struct sfc_adapter *sa)
{
        unsigned int sw_index;

        sw_index = sa->rxq_count;
        while (sw_index-- > 0) {
                if (sa->rxq_info[sw_index].rxq != NULL)
                        sfc_rx_qfini(sa, sw_index);
        }

        rte_free(sa->rxq_info);
        sa->rxq_info = NULL;
        sa->rxq_count = 0;
}