net/sfc: support per-queue Rx prefix for EF100
[dpdk.git] / drivers/net/sfc/sfc_ef100_rx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2020 Xilinx, Inc.
4  * Copyright(c) 2018-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9
10 /* EF100 native datapath implementation */
11
12 #include <stdbool.h>
13
14 #include <rte_byteorder.h>
15 #include <rte_mbuf_ptype.h>
16 #include <rte_mbuf.h>
17 #include <rte_io.h>
18
19 #include "efx_types.h"
20 #include "efx_regs_ef100.h"
21 #include "efx.h"
22
23 #include "sfc_debug.h"
24 #include "sfc_tweak.h"
25 #include "sfc_dp_rx.h"
26 #include "sfc_kvargs.h"
27 #include "sfc_ef100.h"
28
29
30 #define sfc_ef100_rx_err(_rxq, ...) \
31         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)
32
33 #define sfc_ef100_rx_debug(_rxq, ...) \
34         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
35                    __VA_ARGS__)
36
37 /**
38  * Maximum number of descriptors/buffers in the Rx ring.
39  * It should guarantee that the corresponding event queue never overfills.
40  * The EF100 native datapath uses an event queue of the same size as the Rx queue.
41  * Maximum number of events on datapath can be estimated as number of
42  * Rx queue entries (one event per Rx buffer in the worst case) plus
43  * Rx error and flush events.
44  */
45 #define SFC_EF100_RXQ_LIMIT(_ndesc) \
46         ((_ndesc) - 1 /* head must not step on tail */ - \
47          1 /* Rx error */ - 1 /* flush */)
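
/*
 * Worked example (illustrative figures): for a 512-entry Rx ring the limit
 * above evaluates to 512 - 1 - 1 - 1 = 509 pushed buffers, which keeps the
 * equally sized event queue from overfilling even in the worst case of one
 * event per buffer plus an Rx error event and a flush event.
 */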
48
49 struct sfc_ef100_rx_sw_desc {
50         struct rte_mbuf                 *mbuf;
51 };
52
53 struct sfc_ef100_rxq {
54         /* Used on data path */
55         unsigned int                    flags;
56 #define SFC_EF100_RXQ_STARTED           0x1
57 #define SFC_EF100_RXQ_NOT_RUNNING       0x2
58 #define SFC_EF100_RXQ_EXCEPTION         0x4
59         unsigned int                    ptr_mask;
60         unsigned int                    evq_phase_bit_shift;
61         unsigned int                    ready_pkts;
62         unsigned int                    completed;
63         unsigned int                    evq_read_ptr;
64         volatile efx_qword_t            *evq_hw_ring;
65         struct sfc_ef100_rx_sw_desc     *sw_ring;
66         uint64_t                        rearm_data;
67         uint16_t                        buf_size;
68         uint16_t                        prefix_size;
69
70         /* Used on refill */
71         unsigned int                    added;
72         unsigned int                    max_fill_level;
73         unsigned int                    refill_threshold;
74         struct rte_mempool              *refill_mb_pool;
75         efx_qword_t                     *rxq_hw_ring;
76         volatile void                   *doorbell;
77
78         /* Datapath receive queue anchor */
79         struct sfc_dp_rxq               dp;
80 };
81
82 static inline struct sfc_ef100_rxq *
83 sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
84 {
85         return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
86 }
87
88 static inline void
89 sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
90 {
91         efx_dword_t dword;
92
93         EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);
94
95         /* DMA sync to device is not required */
96
97         /*
98          * rte_write32() has rte_io_wmb() which guarantees that the STORE
99          * operations (i.e. Rx and event descriptor updates) that precede
100          * the rte_io_wmb() call are visible to NIC before the STORE
101          * operations that follow it (i.e. doorbell write).
102          */
103         rte_write32(dword.ed_u32[0], rxq->doorbell);
104
105         sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
106                            EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
107                            added);
108 }
109
110 static void
111 sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
112 {
113         const unsigned int ptr_mask = rxq->ptr_mask;
114         unsigned int free_space;
115         unsigned int bulks;
116         void *objs[SFC_RX_REFILL_BULK];
117         unsigned int added = rxq->added;
118
119         free_space = rxq->max_fill_level - (added - rxq->completed);
120
121         if (free_space < rxq->refill_threshold)
122                 return;
123
124         bulks = free_space / RTE_DIM(objs);
125         /* refill_threshold guarantees that bulks is positive */
126         SFC_ASSERT(bulks > 0);
127
128         do {
129                 unsigned int id;
130                 unsigned int i;
131
132                 if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
133                                                   RTE_DIM(objs)) < 0)) {
134                         struct rte_eth_dev_data *dev_data =
135                                 rte_eth_devices[rxq->dp.dpq.port_id].data;
136
137                         /*
138                          * It is hardly a safe way to increment a counter
139                          * from different contexts, but all PMDs do it.
140                          */
141                         dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
142                         /* Return if we have posted nothing yet */
143                         if (added == rxq->added)
144                                 return;
145                         /* Push posted */
146                         break;
147                 }
148
149                 for (i = 0, id = added & ptr_mask;
150                      i < RTE_DIM(objs);
151                      ++i, ++id) {
152                         struct rte_mbuf *m = objs[i];
153                         struct sfc_ef100_rx_sw_desc *rxd;
154                         rte_iova_t phys_addr;
155
156                         MBUF_RAW_ALLOC_CHECK(m);
157
158                         SFC_ASSERT((id & ~ptr_mask) == 0);
159                         rxd = &rxq->sw_ring[id];
160                         rxd->mbuf = m;
161
162                         /*
163                          * Avoid writing to the mbuf. It is cheaper to do it
164                          * when we receive the packet and fill in nearby
165                          * structure members.
166                          */
167
168                         phys_addr = rte_mbuf_data_iova_default(m);
169                         EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
170                             ESF_GZ_RX_BUF_ADDR, phys_addr);
171                 }
172
173                 added += RTE_DIM(objs);
174         } while (--bulks > 0);
175
176         SFC_ASSERT(rxq->added != added);
177         rxq->added = added;
178         sfc_ef100_rx_qpush(rxq, added);
179 }
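
/*
 * Refill bookkeeping note: 'added' counts descriptors written to the
 * hardware ring while 'completed' counts descriptors handed back to the
 * application, so the fill level is (added - completed) and, by the check
 * above, never exceeds max_fill_level; refills are done in bulks of
 * SFC_RX_REFILL_BULK mbufs.
 */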
180
181 static inline uint64_t
182 sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
183 {
184         return EFX_WORD_FIELD(class,
185                               ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
186                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
187                 PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
188 }
189
190 static inline uint64_t
191 sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
192 {
193         return EFX_WORD_FIELD(class,
194                               ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
195                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
196                 PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
197 }
198
199 static uint32_t
200 sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
201 {
202         uint32_t ptype;
203         bool no_tunnel = false;
204
205         if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
206                      ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
207                 return 0;
208
209         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
210         case 0:
211                 ptype = RTE_PTYPE_L2_ETHER;
212                 break;
213         case 1:
214                 ptype = RTE_PTYPE_L2_ETHER_VLAN;
215                 break;
216         default:
217                 ptype = RTE_PTYPE_L2_ETHER_QINQ;
218                 break;
219         }
220
221         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
222         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
223                 no_tunnel = true;
224                 break;
225         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
226                 ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
227                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
228                 break;
229         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
230                 ptype |= RTE_PTYPE_TUNNEL_NVGRE;
231                 break;
232         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
233                 ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
234                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
235                 break;
236         default:
237                 /*
238                  * The driver does not know the tunnel type, but it is
239                  * still a tunnel and the NT_OR_INNER fields refer to
240                  * the inner frame.
241                  */
242                 no_tunnel = false;
243         }
244
245         if (no_tunnel) {
246                 bool l4_valid = true;
247
248                 switch (EFX_WORD_FIELD(class,
249                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
250                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
251                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
252                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
253                         break;
254                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
255                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
256                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
257                         break;
258                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
259                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
260                         break;
261                 default:
262                         l4_valid = false;
263                 }
264
265                 if (l4_valid) {
266                         switch (EFX_WORD_FIELD(class,
267                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
268                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
269                                 ptype |= RTE_PTYPE_L4_TCP;
270                                 *ol_flags |=
271                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
272                                 break;
273                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
274                                 ptype |= RTE_PTYPE_L4_UDP;
275                                 *ol_flags |=
276                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
277                                 break;
278                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
279                                 ptype |= RTE_PTYPE_L4_FRAG;
280                                 break;
281                         }
282                 }
283         } else {
284                 bool l4_valid = true;
285
286                 switch (EFX_WORD_FIELD(class,
287                         ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
288                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
289                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
290                         break;
291                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
292                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
293                         *ol_flags |= PKT_RX_EIP_CKSUM_BAD;
294                         break;
295                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
296                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
297                         break;
298                 }
299
300                 switch (EFX_WORD_FIELD(class,
301                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
302                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
303                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
304                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
305                         break;
306                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
307                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
308                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
309                         break;
310                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
311                         ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
312                         break;
313                 default:
314                         l4_valid = false;
315                         break;
316                 }
317
318                 if (l4_valid) {
319                         switch (EFX_WORD_FIELD(class,
320                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
321                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
322                                 ptype |= RTE_PTYPE_INNER_L4_TCP;
323                                 *ol_flags |=
324                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
325                                 break;
326                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
327                                 ptype |= RTE_PTYPE_INNER_L4_UDP;
328                                 *ol_flags |=
329                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
330                                 break;
331                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
332                                 ptype |= RTE_PTYPE_INNER_L4_FRAG;
333                                 break;
334                         }
335                 }
336         }
337
338         return ptype;
339 }
340
341 /*
342  * The functions below rely on the following fields in the Rx prefix.
343  * Some fields are mandatory, some are optional.
344  * See sfc_ef100_rx_qstart() below.
345  */
346 static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
347         .erpl_fields    = {
348 #define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
349         EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)
350
351                 SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
352                 SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
353
354 #undef  SFC_EF100_RX_PREFIX_FIELD
355         }
356 };
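
/*
 * In the layout above, LENGTH supplies the packet length read in
 * sfc_ef100_rx_process_ready_pkts() and CLASS supplies the header
 * classification word decoded in sfc_ef100_rx_prefix_to_offloads();
 * both are declared with the big-endian flag set to B_FALSE.
 */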
357
358 static bool
359 sfc_ef100_rx_prefix_to_offloads(const efx_oword_t *rx_prefix,
360                                 struct rte_mbuf *m)
361 {
362         const efx_word_t *class;
363         uint64_t ol_flags = 0;
364
365         RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
366         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
367         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
368                          sizeof(*class));
369         class = (const efx_word_t *)((const uint8_t *)rx_prefix +
370                 EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
371         if (unlikely(EFX_WORD_FIELD(*class,
372                                     ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
373                      ESE_GZ_RH_HCLASS_L2_STATUS_OK))
374                 return false;
375
376         m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);
377
378         m->ol_flags = ol_flags;
379         return true;
380 }
381
382 static const uint8_t *
383 sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
384 {
385         return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
386 }
387
388 static struct rte_mbuf *
389 sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
390 {
391         struct rte_mbuf *m;
392         unsigned int id;
393
394         /* mbuf associated with current Rx descriptor */
395         m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;
396
397         /* completed is already moved to the next one */
398         if (unlikely(rxq->completed == rxq->added))
399                 goto done;
400
401         /*
402          * Prefetch the Rx prefix of the next packet.
403          * If the current packet is scattered and the next mbuf is its
404          * fragment, this simply prefetches some data - no harm since the
405          * packet rate should not be high when scatter is used.
406          */
407         id = rxq->completed & rxq->ptr_mask;
408         rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));
409
410         if (unlikely(rxq->completed + 1 == rxq->added))
411                 goto done;
412
413         /*
414          * Prefetch the mbuf control structure of the next-after-next Rx
415          * descriptor.
416          */
417         id = (id == rxq->ptr_mask) ? 0 : (id + 1);
418         rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);
419
420         /*
421          * If next time we will need an SW Rx descriptor from the next
422          * cache line, try to make sure that it is already in cache.
423          */
424         if ((id & 0x7) == 0x7)
425                 rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);
426
427 done:
428         return m;
429 }
430
431 static struct rte_mbuf **
432 sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
433                                 struct rte_mbuf **rx_pkts,
434                                 struct rte_mbuf ** const rx_pkts_end)
435 {
436         while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
437                 struct rte_mbuf *pkt;
438                 struct rte_mbuf *lastseg;
439                 const efx_oword_t *rx_prefix;
440                 uint16_t pkt_len;
441                 uint16_t seg_len;
442                 bool deliver;
443
444                 rxq->ready_pkts--;
445
446                 pkt = sfc_ef100_rx_next_mbuf(rxq);
447                 MBUF_RAW_ALLOC_CHECK(pkt);
448
449                 RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
450                                  sizeof(rxq->rearm_data));
451                 pkt->rearm_data[0] = rxq->rearm_data;
452
453                 /* data_off already moved past Rx prefix */
454                 rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);
455
456                 pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
457                                           ESF_GZ_RX_PREFIX_LENGTH);
458                 SFC_ASSERT(pkt_len > 0);
459                 rte_pktmbuf_pkt_len(pkt) = pkt_len;
460
461                 seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
462                 rte_pktmbuf_data_len(pkt) = seg_len;
463
464                 deliver = sfc_ef100_rx_prefix_to_offloads(rx_prefix, pkt);
465
466                 lastseg = pkt;
467                 while ((pkt_len -= seg_len) > 0) {
468                         struct rte_mbuf *seg;
469
470                         seg = sfc_ef100_rx_next_mbuf(rxq);
471                         MBUF_RAW_ALLOC_CHECK(seg);
472
473                         seg->data_off = RTE_PKTMBUF_HEADROOM;
474
475                         seg_len = RTE_MIN(pkt_len, rxq->buf_size);
476                         rte_pktmbuf_data_len(seg) = seg_len;
477                         rte_pktmbuf_pkt_len(seg) = seg_len;
478
479                         pkt->nb_segs++;
480                         lastseg->next = seg;
481                         lastseg = seg;
482                 }
483
484                 if (likely(deliver))
485                         *rx_pkts++ = pkt;
486                 else
487                         rte_pktmbuf_free(pkt);
488         }
489
490         return rx_pkts;
491 }
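
/*
 * Scatter note for the loop above: only the first mbuf of a packet gets
 * data_off moved past the Rx prefix (via rearm_data); continuation segments
 * are reset to RTE_PKTMBUF_HEADROOM and may carry up to the full buf_size
 * of data, which assumes that only the first buffer of a packet carries an
 * Rx prefix.
 */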
492
493 static bool
494 sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
495 {
496         *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];
497
498         if (!sfc_ef100_ev_present(ev,
499                         (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
500                 return false;
501
502         if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
503                 /*
504                  * Do not move read_ptr to keep the event for exception
505                  * handling by the control path.
506                  */
507                 rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
508                 sfc_ef100_rx_err(rxq,
509                         "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
510                         rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
511                         EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
512                         EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
513                 return false;
514         }
515
516         sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
517                            EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
518                            EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
519                            rxq->evq_read_ptr,
520                            rxq->evq_read_ptr & rxq->ptr_mask);
521
522         rxq->evq_read_ptr++;
523         return true;
524 }
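
/*
 * Phase bit note for the check above: evq_phase_bit_shift is
 * rte_bsf32(evq_entries), i.e. log2 of the (power of two) event queue size,
 * so (evq_read_ptr >> evq_phase_bit_shift) & 1 flips on every wrap of the
 * read pointer and lets sfc_ef100_ev_present() tell freshly written events
 * from stale ones.
 */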
525
526 static uint16_t
527 sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
528 {
529         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
530         struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
531         efx_qword_t rx_ev;
532
533         rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);
534
535         if (unlikely(rxq->flags &
536                      (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
537                 goto done;
538
539         while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
540                 rxq->ready_pkts =
541                         EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
542                 rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
543                                                           rx_pkts_end);
544         }
545
546         /* It is not a problem to refill even in the case of an exception */
547         sfc_ef100_rx_qrefill(rxq);
548
549 done:
550         return nb_pkts - (rx_pkts_end - rx_pkts);
551 }
552
553 static const uint32_t *
554 sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
555 {
556         static const uint32_t ef100_native_ptypes[] = {
557                 RTE_PTYPE_L2_ETHER,
558                 RTE_PTYPE_L2_ETHER_VLAN,
559                 RTE_PTYPE_L2_ETHER_QINQ,
560                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
561                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
562                 RTE_PTYPE_L4_TCP,
563                 RTE_PTYPE_L4_UDP,
564                 RTE_PTYPE_L4_FRAG,
565                 RTE_PTYPE_TUNNEL_VXLAN,
566                 RTE_PTYPE_TUNNEL_NVGRE,
567                 RTE_PTYPE_TUNNEL_GENEVE,
568                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
569                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
570                 RTE_PTYPE_INNER_L4_TCP,
571                 RTE_PTYPE_INNER_L4_UDP,
572                 RTE_PTYPE_INNER_L4_FRAG,
573                 RTE_PTYPE_UNKNOWN
574         };
575
576         return ef100_native_ptypes;
577 }
578
579 static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
580 static unsigned int
581 sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
582 {
583         return 0;
584 }
585
586 static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
587 static int
588 sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
589                           __rte_unused uint16_t offset)
590 {
591         return -ENOTSUP;
592 }
593
594
595 static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
596 static void
597 sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
598 {
599         /*
600          * Number of descriptors just defines maximum number of pushed
601          * descriptors (fill level).
602          */
603         dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
604         dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
605 }
606
607
608 static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
609 static int
610 sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
611                            struct sfc_dp_rx_hw_limits *limits,
612                            __rte_unused struct rte_mempool *mb_pool,
613                            unsigned int *rxq_entries,
614                            unsigned int *evq_entries,
615                            unsigned int *rxq_max_fill_level)
616 {
617         /*
618          * rte_ethdev API guarantees that the number meets min, max and
619          * alignment requirements.
620          */
621         if (nb_rx_desc <= limits->rxq_min_entries)
622                 *rxq_entries = limits->rxq_min_entries;
623         else
624                 *rxq_entries = rte_align32pow2(nb_rx_desc);
625
626         *evq_entries = *rxq_entries;
627
628         *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
629                                       SFC_EF100_RXQ_LIMIT(*evq_entries));
630         return 0;
631 }
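
/*
 * Worked example (illustrative figures): for nb_rx_desc = 1024, assuming
 * rxq_min_entries is below 1024, the function above reports rxq_entries =
 * evq_entries = 1024 and rxq_max_fill_level = RTE_MIN(1024, 1024 - 3) = 1021.
 */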
632
633
634 static uint64_t
635 sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
636 {
637         struct rte_mbuf m;
638
639         memset(&m, 0, sizeof(m));
640
641         rte_mbuf_refcnt_set(&m, 1);
642         m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
643         m.nb_segs = 1;
644         m.port = port_id;
645
646         /* rearm_data covers structure members filled in above */
647         rte_compiler_barrier();
648         RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
649         return m.rearm_data[0];
650 }
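
/*
 * The value built above is consumed on the hot path in
 * sfc_ef100_rx_process_ready_pkts(): a single 64-bit store of
 * rxq->rearm_data into pkt->rearm_data[0] re-initialises refcnt,
 * data_off (just past the Rx prefix), nb_segs and port in one go.
 */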
651
652 static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
653 static int
654 sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
655                     const struct rte_pci_addr *pci_addr, int socket_id,
656                     const struct sfc_dp_rx_qcreate_info *info,
657                     struct sfc_dp_rxq **dp_rxqp)
658 {
659         struct sfc_ef100_rxq *rxq;
660         int rc;
661
662         rc = EINVAL;
663         if (info->rxq_entries != info->evq_entries)
664                 goto fail_rxq_args;
665
666         rc = ENOMEM;
667         rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
668                                  RTE_CACHE_LINE_SIZE, socket_id);
669         if (rxq == NULL)
670                 goto fail_rxq_alloc;
671
672         sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
673
674         rc = ENOMEM;
675         rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
676                                          info->rxq_entries,
677                                          sizeof(*rxq->sw_ring),
678                                          RTE_CACHE_LINE_SIZE, socket_id);
679         if (rxq->sw_ring == NULL)
680                 goto fail_desc_alloc;
681
682         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
683         rxq->ptr_mask = info->rxq_entries - 1;
684         rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
685         rxq->evq_hw_ring = info->evq_hw_ring;
686         rxq->max_fill_level = info->max_fill_level;
687         rxq->refill_threshold = info->refill_threshold;
688         rxq->prefix_size = info->prefix_size;
689         rxq->buf_size = info->buf_size;
690         rxq->refill_mb_pool = info->refill_mb_pool;
691         rxq->rxq_hw_ring = info->rxq_hw_ring;
692         rxq->doorbell = (volatile uint8_t *)info->mem_bar +
693                         ER_GZ_RX_RING_DOORBELL_OFST +
694                         (info->hw_index << info->vi_window_shift);
695
696         sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
697
698         *dp_rxqp = &rxq->dp;
699         return 0;
700
701 fail_desc_alloc:
702         rte_free(rxq);
703
704 fail_rxq_alloc:
705 fail_rxq_args:
706         return rc;
707 }
708
709 static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
710 static void
711 sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
712 {
713         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
714
715         rte_free(rxq->sw_ring);
716         rte_free(rxq);
717 }
718
719 static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
720 static int
721 sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
722                     const efx_rx_prefix_layout_t *pinfo)
723 {
724         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
725         uint32_t unsup_rx_prefix_fields;
726
727         SFC_ASSERT(rxq->completed == 0);
728         SFC_ASSERT(rxq->added == 0);
729
730         /* Prefix must fit into reserved Rx buffer space */
731         if (pinfo->erpl_length > rxq->prefix_size)
732                 return ENOTSUP;
733
734         unsup_rx_prefix_fields =
735                 efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);
736
737         /* LENGTH and CLASS fields must always be present */
738         if ((unsup_rx_prefix_fields &
739              ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
740               (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
741                 return ENOTSUP;
742
743         rxq->prefix_size = pinfo->erpl_length;
744         rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
745                                                        rxq->prefix_size);
746
747         sfc_ef100_rx_qrefill(rxq);
748
749         rxq->evq_read_ptr = evq_read_ptr;
750
751         rxq->flags |= SFC_EF100_RXQ_STARTED;
752         rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);
753
754         return 0;
755 }
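
/*
 * Per-queue Rx prefix handling summary for the function above: the control
 * path passes the layout actually negotiated for this queue (pinfo), the
 * datapath checks that it fits into the reserved prefix space and that the
 * mandatory LENGTH and CLASS fields are present, and then adopts the real
 * prefix length so that data_off in rearm_data points just past it.
 */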
756
757 static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
758 static void
759 sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
760 {
761         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
762
763         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
764
765         *evq_read_ptr = rxq->evq_read_ptr;
766 }
767
768 static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
769 static bool
770 sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
771 {
772         __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
773
774         SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);
775
776         /*
777          * It is safe to ignore Rx event since we free all mbufs on
778          * queue purge anyway.
779          */
780
781         return false;
782 }
783
784 static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
785 static void
786 sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
787 {
788         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
789         unsigned int i;
790         struct sfc_ef100_rx_sw_desc *rxd;
791
792         for (i = rxq->completed; i != rxq->added; ++i) {
793                 rxd = &rxq->sw_ring[i & rxq->ptr_mask];
794                 rte_mbuf_raw_free(rxd->mbuf);
795                 rxd->mbuf = NULL;
796         }
797
798         rxq->completed = rxq->added = 0;
799         rxq->ready_pkts = 0;
800
801         rxq->flags &= ~SFC_EF100_RXQ_STARTED;
802 }
803
804 struct sfc_dp_rx sfc_ef100_rx = {
805         .dp = {
806                 .name           = SFC_KVARG_DATAPATH_EF100,
807                 .type           = SFC_DP_RX,
808                 .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
809         },
810         .features               = SFC_DP_RX_FEAT_MULTI_PROCESS,
811         .dev_offload_capa       = 0,
812         .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
813                                   DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
814                                   DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
815                                   DEV_RX_OFFLOAD_SCATTER,
816         .get_dev_info           = sfc_ef100_rx_get_dev_info,
817         .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
818         .qcreate                = sfc_ef100_rx_qcreate,
819         .qdestroy               = sfc_ef100_rx_qdestroy,
820         .qstart                 = sfc_ef100_rx_qstart,
821         .qstop                  = sfc_ef100_rx_qstop,
822         .qrx_ev                 = sfc_ef100_rx_qrx_ev,
823         .qpurge                 = sfc_ef100_rx_qpurge,
824         .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
825         .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
826         .qdesc_status           = sfc_ef100_rx_qdesc_status,
827         .pkt_burst              = sfc_ef100_recv_pkts,
828 };