net/sfc: collect per queue stats in EF100 Rx
drivers/net/sfc/sfc_ef100_rx.c
/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2018-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF100 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx_types.h"
#include "efx_regs_ef100.h"
#include "efx.h"

#include "sfc_debug.h"
#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"


#define sfc_ef100_rx_err(_rxq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

#define sfc_ef100_rx_debug(_rxq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
                   __VA_ARGS__)

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF100 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events on the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
        ((_ndesc) - 1 /* head must not step on tail */ - \
         1 /* Rx error */ - 1 /* flush */)
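/*
 * For example, a 512-entry ring may be filled with at most
 * SFC_EF100_RXQ_LIMIT(512) = 512 - 1 - 1 - 1 = 509 buffers.
 */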

/** Invalid user mark value when the mark should be treated as unset */
#define SFC_EF100_USER_MARK_INVALID     0

struct sfc_ef100_rx_sw_desc {
        struct rte_mbuf                 *mbuf;
};

struct sfc_ef100_rxq {
        /* Used on data path */
        unsigned int                    flags;
#define SFC_EF100_RXQ_STARTED           0x1
#define SFC_EF100_RXQ_NOT_RUNNING       0x2
#define SFC_EF100_RXQ_EXCEPTION         0x4
#define SFC_EF100_RXQ_RSS_HASH          0x10
#define SFC_EF100_RXQ_USER_MARK         0x20
#define SFC_EF100_RXQ_FLAG_INTR_EN      0x40
        unsigned int                    ptr_mask;
        unsigned int                    evq_phase_bit_shift;
        unsigned int                    ready_pkts;
        unsigned int                    completed;
        unsigned int                    evq_read_ptr;
        unsigned int                    evq_read_ptr_primed;
        volatile efx_qword_t            *evq_hw_ring;
        struct sfc_ef100_rx_sw_desc     *sw_ring;
        uint64_t                        rearm_data;
        uint16_t                        buf_size;
        uint16_t                        prefix_size;

        unsigned int                    evq_hw_index;
        volatile void                   *evq_prime;

        /* Used on refill */
        unsigned int                    added;
        unsigned int                    max_fill_level;
        unsigned int                    refill_threshold;
        struct rte_mempool              *refill_mb_pool;
        efx_qword_t                     *rxq_hw_ring;
        volatile void                   *doorbell;

        /* Datapath receive queue anchor */
        struct sfc_dp_rxq               dp;
};

static inline struct sfc_ef100_rxq *
sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
        return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
}

static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
        sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
                            rxq->evq_read_ptr & rxq->ptr_mask);
        rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}

static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
        efx_dword_t dword;

        EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

        /* DMA sync to device is not required */

        /*
         * rte_write32() has rte_io_wmb() which guarantees that the STORE
         * operations (i.e. Rx and event descriptor updates) that precede
         * the rte_io_wmb() call are visible to the NIC before the STORE
         * operations that follow it (i.e. the doorbell write).
         */
        rte_write32(dword.ed_u32[0], rxq->doorbell);
        rxq->dp.dpq.rx_dbells++;

        sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
                           EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
                           added);
}

static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
        const unsigned int ptr_mask = rxq->ptr_mask;
        unsigned int free_space;
        unsigned int bulks;
        void *objs[SFC_RX_REFILL_BULK];
        unsigned int added = rxq->added;

        free_space = rxq->max_fill_level - (added - rxq->completed);
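        /*
         * Example: with max_fill_level = 509, added = 700 and
         * completed = 300, the NIC still owns 400 buffers and
         * free_space = 509 - (700 - 300) = 109 more may be posted.
         */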

        if (free_space < rxq->refill_threshold)
                return;

        bulks = free_space / RTE_DIM(objs);
        /* refill_threshold guarantees that bulks is positive */
        SFC_ASSERT(bulks > 0);

        do {
                unsigned int id;
                unsigned int i;

                if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
                                                  RTE_DIM(objs)) < 0)) {
                        struct rte_eth_dev_data *dev_data =
                                rte_eth_devices[rxq->dp.dpq.port_id].data;

                        /*
                         * It is hardly safe to increment the counter from
                         * different contexts, but all PMDs do it this way.
                         */
                        dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
                        /* Return if we have posted nothing yet */
                        if (added == rxq->added)
                                return;
                        /* Push posted */
                        break;
                }

                for (i = 0, id = added & ptr_mask;
                     i < RTE_DIM(objs);
                     ++i, ++id) {
                        struct rte_mbuf *m = objs[i];
                        struct sfc_ef100_rx_sw_desc *rxd;
                        rte_iova_t phys_addr;

                        __rte_mbuf_raw_sanity_check(m);

                        SFC_ASSERT((id & ~ptr_mask) == 0);
                        rxd = &rxq->sw_ring[id];
                        rxd->mbuf = m;

                        /*
                         * Avoid writing to the mbuf. It is cheaper to do it
                         * when we receive the packet and fill in nearby
                         * structure members.
                         */

                        phys_addr = rte_mbuf_data_iova_default(m);
                        EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
                            ESF_GZ_RX_BUF_ADDR, phys_addr);
                }

                added += RTE_DIM(objs);
        } while (--bulks > 0);

        SFC_ASSERT(rxq->added != added);
        rxq->added = added;
        sfc_ef100_rx_qpush(rxq, added);
}

static inline uint64_t
sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
{
        return EFX_WORD_FIELD(class,
                              ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
                ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
                PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
}

static inline uint64_t
sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
{
        return EFX_WORD_FIELD(class,
                              ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
                ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
                PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
}

static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
        uint32_t ptype;
        bool no_tunnel = false;

        if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
                     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
                return 0;

        switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
        case 0:
                ptype = RTE_PTYPE_L2_ETHER;
                break;
        case 1:
                ptype = RTE_PTYPE_L2_ETHER_VLAN;
                break;
        default:
                ptype = RTE_PTYPE_L2_ETHER_QINQ;
                break;
        }

        switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
                no_tunnel = true;
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
                ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
                *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
                ptype |= RTE_PTYPE_TUNNEL_NVGRE;
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
                ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
                *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
                break;
        default:
                /*
                 * The driver does not know the tunnel, but it is
                 * still a tunnel and NT_OR_INNER refers to the
                 * inner frame.
                 */
                no_tunnel = false;
        }

        if (no_tunnel) {
                bool l4_valid = true;

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
                        break;
                default:
                        l4_valid = false;
                }

                if (l4_valid) {
                        switch (EFX_WORD_FIELD(class,
                                ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
                        case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
                                ptype |= RTE_PTYPE_L4_TCP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
                                ptype |= RTE_PTYPE_L4_UDP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
                                ptype |= RTE_PTYPE_L4_FRAG;
                                break;
                        }
                }
        } else {
                bool l4_valid = true;

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
                        break;
                }

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
                        break;
                default:
                        l4_valid = false;
                        break;
                }

                if (l4_valid) {
                        switch (EFX_WORD_FIELD(class,
                                ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
                        case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
                                ptype |= RTE_PTYPE_INNER_L4_TCP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
                                ptype |= RTE_PTYPE_INNER_L4_UDP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
                                ptype |= RTE_PTYPE_INNER_L4_FRAG;
                                break;
                        }
                }
        }

        return ptype;
}
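
/*
 * Example: for a VXLAN-encapsulated TCP/IPv4 packet with good checksums
 * the function above returns
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
 * RTE_PTYPE_INNER_L4_TCP and sets PKT_RX_OUTER_L4_CKSUM_GOOD,
 * PKT_RX_IP_CKSUM_GOOD and PKT_RX_L4_CKSUM_GOOD in ol_flags.
 */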

/*
 * The function below relies on the following fields in the Rx prefix.
 * Some fields are mandatory, some are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
        .erpl_fields    = {
#define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
        EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

                SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef  SFC_EF100_RX_PREFIX_FIELD
        }
};

static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
                                const efx_oword_t *rx_prefix,
                                struct rte_mbuf *m)
{
        const efx_word_t *class;
        uint64_t ol_flags = 0;

        RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
        RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
        RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
                         sizeof(*class));
        class = (const efx_word_t *)((const uint8_t *)rx_prefix +
                EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
        if (unlikely(EFX_WORD_FIELD(*class,
                                    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
                     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
                return false;

        m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

        if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
            EFX_TEST_OWORD_BIT(rx_prefix[0],
                               ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
                ol_flags |= PKT_RX_RSS_HASH;
                /* EFX_OWORD_FIELD converts little-endian to CPU */
                m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
                                              ESF_GZ_RX_PREFIX_RSS_HASH);
        }

        if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
                uint32_t user_mark;

                /* EFX_OWORD_FIELD converts little-endian to CPU */
                user_mark = EFX_OWORD_FIELD(rx_prefix[0],
                                            ESF_GZ_RX_PREFIX_USER_MARK);
                if (user_mark != SFC_EF100_USER_MARK_INVALID) {
                        ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
                        m->hash.fdir.hi = user_mark;
                }
        }

        m->ol_flags = ol_flags;
        return true;
}

static const uint8_t *
sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
{
        return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
}

static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
        struct rte_mbuf *m;
        unsigned int id;

        /* mbuf associated with the current Rx descriptor */
        m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

        /* completed has already been advanced to the next descriptor */
        if (unlikely(rxq->completed == rxq->added))
                goto done;

        /*
         * Prefetch the Rx prefix of the next packet. If the current
         * packet is scattered and the next mbuf is its fragment, this
         * simply prefetches some data; no harm, since the packet rate
         * should not be high if scatter is used.
         */
        id = rxq->completed & rxq->ptr_mask;
        rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

        if (unlikely(rxq->completed + 1 == rxq->added))
                goto done;

        /*
         * Prefetch the mbuf control structure of the Rx descriptor
         * after the next one.
         */
        id = (id == rxq->ptr_mask) ? 0 : (id + 1);
        rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

        /*
         * If the SW Rx descriptor needed next time will come from the
         * next cache line (eight 8-byte descriptors fit in a 64-byte
         * cache line), try to make sure that it is already in cache.
         */
        if ((id & 0x7) == 0x7)
                rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
        return m;
}

static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
                                struct rte_mbuf **rx_pkts,
                                struct rte_mbuf ** const rx_pkts_end)
{
        while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
                struct rte_mbuf *pkt;
                struct rte_mbuf *lastseg;
                const efx_oword_t *rx_prefix;
                uint16_t pkt_len;
                uint16_t seg_len;
                bool deliver;

                rxq->ready_pkts--;

                pkt = sfc_ef100_rx_next_mbuf(rxq);
                __rte_mbuf_raw_sanity_check(pkt);

                RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
                                 sizeof(rxq->rearm_data));
                pkt->rearm_data[0] = rxq->rearm_data;

                /* data_off already points past the Rx prefix */
                rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);

                pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
                                          ESF_GZ_RX_PREFIX_LENGTH);
                SFC_ASSERT(pkt_len > 0);
                rte_pktmbuf_pkt_len(pkt) = pkt_len;

                seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
                rte_pktmbuf_data_len(pkt) = seg_len;

                deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

                lastseg = pkt;
                while ((pkt_len -= seg_len) > 0) {
                        struct rte_mbuf *seg;

                        seg = sfc_ef100_rx_next_mbuf(rxq);
                        __rte_mbuf_raw_sanity_check(seg);

                        seg->data_off = RTE_PKTMBUF_HEADROOM;

                        seg_len = RTE_MIN(pkt_len, rxq->buf_size);
                        rte_pktmbuf_data_len(seg) = seg_len;
                        rte_pktmbuf_pkt_len(seg) = seg_len;

                        pkt->nb_segs++;
                        lastseg->next = seg;
                        lastseg = seg;
                }

                if (likely(deliver)) {
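                        /*
                         * Deliver the packet and account it in the
                         * per-queue packet and byte counters
                         * (SFC_DP_RX_FEAT_STATS).
                         */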
                        *rx_pkts++ = pkt;
                        sfc_pkts_bytes_add(&rxq->dp.dpq.stats, 1,
                                           rte_pktmbuf_pkt_len(pkt));
                } else {
                        rte_pktmbuf_free(pkt);
                }
        }

        return rx_pkts;
}

static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
        *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

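        /*
         * An event is present if its phase bit matches the expected one:
         * evq_phase_bit_shift is log2(evq_entries), so the expected phase
         * flips each time evq_read_ptr wraps around the event ring.
         */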
        if (!sfc_ef100_ev_present(ev,
                        (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
                return false;

        if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
                /*
                 * Do not move read_ptr to keep the event for exception
                 * handling by the control path.
                 */
                rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
                sfc_ef100_rx_err(rxq,
                        "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
                        rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
                        EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
                        EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
                return false;
        }

        sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
                           EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
                           EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
                           rxq->evq_read_ptr,
                           rxq->evq_read_ptr & rxq->ptr_mask);

        rxq->evq_read_ptr++;
        return true;
}

static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
        struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
        efx_qword_t rx_ev;

        rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

        if (unlikely(rxq->flags &
                     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
                goto done;

        while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
                rxq->ready_pkts =
                        EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
                rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
                                                          rx_pkts_end);
        }

        /* It does no harm to refill even in the exception case */
        sfc_ef100_rx_qrefill(rxq);

        if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
            rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
                sfc_ef100_rx_qprime(rxq);

done:
        return nb_pkts - (rx_pkts_end - rx_pkts);
}

static const uint32_t *
sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
        static const uint32_t ef100_native_ptypes[] = {
                RTE_PTYPE_L2_ETHER,
                RTE_PTYPE_L2_ETHER_VLAN,
                RTE_PTYPE_L2_ETHER_QINQ,
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_L4_TCP,
                RTE_PTYPE_L4_UDP,
                RTE_PTYPE_L4_FRAG,
                RTE_PTYPE_TUNNEL_VXLAN,
                RTE_PTYPE_TUNNEL_NVGRE,
                RTE_PTYPE_TUNNEL_GENEVE,
                RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_INNER_L4_TCP,
                RTE_PTYPE_INNER_L4_UDP,
                RTE_PTYPE_INNER_L4_FRAG,
                RTE_PTYPE_UNKNOWN
        };

        return ef100_native_ptypes;
}

static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
        return 0;
}

static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
                          __rte_unused uint16_t offset)
{
        return -ENOTSUP;
}


static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
        /*
         * The number of descriptors just defines the maximum number of
         * pushed descriptors (the fill level).
         */
        dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
        dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}


static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
static int
sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
                           struct sfc_dp_rx_hw_limits *limits,
                           __rte_unused struct rte_mempool *mb_pool,
                           unsigned int *rxq_entries,
                           unsigned int *evq_entries,
                           unsigned int *rxq_max_fill_level)
{
        /*
         * The rte_ethdev API guarantees that the number meets the min,
         * max and alignment requirements.
         */
        if (nb_rx_desc <= limits->rxq_min_entries)
                *rxq_entries = limits->rxq_min_entries;
        else
                *rxq_entries = rte_align32pow2(nb_rx_desc);

        *evq_entries = *rxq_entries;

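        /*
         * Example (assuming nb_rx_desc = 500 exceeds rxq_min_entries):
         * rxq_entries = evq_entries = rte_align32pow2(500) = 512 and the
         * fill level is RTE_MIN(500, SFC_EF100_RXQ_LIMIT(512)) =
         * RTE_MIN(500, 509) = 500.
         */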
        *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
                                      SFC_EF100_RXQ_LIMIT(*evq_entries));
        return 0;
}


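/*
 * Build a 64-bit mbuf rearm template: the receive path re-initialises
 * refcnt, nb_segs, port and data_off of every mbuf with a single 8-byte
 * store of this value into rearm_data (see
 * sfc_ef100_rx_process_ready_pkts() above).
 */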
static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
        struct rte_mbuf m;

        memset(&m, 0, sizeof(m));

        rte_mbuf_refcnt_set(&m, 1);
        m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
        m.nb_segs = 1;
        m.port = port_id;

        /* rearm_data covers structure members filled in above */
        rte_compiler_barrier();
        RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
        return m.rearm_data[0];
}

static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
                    const struct rte_pci_addr *pci_addr, int socket_id,
                    const struct sfc_dp_rx_qcreate_info *info,
                    struct sfc_dp_rxq **dp_rxqp)
{
        struct sfc_ef100_rxq *rxq;
        int rc;

        rc = EINVAL;
        if (info->rxq_entries != info->evq_entries)
                goto fail_rxq_args;

        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq == NULL)
                goto fail_rxq_alloc;

        sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

        rc = ENOMEM;
        rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
                                         info->rxq_entries,
                                         sizeof(*rxq->sw_ring),
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq->sw_ring == NULL)
                goto fail_desc_alloc;

        rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
        rxq->ptr_mask = info->rxq_entries - 1;
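        /* evq_entries is a power of 2, so rte_bsf32() yields its log2 */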
        rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
        rxq->evq_hw_ring = info->evq_hw_ring;
        rxq->max_fill_level = info->max_fill_level;
        rxq->refill_threshold = info->refill_threshold;
        rxq->prefix_size = info->prefix_size;
        rxq->buf_size = info->buf_size;
        rxq->refill_mb_pool = info->refill_mb_pool;
        rxq->rxq_hw_ring = info->rxq_hw_ring;
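        /*
         * The Rx doorbell of this queue lives in the per-queue virtual
         * interface (VI) window: hw_index selects the VI and each VI
         * occupies (1 << vi_window_shift) bytes of the BAR.
         */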
        rxq->doorbell = (volatile uint8_t *)info->mem_bar +
                        ER_GZ_RX_RING_DOORBELL_OFST +
                        (info->hw_index << info->vi_window_shift);

        rxq->evq_hw_index = info->evq_hw_index;
        rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
                         info->fcw_offset +
                         ER_GZ_EVQ_INT_PRIME_OFST;

        sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

        *dp_rxqp = &rxq->dp;
        return 0;

fail_desc_alloc:
        rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
        return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rte_free(rxq->sw_ring);
        rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
                    const efx_rx_prefix_layout_t *pinfo)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
        uint32_t unsup_rx_prefix_fields;

        SFC_ASSERT(rxq->completed == 0);
        SFC_ASSERT(rxq->added == 0);

        /* The prefix must fit into the reserved Rx buffer space */
        if (pinfo->erpl_length > rxq->prefix_size)
                return ENOTSUP;

        unsup_rx_prefix_fields =
                efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

        /* LENGTH and CLASS fields must always be present */
        if ((unsup_rx_prefix_fields &
             ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
              (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
                return ENOTSUP;

        if ((unsup_rx_prefix_fields &
             ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
              (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
                rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
        else
                rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

        if ((unsup_rx_prefix_fields &
             (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
                rxq->flags |= SFC_EF100_RXQ_USER_MARK;
        else
                rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

        rxq->prefix_size = pinfo->erpl_length;
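        /*
         * data_off in the mbuf rearm template depends on the actual Rx
         * prefix length negotiated above, so rebuild the template here.
         */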
        rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
                                                       rxq->prefix_size);

        sfc_ef100_rx_qrefill(rxq);

        rxq->evq_read_ptr = evq_read_ptr;

        rxq->flags |= SFC_EF100_RXQ_STARTED;
        rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

        if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
                sfc_ef100_rx_qprime(rxq);

        return 0;
}

static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

        *evq_read_ptr = rxq->evq_read_ptr;
}

static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
        __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

        /*
         * It is safe to ignore the Rx event since we free all mbufs on
         * queue purge anyway.
         */

        return false;
}

static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
static void
sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
        unsigned int i;
        struct sfc_ef100_rx_sw_desc *rxd;

        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_ring[i & rxq->ptr_mask];
                rte_mbuf_raw_free(rxd->mbuf);
                rxd->mbuf = NULL;
        }

        rxq->completed = rxq->added = 0;
        rxq->ready_pkts = 0;

        rxq->flags &= ~SFC_EF100_RXQ_STARTED;
}

static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
static int
sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
        if (rxq->flags & SFC_EF100_RXQ_STARTED)
                sfc_ef100_rx_qprime(rxq);
        return 0;
}

static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        /* Cannot disarm, just disable rearm */
        rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
        return 0;
}

static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
static unsigned int
sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        /*
         * The datapath keeps track only of added descriptors, since
         * the number of pushed descriptors always equals the number
         * of added descriptors due to enforced alignment.
         */
        return rxq->added;
}

struct sfc_dp_rx sfc_ef100_rx = {
        .dp = {
                .name           = SFC_KVARG_DATAPATH_EF100,
                .type           = SFC_DP_RX,
                .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
        },
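        /*
         * SFC_DP_RX_FEAT_STATS declares that this datapath maintains
         * per-queue packet and byte counters itself (see
         * sfc_pkts_bytes_add() in sfc_ef100_rx_process_ready_pkts()).
         */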
        .features               = SFC_DP_RX_FEAT_MULTI_PROCESS |
                                  SFC_DP_RX_FEAT_INTR |
                                  SFC_DP_RX_FEAT_STATS,
        .dev_offload_capa       = 0,
        .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
                                  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
                                  DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
                                  DEV_RX_OFFLOAD_SCATTER |
                                  DEV_RX_OFFLOAD_RSS_HASH,
        .get_dev_info           = sfc_ef100_rx_get_dev_info,
        .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
        .qcreate                = sfc_ef100_rx_qcreate,
        .qdestroy               = sfc_ef100_rx_qdestroy,
        .qstart                 = sfc_ef100_rx_qstart,
        .qstop                  = sfc_ef100_rx_qstop,
        .qrx_ev                 = sfc_ef100_rx_qrx_ev,
        .qpurge                 = sfc_ef100_rx_qpurge,
        .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
        .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
        .qdesc_status           = sfc_ef100_rx_qdesc_status,
        .intr_enable            = sfc_ef100_rx_intr_enable,
        .intr_disable           = sfc_ef100_rx_intr_disable,
        .get_pushed             = sfc_ef100_rx_get_pushed,
        .pkt_burst              = sfc_ef100_recv_pkts,
};