net/sfc: add Rx datapath method to get pushed buffers count
[dpdk.git] drivers/net/sfc/sfc_ef100_rx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2018-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9
10 /* EF100 native datapath implementation */
11
12 #include <stdbool.h>
13
14 #include <rte_byteorder.h>
15 #include <rte_mbuf_ptype.h>
16 #include <rte_mbuf.h>
17 #include <rte_io.h>
18
19 #include "efx_types.h"
20 #include "efx_regs_ef100.h"
21 #include "efx.h"
22
23 #include "sfc_debug.h"
24 #include "sfc_tweak.h"
25 #include "sfc_dp_rx.h"
26 #include "sfc_kvargs.h"
27 #include "sfc_ef100.h"
28
29
30 #define sfc_ef100_rx_err(_rxq, ...) \
31         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)
32
33 #define sfc_ef100_rx_debug(_rxq, ...) \
34         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
35                    __VA_ARGS__)
36
37 /**
38  * Maximum number of descriptors/buffers in the Rx ring.
39  * It should guarantee that the corresponding event queue never overfills.
40  * EF100 native datapath uses an event queue of the same size as the Rx queue.
41  * Maximum number of events on datapath can be estimated as number of
42  * Rx queue entries (one event per Rx buffer in the worst case) plus
43  * Rx error and flush events.
44  */
45 #define SFC_EF100_RXQ_LIMIT(_ndesc) \
46         ((_ndesc) - 1 /* head must not step on tail */ - \
47          1 /* Rx error */ - 1 /* flush */)
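/*
 * Editorial note (not part of the original sources): as a worked example of
 * the limit above, a 512-entry Rx ring may hold at most 512 - 3 = 509 posted
 * buffers, so the equally sized event queue always has room for one
 * completion event per buffer plus one Rx error event and one flush event,
 * while the ring head never steps on the tail.
 */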
48
49 /** Invalid user mark value when the mark should be treated as unset */
50 #define SFC_EF100_USER_MARK_INVALID     0
51
52 struct sfc_ef100_rx_sw_desc {
53         struct rte_mbuf                 *mbuf;
54 };
55
56 struct sfc_ef100_rxq {
57         /* Used on data path */
58         unsigned int                    flags;
59 #define SFC_EF100_RXQ_STARTED           0x1
60 #define SFC_EF100_RXQ_NOT_RUNNING       0x2
61 #define SFC_EF100_RXQ_EXCEPTION         0x4
62 #define SFC_EF100_RXQ_RSS_HASH          0x10
63 #define SFC_EF100_RXQ_USER_MARK         0x20
64 #define SFC_EF100_RXQ_FLAG_INTR_EN      0x40
65         unsigned int                    ptr_mask;
66         unsigned int                    evq_phase_bit_shift;
67         unsigned int                    ready_pkts;
68         unsigned int                    completed;
69         unsigned int                    evq_read_ptr;
70         unsigned int                    evq_read_ptr_primed;
71         volatile efx_qword_t            *evq_hw_ring;
72         struct sfc_ef100_rx_sw_desc     *sw_ring;
73         uint64_t                        rearm_data;
74         uint16_t                        buf_size;
75         uint16_t                        prefix_size;
76
77         unsigned int                    evq_hw_index;
78         volatile void                   *evq_prime;
79
80         /* Used on refill */
81         unsigned int                    added;
82         unsigned int                    max_fill_level;
83         unsigned int                    refill_threshold;
84         struct rte_mempool              *refill_mb_pool;
85         efx_qword_t                     *rxq_hw_ring;
86         volatile void                   *doorbell;
87
88         /* Datapath receive queue anchor */
89         struct sfc_dp_rxq               dp;
90 };
91
92 static inline struct sfc_ef100_rxq *
93 sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
94 {
95         return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
96 }
97
98 static void
99 sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
100 {
101         sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
102                             rxq->evq_read_ptr & rxq->ptr_mask);
103         rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
104 }
105
106 static inline void
107 sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
108 {
109         efx_dword_t dword;
110
111         EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);
112
113         /* DMA sync to device is not required */
114
115         /*
116          * rte_write32() has rte_io_wmb() which guarantees that the STORE
117          * operations (i.e. Rx and event descriptor updates) that precede
118          * the rte_io_wmb() call are visible to NIC before the STORE
119          * operations that follow it (i.e. doorbell write).
120          */
121         rte_write32(dword.ed_u32[0], rxq->doorbell);
122
123         sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
124                            EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
125                            added);
126 }
127
128 static void
129 sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
130 {
131         const unsigned int ptr_mask = rxq->ptr_mask;
132         unsigned int free_space;
133         unsigned int bulks;
134         void *objs[SFC_RX_REFILL_BULK];
135         unsigned int added = rxq->added;
136
137         free_space = rxq->max_fill_level - (added - rxq->completed);
138
139         if (free_space < rxq->refill_threshold)
140                 return;
141
142         bulks = free_space / RTE_DIM(objs);
143         /* refill_threshold guarantees that bulks is positive */
144         SFC_ASSERT(bulks > 0);
145
146         do {
147                 unsigned int id;
148                 unsigned int i;
149
150                 if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
151                                                   RTE_DIM(objs)) < 0)) {
152                         struct rte_eth_dev_data *dev_data =
153                                 rte_eth_devices[rxq->dp.dpq.port_id].data;
154
155                         /*
156                          * It is hardly a safe way to increment counter
157                          * from different contexts, but all PMDs do it.
158                          */
159                         dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
160                         /* Return if we have posted nothing yet */
161                         if (added == rxq->added)
162                                 return;
163                         /* Push posted */
164                         break;
165                 }
166
167                 for (i = 0, id = added & ptr_mask;
168                      i < RTE_DIM(objs);
169                      ++i, ++id) {
170                         struct rte_mbuf *m = objs[i];
171                         struct sfc_ef100_rx_sw_desc *rxd;
172                         rte_iova_t phys_addr;
173
174                         __rte_mbuf_raw_sanity_check(m);
175
176                         SFC_ASSERT((id & ~ptr_mask) == 0);
177                         rxd = &rxq->sw_ring[id];
178                         rxd->mbuf = m;
179
180                         /*
181                          * Avoid writing to mbuf. It is cheaper to do it
182                          * when we receive packet and fill in nearby
183                          * structure members.
184                          */
185
186                         phys_addr = rte_mbuf_data_iova_default(m);
187                         EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
188                             ESF_GZ_RX_BUF_ADDR, phys_addr);
189                 }
190
191                 added += RTE_DIM(objs);
192         } while (--bulks > 0);
193
194         SFC_ASSERT(rxq->added != added);
195         rxq->added = added;
196         sfc_ef100_rx_qpush(rxq, added);
197 }
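/*
 * Editorial note (not part of the original sources): the refill above pulls
 * mbufs from the mempool in bulks of SFC_RX_REFILL_BULK and writes only the
 * buffer addresses into the hardware ring; the doorbell is rung once with
 * the final "added" value rather than once per bulk, which keeps MMIO writes
 * off the per-mbuf path.
 */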
198
199 static inline uint64_t
200 sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
201 {
202         return EFX_WORD_FIELD(class,
203                               ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
204                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
205                 PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
206 }
207
208 static inline uint64_t
209 sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
210 {
211         return EFX_WORD_FIELD(class,
212                               ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
213                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
214                 PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
215 }
216
217 static uint32_t
218 sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
219 {
220         uint32_t ptype;
221         bool no_tunnel = false;
222
223         if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
224                      ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
225                 return 0;
226
227         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
228         case 0:
229                 ptype = RTE_PTYPE_L2_ETHER;
230                 break;
231         case 1:
232                 ptype = RTE_PTYPE_L2_ETHER_VLAN;
233                 break;
234         default:
235                 ptype = RTE_PTYPE_L2_ETHER_QINQ;
236                 break;
237         }
238
239         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
240         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
241                 no_tunnel = true;
242                 break;
243         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
244                 ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
245                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
246                 break;
247         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
248                 ptype |= RTE_PTYPE_TUNNEL_NVGRE;
249                 break;
250         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
251                 ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
252                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
253                 break;
254         default:
255                 /*
256                  * Driver does not know the tunnel, but it is
257                  * still a tunnel and NT_OR_INNER refer to inner
258                  * frame.
259                  */
260                 no_tunnel = false;
261         }
262
263         if (no_tunnel) {
264                 bool l4_valid = true;
265
266                 switch (EFX_WORD_FIELD(class,
267                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
268                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
269                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
270                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
271                         break;
272                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
273                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
274                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
275                         break;
276                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
277                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
278                         break;
279                 default:
280                         l4_valid = false;
281                 }
282
283                 if (l4_valid) {
284                         switch (EFX_WORD_FIELD(class,
285                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
286                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
287                                 ptype |= RTE_PTYPE_L4_TCP;
288                                 *ol_flags |=
289                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
290                                 break;
291                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
292                                 ptype |= RTE_PTYPE_L4_UDP;
293                                 *ol_flags |=
294                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
295                                 break;
296                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
297                                 ptype |= RTE_PTYPE_L4_FRAG;
298                                 break;
299                         }
300                 }
301         } else {
302                 bool l4_valid = true;
303
304                 switch (EFX_WORD_FIELD(class,
305                         ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
306                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
307                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
308                         break;
309                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
310                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
311                         *ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
312                         break;
313                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
314                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
315                         break;
316                 }
317
318                 switch (EFX_WORD_FIELD(class,
319                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
320                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
321                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
322                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
323                         break;
324                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
325                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
326                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
327                         break;
328                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
329                         ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
330                         break;
331                 default:
332                         l4_valid = false;
333                         break;
334                 }
335
336                 if (l4_valid) {
337                         switch (EFX_WORD_FIELD(class,
338                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
339                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
340                                 ptype |= RTE_PTYPE_INNER_L4_TCP;
341                                 *ol_flags |=
342                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
343                                 break;
344                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
345                                 ptype |= RTE_PTYPE_INNER_L4_UDP;
346                                 *ol_flags |=
347                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
348                                 break;
349                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
350                                 ptype |= RTE_PTYPE_INNER_L4_FRAG;
351                                 break;
352                         }
353                 }
354         }
355
356         return ptype;
357 }
358
359 /*
360  * The function below relies on the following fields in the Rx prefix.
361  * Some fields are mandatory, some fields are optional.
362  * See sfc_ef100_rx_qstart() below.
363  */
364 static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
365         .erpl_fields    = {
366 #define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
367         EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)
368
369                 SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
370                 SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
371                 SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
372                 SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
373                 SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),
374
375 #undef  SFC_EF100_RX_PREFIX_FIELD
376         }
377 };
378
379 static bool
380 sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
381                                 const efx_oword_t *rx_prefix,
382                                 struct rte_mbuf *m)
383 {
384         const efx_word_t *class;
385         uint64_t ol_flags = 0;
386
387         RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
388         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
389         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
390                          sizeof(*class));
391         class = (const efx_word_t *)((const uint8_t *)rx_prefix +
392                 EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
393         if (unlikely(EFX_WORD_FIELD(*class,
394                                     ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
395                      ESE_GZ_RH_HCLASS_L2_STATUS_OK))
396                 return false;
397
398         m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);
399
400         if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
401             EFX_TEST_OWORD_BIT(rx_prefix[0],
402                                ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
403                 ol_flags |= PKT_RX_RSS_HASH;
404                 /* EFX_OWORD_FIELD converts little-endian to CPU */
405                 m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
406                                               ESF_GZ_RX_PREFIX_RSS_HASH);
407         }
408
409         if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
410                 uint32_t user_mark;
411
412                 /* EFX_OWORD_FIELD converts little-endian to CPU */
413                 user_mark = EFX_OWORD_FIELD(rx_prefix[0],
414                                             ESF_GZ_RX_PREFIX_USER_MARK);
415                 if (user_mark != SFC_EF100_USER_MARK_INVALID) {
416                         ol_flags |= PKT_RX_FDIR_ID;
417                         m->hash.fdir.hi = user_mark;
418                 }
419         }
420
421         m->ol_flags = ol_flags;
422         return true;
423 }
424
425 static const uint8_t *
426 sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
427 {
428         return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
429 }
430
431 static struct rte_mbuf *
432 sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
433 {
434         struct rte_mbuf *m;
435         unsigned int id;
436
437         /* mbuf associated with current Rx descriptor */
438         m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;
439
440         /* completed is already moved to the next one */
441         if (unlikely(rxq->completed == rxq->added))
442                 goto done;
443
444         /*
445          * Prefetch Rx prefix of the next packet.
446          * If the current packet is scattered, the next mbuf is its
447          * fragment and this simply prefetches some data; no harm since
448          * the packet rate should not be high when scatter is used.
449          */
450         id = rxq->completed & rxq->ptr_mask;
451         rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));
452
453         if (unlikely(rxq->completed + 1 == rxq->added))
454                 goto done;
455
456         /*
457          * Prefetch mbuf control structure of the next after next Rx
458          * descriptor.
459          */
460         id = (id == rxq->ptr_mask) ? 0 : (id + 1);
461         rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);
462
463         /*
464                  * If next time we will need an SW Rx descriptor from the next
465                  * cache line, try to make sure that it is already in cache.
466          */
467         if ((id & 0x7) == 0x7)
468                 rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);
469
470 done:
471         return m;
472 }
473
474 static struct rte_mbuf **
475 sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
476                                 struct rte_mbuf **rx_pkts,
477                                 struct rte_mbuf ** const rx_pkts_end)
478 {
479         while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
480                 struct rte_mbuf *pkt;
481                 struct rte_mbuf *lastseg;
482                 const efx_oword_t *rx_prefix;
483                 uint16_t pkt_len;
484                 uint16_t seg_len;
485                 bool deliver;
486
487                 rxq->ready_pkts--;
488
489                 pkt = sfc_ef100_rx_next_mbuf(rxq);
490                 __rte_mbuf_raw_sanity_check(pkt);
491
492                 RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
493                                  sizeof(rxq->rearm_data));
494                 pkt->rearm_data[0] = rxq->rearm_data;
495
496                 /* data_off already moved past Rx prefix */
497                 rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);
498
499                 pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
500                                           ESF_GZ_RX_PREFIX_LENGTH);
501                 SFC_ASSERT(pkt_len > 0);
502                 rte_pktmbuf_pkt_len(pkt) = pkt_len;
503
504                 seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
505                 rte_pktmbuf_data_len(pkt) = seg_len;
506
507                 deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);
508
509                 lastseg = pkt;
510                 while ((pkt_len -= seg_len) > 0) {
511                         struct rte_mbuf *seg;
512
513                         seg = sfc_ef100_rx_next_mbuf(rxq);
514                         __rte_mbuf_raw_sanity_check(seg);
515
516                         seg->data_off = RTE_PKTMBUF_HEADROOM;
517
518                         seg_len = RTE_MIN(pkt_len, rxq->buf_size);
519                         rte_pktmbuf_data_len(seg) = seg_len;
520                         rte_pktmbuf_pkt_len(seg) = seg_len;
521
522                         pkt->nb_segs++;
523                         lastseg->next = seg;
524                         lastseg = seg;
525                 }
526
527                 if (likely(deliver))
528                         *rx_pkts++ = pkt;
529                 else
530                         rte_pktmbuf_free(pkt);
531         }
532
533         return rx_pkts;
534 }
535
536 static bool
537 sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
538 {
539         *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];
540
541         if (!sfc_ef100_ev_present(ev,
542                         (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
543                 return false;
544
545         if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
546                 /*
547                  * Do not move read_ptr to keep the event for exception
548                  * handling by the control path.
549                  */
550                 rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
551                 sfc_ef100_rx_err(rxq,
552                         "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
553                         rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
554                         EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
555                         EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
556                 return false;
557         }
558
559         sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
560                            EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
561                            EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
562                            rxq->evq_read_ptr,
563                            rxq->evq_read_ptr & rxq->ptr_mask);
564
565         rxq->evq_read_ptr++;
566         return true;
567 }
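/*
 * Editorial note (not part of the original sources): event presence above is
 * detected with a phase bit. evq_phase_bit_shift is set to log2 of the event
 * queue size at queue creation (see sfc_ef100_rx_qcreate() below), so for a
 * 1024-entry event queue the expected phase (evq_read_ptr >> 10) & 1 flips
 * every time the read pointer wraps the ring. sfc_ef100_ev_present() checks
 * the phase bit the NIC wrote into the event against this expected value, so
 * stale events left over from the previous lap are not treated as new ones.
 */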
568
569 static uint16_t
570 sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
571 {
572         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
573         struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
574         efx_qword_t rx_ev;
575
576         rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);
577
578         if (unlikely(rxq->flags &
579                      (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
580                 goto done;
581
582         while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
583                 rxq->ready_pkts =
584                         EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
585                 rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
586                                                           rx_pkts_end);
587         }
588
589         /* It is not a problem if we refill in the case of exception */
590         sfc_ef100_rx_qrefill(rxq);
591
592         if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
593             rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
594                 sfc_ef100_rx_qprime(rxq);
595
596 done:
597         return nb_pkts - (rx_pkts_end - rx_pkts);
598 }
599
600 static const uint32_t *
601 sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
602 {
603         static const uint32_t ef100_native_ptypes[] = {
604                 RTE_PTYPE_L2_ETHER,
605                 RTE_PTYPE_L2_ETHER_VLAN,
606                 RTE_PTYPE_L2_ETHER_QINQ,
607                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
608                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
609                 RTE_PTYPE_L4_TCP,
610                 RTE_PTYPE_L4_UDP,
611                 RTE_PTYPE_L4_FRAG,
612                 RTE_PTYPE_TUNNEL_VXLAN,
613                 RTE_PTYPE_TUNNEL_NVGRE,
614                 RTE_PTYPE_TUNNEL_GENEVE,
615                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
616                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
617                 RTE_PTYPE_INNER_L4_TCP,
618                 RTE_PTYPE_INNER_L4_UDP,
619                 RTE_PTYPE_INNER_L4_FRAG,
620                 RTE_PTYPE_UNKNOWN
621         };
622
623         return ef100_native_ptypes;
624 }
625
626 static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
627 static unsigned int
628 sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
629 {
630         return 0;
631 }
632
633 static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
634 static int
635 sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
636                           __rte_unused uint16_t offset)
637 {
638         return -ENOTSUP;
639 }
640
641
642 static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
643 static void
644 sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
645 {
646         /*
647          * Number of descriptors just defines maximum number of pushed
648          * descriptors (fill level).
649          */
650         dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
651         dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
652 }
653
654
655 static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
656 static int
657 sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
658                            struct sfc_dp_rx_hw_limits *limits,
659                            __rte_unused struct rte_mempool *mb_pool,
660                            unsigned int *rxq_entries,
661                            unsigned int *evq_entries,
662                            unsigned int *rxq_max_fill_level)
663 {
664         /*
665          * rte_ethdev API guarantees that the number meets min, max and
666          * alignment requirements.
667          */
668         if (nb_rx_desc <= limits->rxq_min_entries)
669                 *rxq_entries = limits->rxq_min_entries;
670         else
671                 *rxq_entries = rte_align32pow2(nb_rx_desc);
672
673         *evq_entries = *rxq_entries;
674
675         *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
676                                       SFC_EF100_RXQ_LIMIT(*evq_entries));
677         return 0;
678 }
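/*
 * Editorial note (not part of the original sources), a worked example of the
 * sizing above assuming a hypothetical limits->rxq_min_entries of 512: for
 * nb_rx_desc = 1000 the ring is rounded up to rxq_entries = evq_entries =
 * 1024, while the fill level stays capped at
 * min(1000, SFC_EF100_RXQ_LIMIT(1024)) = min(1000, 1021) = 1000 buffers.
 */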
679
680
681 static uint64_t
682 sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
683 {
684         struct rte_mbuf m;
685
686         memset(&m, 0, sizeof(m));
687
688         rte_mbuf_refcnt_set(&m, 1);
689         m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
690         m.nb_segs = 1;
691         m.port = port_id;
692
693         /* rearm_data covers structure members filled in above */
694         rte_compiler_barrier();
695         RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
696         return m.rearm_data[0];
697 }
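/*
 * Editorial note (not part of the original sources): the value returned above
 * is a 64-bit snapshot of the mbuf fields that share the rearm_data marker
 * area (reference count, data_off, nb_segs and port). The receive path
 * restores all of them with a single 64-bit store per packet, see
 * "pkt->rearm_data[0] = rxq->rearm_data" in sfc_ef100_rx_process_ready_pkts().
 */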
698
699 static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
700 static int
701 sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
702                     const struct rte_pci_addr *pci_addr, int socket_id,
703                     const struct sfc_dp_rx_qcreate_info *info,
704                     struct sfc_dp_rxq **dp_rxqp)
705 {
706         struct sfc_ef100_rxq *rxq;
707         int rc;
708
709         rc = EINVAL;
710         if (info->rxq_entries != info->evq_entries)
711                 goto fail_rxq_args;
712
713         rc = ENOMEM;
714         rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
715                                  RTE_CACHE_LINE_SIZE, socket_id);
716         if (rxq == NULL)
717                 goto fail_rxq_alloc;
718
719         sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
720
721         rc = ENOMEM;
722         rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
723                                          info->rxq_entries,
724                                          sizeof(*rxq->sw_ring),
725                                          RTE_CACHE_LINE_SIZE, socket_id);
726         if (rxq->sw_ring == NULL)
727                 goto fail_desc_alloc;
728
729         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
730         rxq->ptr_mask = info->rxq_entries - 1;
731         rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
732         rxq->evq_hw_ring = info->evq_hw_ring;
733         rxq->max_fill_level = info->max_fill_level;
734         rxq->refill_threshold = info->refill_threshold;
735         rxq->prefix_size = info->prefix_size;
736         rxq->buf_size = info->buf_size;
737         rxq->refill_mb_pool = info->refill_mb_pool;
738         rxq->rxq_hw_ring = info->rxq_hw_ring;
739         rxq->doorbell = (volatile uint8_t *)info->mem_bar +
740                         ER_GZ_RX_RING_DOORBELL_OFST +
741                         (info->hw_index << info->vi_window_shift);
742
743         rxq->evq_hw_index = info->evq_hw_index;
744         rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
745                          info->fcw_offset +
746                          ER_GZ_EVQ_INT_PRIME_OFST;
747
748         sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
749
750         *dp_rxqp = &rxq->dp;
751         return 0;
752
753 fail_desc_alloc:
754         rte_free(rxq);
755
756 fail_rxq_alloc:
757 fail_rxq_args:
758         return rc;
759 }
760
761 static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
762 static void
763 sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
764 {
765         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
766
767         rte_free(rxq->sw_ring);
768         rte_free(rxq);
769 }
770
771 static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
772 static int
773 sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
774                     const efx_rx_prefix_layout_t *pinfo)
775 {
776         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
777         uint32_t unsup_rx_prefix_fields;
778
779         SFC_ASSERT(rxq->completed == 0);
780         SFC_ASSERT(rxq->added == 0);
781
782         /* Prefix must fit into reserved Rx buffer space */
783         if (pinfo->erpl_length > rxq->prefix_size)
784                 return ENOTSUP;
785
786         unsup_rx_prefix_fields =
787                 efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);
788
789         /* LENGTH and CLASS fields must always be present */
790         if ((unsup_rx_prefix_fields &
791              ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
792               (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
793                 return ENOTSUP;
794
795         if ((unsup_rx_prefix_fields &
796              ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
797               (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
798                 rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
799         else
800                 rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;
801
802         if ((unsup_rx_prefix_fields &
803              (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
804                 rxq->flags |= SFC_EF100_RXQ_USER_MARK;
805         else
806                 rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;
807
808         rxq->prefix_size = pinfo->erpl_length;
809         rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
810                                                        rxq->prefix_size);
811
812         sfc_ef100_rx_qrefill(rxq);
813
814         rxq->evq_read_ptr = evq_read_ptr;
815
816         rxq->flags |= SFC_EF100_RXQ_STARTED;
817         rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);
818
819         if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
820                 sfc_ef100_rx_qprime(rxq);
821
822         return 0;
823 }
824
825 static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
826 static void
827 sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
828 {
829         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
830
831         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
832
833         *evq_read_ptr = rxq->evq_read_ptr;
834 }
835
836 static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
837 static bool
838 sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
839 {
840         __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
841
842         SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);
843
844         /*
845          * It is safe to ignore Rx event since we free all mbufs on
846          * queue purge anyway.
847          */
848
849         return false;
850 }
851
852 static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
853 static void
854 sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
855 {
856         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
857         unsigned int i;
858         struct sfc_ef100_rx_sw_desc *rxd;
859
860         for (i = rxq->completed; i != rxq->added; ++i) {
861                 rxd = &rxq->sw_ring[i & rxq->ptr_mask];
862                 rte_mbuf_raw_free(rxd->mbuf);
863                 rxd->mbuf = NULL;
864         }
865
866         rxq->completed = rxq->added = 0;
867         rxq->ready_pkts = 0;
868
869         rxq->flags &= ~SFC_EF100_RXQ_STARTED;
870 }
871
872 static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
873 static int
874 sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
875 {
876         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
877
878         rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
879         if (rxq->flags & SFC_EF100_RXQ_STARTED)
880                 sfc_ef100_rx_qprime(rxq);
881         return 0;
882 }
883
884 static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
885 static int
886 sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
887 {
888         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
889
890         /* Cannot disarm, just disable rearm */
891         rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
892         return 0;
893 }
894
895 static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
896 static unsigned int
897 sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
898 {
899         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
900
901         /*
902          * The datapath keeps track only of added descriptors, since
903          * the number of pushed descriptors always equals the number
904          * of added descriptors due to enforced alignment.
905          */
906         return rxq->added;
907 }
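/*
 * Editorial sketch (hypothetical helper, not part of the original patch):
 * shows how a caller holding the generic datapath handles could read the
 * pushed buffers count through the optional get_pushed method added here,
 * assuming datapaths that do not implement the method leave it NULL.
 */
static __rte_unused unsigned int
sfc_ef100_rx_get_pushed_example(const struct sfc_dp_rx *dp_rx,
                                struct sfc_dp_rxq *dp_rxq)
{
        return (dp_rx->get_pushed != NULL) ? dp_rx->get_pushed(dp_rxq) : 0;
}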
908
909 struct sfc_dp_rx sfc_ef100_rx = {
910         .dp = {
911                 .name           = SFC_KVARG_DATAPATH_EF100,
912                 .type           = SFC_DP_RX,
913                 .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
914         },
915         .features               = SFC_DP_RX_FEAT_MULTI_PROCESS |
916                                   SFC_DP_RX_FEAT_INTR,
917         .dev_offload_capa       = 0,
918         .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
919                                   DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
920                                   DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
921                                   DEV_RX_OFFLOAD_SCATTER |
922                                   DEV_RX_OFFLOAD_RSS_HASH,
923         .get_dev_info           = sfc_ef100_rx_get_dev_info,
924         .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
925         .qcreate                = sfc_ef100_rx_qcreate,
926         .qdestroy               = sfc_ef100_rx_qdestroy,
927         .qstart                 = sfc_ef100_rx_qstart,
928         .qstop                  = sfc_ef100_rx_qstop,
929         .qrx_ev                 = sfc_ef100_rx_qrx_ev,
930         .qpurge                 = sfc_ef100_rx_qpurge,
931         .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
932         .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
933         .qdesc_status           = sfc_ef100_rx_qdesc_status,
934         .intr_enable            = sfc_ef100_rx_intr_enable,
935         .intr_disable           = sfc_ef100_rx_intr_disable,
936         .get_pushed             = sfc_ef100_rx_get_pushed,
937         .pkt_burst              = sfc_ef100_recv_pkts,
938 };