/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2018-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF100 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx_types.h"
#include "efx_regs_ef100.h"
#include "efx.h"

#include "sfc_debug.h"
#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"


#define sfc_ef100_rx_err(_rxq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

#define sfc_ef100_rx_debug(_rxq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
                   __VA_ARGS__)

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never
 * overfills.
 * The EF100 native datapath uses an event queue of the same size as
 * the Rx queue. The maximum number of events on the datapath can be
 * estimated as the number of Rx queue entries (one event per Rx
 * buffer in the worst case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
        ((_ndesc) - 1 /* head must not step on tail */ - \
         1 /* Rx error */ - 1 /* flush */)
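
/*
 * For example, a 512-entry Rx ring allows at most
 * 512 - 1 - 1 - 1 = 509 posted buffers.
 */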

/** Invalid user mark value when the mark should be treated as unset */
#define SFC_EF100_USER_MARK_INVALID     0

struct sfc_ef100_rx_sw_desc {
        struct rte_mbuf                 *mbuf;
};

struct sfc_ef100_rxq {
        /* Used on data path */
        unsigned int                    flags;
#define SFC_EF100_RXQ_STARTED           0x1
#define SFC_EF100_RXQ_NOT_RUNNING       0x2
#define SFC_EF100_RXQ_EXCEPTION         0x4
#define SFC_EF100_RXQ_RSS_HASH          0x10
#define SFC_EF100_RXQ_USER_MARK         0x20
#define SFC_EF100_RXQ_FLAG_INTR_EN      0x40
        unsigned int                    ptr_mask;
        unsigned int                    evq_phase_bit_shift;
        unsigned int                    ready_pkts;
        unsigned int                    completed;
        unsigned int                    evq_read_ptr;
        unsigned int                    evq_read_ptr_primed;
        volatile efx_qword_t            *evq_hw_ring;
        struct sfc_ef100_rx_sw_desc     *sw_ring;
        uint64_t                        rearm_data;
        uint16_t                        buf_size;
        uint16_t                        prefix_size;

        unsigned int                    evq_hw_index;
        volatile void                   *evq_prime;

        /* Used on refill */
        unsigned int                    added;
        unsigned int                    max_fill_level;
        unsigned int                    refill_threshold;
        struct rte_mempool              *refill_mb_pool;
        efx_qword_t                     *rxq_hw_ring;
        volatile void                   *doorbell;

        /* Datapath receive queue anchor */
        struct sfc_dp_rxq               dp;
};

static inline struct sfc_ef100_rxq *
sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
        return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
}

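/*
 * Prime the event queue to request an event/interrupt when new
 * events appear past the given read pointer (used when Rx
 * interrupts are enabled).
 */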
static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
        sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
                            rxq->evq_read_ptr & rxq->ptr_mask);
        rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}

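/*
 * Publish newly added Rx descriptors to the NIC by writing the
 * ring producer index to the Rx doorbell register.
 */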
static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
        efx_dword_t dword;

        EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

        /* DMA sync to device is not required */

        /*
         * rte_write32() has rte_io_wmb() which guarantees that the STORE
         * operations (i.e. Rx and event descriptor updates) that precede
         * the rte_io_wmb() call are visible to NIC before the STORE
         * operations that follow it (i.e. doorbell write).
         */
        rte_write32(dword.ed_u32[0], rxq->doorbell);
        rxq->dp.dpq.rx_dbells++;

        sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
                           EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
                           added);
}

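/*
 * Refill the Rx ring: allocate mbufs from the mempool in bulks of
 * SFC_RX_REFILL_BULK, write their buffer addresses into the hardware
 * ring and push the doorbell once everything is posted.
 */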
static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
        const unsigned int ptr_mask = rxq->ptr_mask;
        unsigned int free_space;
        unsigned int bulks;
        void *objs[SFC_RX_REFILL_BULK];
        unsigned int added = rxq->added;

        free_space = rxq->max_fill_level - (added - rxq->completed);

        if (free_space < rxq->refill_threshold)
                return;

        bulks = free_space / RTE_DIM(objs);
        /* refill_threshold guarantees that bulks is positive */
        SFC_ASSERT(bulks > 0);

        do {
                unsigned int id;
                unsigned int i;

                if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
                                                  RTE_DIM(objs)) < 0)) {
                        struct rte_eth_dev_data *dev_data =
                                rte_eth_devices[rxq->dp.dpq.port_id].data;

                        /*
                         * It is hardly a safe way to increment the
                         * counter from different contexts, but all
                         * PMDs do it.
                         */
                        dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
                        /* Return if we have posted nothing yet */
                        if (added == rxq->added)
                                return;
                        /* Push posted */
                        break;
                }

                for (i = 0, id = added & ptr_mask;
                     i < RTE_DIM(objs);
                     ++i, ++id) {
                        struct rte_mbuf *m = objs[i];
                        struct sfc_ef100_rx_sw_desc *rxd;
                        rte_iova_t phys_addr;

                        __rte_mbuf_raw_sanity_check(m);

                        SFC_ASSERT((id & ~ptr_mask) == 0);
                        rxd = &rxq->sw_ring[id];
                        rxd->mbuf = m;

                        /*
                         * Avoid writing to the mbuf here. It is cheaper
                         * to do it when we receive the packet and fill
                         * in nearby structure members.
                         */

                        phys_addr = rte_mbuf_data_iova_default(m);
                        EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
                            ESF_GZ_RX_BUF_ADDR, phys_addr);
                }

                added += RTE_DIM(objs);
        } while (--bulks > 0);

        SFC_ASSERT(rxq->added != added);
        rxq->added = added;
        sfc_ef100_rx_qpush(rxq, added);
}

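/*
 * Map the L4 checksum class from the Rx prefix to the corresponding
 * mbuf ol_flags value (non-tunnel/inner and tunnel outer variants).
 */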
static inline uint64_t
sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
{
        return EFX_WORD_FIELD(class,
                              ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
                ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
                PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
}

static inline uint64_t
sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
{
        return EFX_WORD_FIELD(class,
                              ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
                ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
                PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
}

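/*
 * Decode the CLASS word of the Rx prefix into an mbuf packet type
 * and checksum ol_flags. Returns 0 (RTE_PTYPE_UNKNOWN) if the L2
 * class is not recognised.
 */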
static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
        uint32_t ptype;
        bool no_tunnel = false;

        if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
                     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
                return 0;

        switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
        case 0:
                ptype = RTE_PTYPE_L2_ETHER;
                break;
        case 1:
                ptype = RTE_PTYPE_L2_ETHER_VLAN;
                break;
        default:
                ptype = RTE_PTYPE_L2_ETHER_QINQ;
                break;
        }

        switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
                no_tunnel = true;
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
                ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
                *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
                ptype |= RTE_PTYPE_TUNNEL_NVGRE;
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
                ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
                *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
                break;
        default:
                /*
                 * Driver does not know the tunnel, but it is
                 * still a tunnel and NT_OR_INNER refers to the
                 * inner frame.
                 */
                no_tunnel = false;
        }

        if (no_tunnel) {
                bool l4_valid = true;

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
                        break;
                default:
                        l4_valid = false;
                }

                if (l4_valid) {
                        switch (EFX_WORD_FIELD(class,
                                ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
                        case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
                                ptype |= RTE_PTYPE_L4_TCP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
                                ptype |= RTE_PTYPE_L4_UDP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
                                ptype |= RTE_PTYPE_L4_FRAG;
                                break;
                        }
                }
        } else {
                bool l4_valid = true;

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
                        break;
                }

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
                        break;
                default:
                        l4_valid = false;
                        break;
                }

                if (l4_valid) {
                        switch (EFX_WORD_FIELD(class,
                                ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
                        case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
                                ptype |= RTE_PTYPE_INNER_L4_TCP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
                                ptype |= RTE_PTYPE_INNER_L4_UDP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
                                ptype |= RTE_PTYPE_INNER_L4_FRAG;
                                break;
                        }
                }
        }

        return ptype;
}

/*
 * The function below relies on the following fields in the Rx prefix.
 * Some fields are mandatory, some fields are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
        .erpl_fields    = {
#define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
        EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

                SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef  SFC_EF100_RX_PREFIX_FIELD
        }
};

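/*
 * Convert the Rx prefix of a received packet into mbuf offload fields
 * (packet type, checksum flags, RSS hash, user mark). Returns false
 * if the L2 status indicates a bad frame that must be dropped.
 */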
static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
                                const efx_oword_t *rx_prefix,
                                struct rte_mbuf *m)
{
        const efx_word_t *class;
        uint64_t ol_flags = 0;

        RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
        RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
        RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
                         sizeof(*class));
        class = (const efx_word_t *)((const uint8_t *)rx_prefix +
                EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
        if (unlikely(EFX_WORD_FIELD(*class,
                                    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
                     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
                return false;

        m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

        if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
            EFX_TEST_OWORD_BIT(rx_prefix[0],
                               ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
                ol_flags |= PKT_RX_RSS_HASH;
                /* EFX_OWORD_FIELD converts little-endian to CPU */
                m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
                                              ESF_GZ_RX_PREFIX_RSS_HASH);
        }

        if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
                uint32_t user_mark;

                /* EFX_OWORD_FIELD converts little-endian to CPU */
                user_mark = EFX_OWORD_FIELD(rx_prefix[0],
                                            ESF_GZ_RX_PREFIX_USER_MARK);
                if (user_mark != SFC_EF100_USER_MARK_INVALID) {
                        ol_flags |= PKT_RX_FDIR_ID;
                        m->hash.fdir.hi = user_mark;
                }
        }

        m->ol_flags = ol_flags;
        return true;
}

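/*
 * The NIC writes the Rx prefix at the default data offset
 * (RTE_PKTMBUF_HEADROOM); on rearm data_off is set past the prefix
 * so that the mbuf data points at the packet itself.
 */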
static const uint8_t *
sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
{
        return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
}

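/*
 * Take the mbuf of the current completed Rx descriptor and prefetch
 * data that is likely to be needed soon: the Rx prefix of the next
 * packet, the next mbuf structure and, if required, the next cache
 * line of the SW ring.
 */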
static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
        struct rte_mbuf *m;
        unsigned int id;

        /* mbuf associated with current Rx descriptor */
        m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

        /* completed is already moved to the next one */
        if (unlikely(rxq->completed == rxq->added))
                goto done;

        /*
         * Prefetch the Rx prefix of the next packet.
         * If the current packet is scattered and the next mbuf is its
         * fragment, this simply prefetches some data - no harm since
         * the packet rate should not be high if scatter is used.
         */
        id = rxq->completed & rxq->ptr_mask;
        rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

        if (unlikely(rxq->completed + 1 == rxq->added))
                goto done;

        /*
         * Prefetch mbuf control structure of the next after next Rx
         * descriptor.
         */
        id = (id == rxq->ptr_mask) ? 0 : (id + 1);
        rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

        /*
         * If the SW Rx descriptor needed next time lies in the next
         * cache line, try to make sure that it is already in cache.
         */
        if ((id & 0x7) == 0x7)
                rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
        return m;
}

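/*
 * Build packets from descriptors that Rx events have announced as
 * ready: rearm each head mbuf from the template, parse its Rx prefix,
 * chain further mbufs for scattered packets and either deliver the
 * packet or drop it on bad L2 status.
 */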
static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
                                struct rte_mbuf **rx_pkts,
                                struct rte_mbuf ** const rx_pkts_end)
{
        while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
                struct rte_mbuf *pkt;
                struct rte_mbuf *lastseg;
                const efx_oword_t *rx_prefix;
                uint16_t pkt_len;
                uint16_t seg_len;
                bool deliver;

                rxq->ready_pkts--;

                pkt = sfc_ef100_rx_next_mbuf(rxq);
                __rte_mbuf_raw_sanity_check(pkt);

                RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
                                 sizeof(rxq->rearm_data));
                pkt->rearm_data[0] = rxq->rearm_data;

                /* data_off already moved past Rx prefix */
                rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);

                pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
                                          ESF_GZ_RX_PREFIX_LENGTH);
                SFC_ASSERT(pkt_len > 0);
                rte_pktmbuf_pkt_len(pkt) = pkt_len;

                seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
                rte_pktmbuf_data_len(pkt) = seg_len;

                deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

                lastseg = pkt;
                while ((pkt_len -= seg_len) > 0) {
                        struct rte_mbuf *seg;

                        seg = sfc_ef100_rx_next_mbuf(rxq);
                        __rte_mbuf_raw_sanity_check(seg);

                        seg->data_off = RTE_PKTMBUF_HEADROOM;

                        seg_len = RTE_MIN(pkt_len, rxq->buf_size);
                        rte_pktmbuf_data_len(seg) = seg_len;
                        rte_pktmbuf_pkt_len(seg) = seg_len;

                        pkt->nb_segs++;
                        lastseg->next = seg;
                        lastseg = seg;
                }

                if (likely(deliver))
                        *rx_pkts++ = pkt;
                else
                        rte_pktmbuf_free(pkt);
        }

        return rx_pkts;
}

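/*
 * Fetch the next event from the event queue ring if one is present.
 * Presence is detected via the phase bit, which flips on every wrap
 * of the event queue. A non-Rx event leaves the read pointer
 * untouched and puts the queue into the exception state.
 */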
static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
        *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

        if (!sfc_ef100_ev_present(ev,
                        (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
                return false;

        if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
                /*
                 * Do not move read_ptr to keep the event for exception
                 * handling by the control path.
                 */
                rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
                sfc_ef100_rx_err(rxq,
                        "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
                        rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
                        EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
                        EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
                return false;
        }

        sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
                           EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
                           EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
                           rxq->evq_read_ptr,
                           rxq->evq_read_ptr & rxq->ptr_mask);

        rxq->evq_read_ptr++;
        return true;
}

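/*
 * Burst receive entry point: deliver packets left ready by previous
 * calls, poll the event queue for new Rx events, refill the Rx ring
 * and re-prime the event queue if interrupts are enabled.
 */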
static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
        struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
        efx_qword_t rx_ev;

        rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

        if (unlikely(rxq->flags &
                     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
                goto done;

        while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
                rxq->ready_pkts =
                        EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
                rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
                                                          rx_pkts_end);
        }

        /* It is not a problem if we refill in the case of exception */
        sfc_ef100_rx_qrefill(rxq);

        if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
            rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
                sfc_ef100_rx_qprime(rxq);

done:
        return nb_pkts - (rx_pkts_end - rx_pkts);
}

static const uint32_t *
sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
        static const uint32_t ef100_native_ptypes[] = {
                RTE_PTYPE_L2_ETHER,
                RTE_PTYPE_L2_ETHER_VLAN,
                RTE_PTYPE_L2_ETHER_QINQ,
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_L4_TCP,
                RTE_PTYPE_L4_UDP,
                RTE_PTYPE_L4_FRAG,
                RTE_PTYPE_TUNNEL_VXLAN,
                RTE_PTYPE_TUNNEL_NVGRE,
                RTE_PTYPE_TUNNEL_GENEVE,
                RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_INNER_L4_TCP,
                RTE_PTYPE_INNER_L4_UDP,
                RTE_PTYPE_INNER_L4_FRAG,
                RTE_PTYPE_UNKNOWN
        };

        return ef100_native_ptypes;
}

static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
        return 0;
}

static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
                          __rte_unused uint16_t offset)
{
        return -ENOTSUP;
}


static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
        /*
         * The number of descriptors just defines the maximum number
         * of pushed descriptors (fill level).
         */
        dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
        dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}


static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
static int
sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
                           struct sfc_dp_rx_hw_limits *limits,
                           __rte_unused struct rte_mempool *mb_pool,
                           unsigned int *rxq_entries,
                           unsigned int *evq_entries,
                           unsigned int *rxq_max_fill_level)
{
        /*
         * rte_ethdev API guarantees that the number meets min, max and
         * alignment requirements.
         */
        if (nb_rx_desc <= limits->rxq_min_entries)
                *rxq_entries = limits->rxq_min_entries;
        else
                *rxq_entries = rte_align32pow2(nb_rx_desc);

        *evq_entries = *rxq_entries;

        *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
                                      SFC_EF100_RXQ_LIMIT(*evq_entries));
        return 0;
}


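/*
 * Build the 64-bit rearm_data template: the mbuf fields it covers
 * (refcnt, nb_segs, port and data_off, the latter moved past the Rx
 * prefix) can then be initialised with a single store per packet.
 */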
static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
        struct rte_mbuf m;

        memset(&m, 0, sizeof(m));

        rte_mbuf_refcnt_set(&m, 1);
        m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
        m.nb_segs = 1;
        m.port = port_id;

        /* rearm_data covers structure members filled in above */
        rte_compiler_barrier();
        RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
        return m.rearm_data[0];
}

static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
                    const struct rte_pci_addr *pci_addr, int socket_id,
                    const struct sfc_dp_rx_qcreate_info *info,
                    struct sfc_dp_rxq **dp_rxqp)
{
        struct sfc_ef100_rxq *rxq;
        int rc;

        rc = EINVAL;
        if (info->rxq_entries != info->evq_entries)
                goto fail_rxq_args;

        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq == NULL)
                goto fail_rxq_alloc;

        sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

        rc = ENOMEM;
        rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
                                         info->rxq_entries,
                                         sizeof(*rxq->sw_ring),
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq->sw_ring == NULL)
                goto fail_desc_alloc;

        rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
        rxq->ptr_mask = info->rxq_entries - 1;
        rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
        rxq->evq_hw_ring = info->evq_hw_ring;
        rxq->max_fill_level = info->max_fill_level;
        rxq->refill_threshold = info->refill_threshold;
        rxq->prefix_size = info->prefix_size;
        rxq->buf_size = info->buf_size;
        rxq->refill_mb_pool = info->refill_mb_pool;
        rxq->rxq_hw_ring = info->rxq_hw_ring;
        rxq->doorbell = (volatile uint8_t *)info->mem_bar +
                        ER_GZ_RX_RING_DOORBELL_OFST +
                        (info->hw_index << info->vi_window_shift);

        rxq->evq_hw_index = info->evq_hw_index;
        rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
                         info->fcw_offset +
                         ER_GZ_EVQ_INT_PRIME_OFST;

        sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

        *dp_rxqp = &rxq->dp;
        return 0;

fail_desc_alloc:
        rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
        return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rte_free(rxq->sw_ring);
        rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
                    const efx_rx_prefix_layout_t *pinfo)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
        uint32_t unsup_rx_prefix_fields;

        SFC_ASSERT(rxq->completed == 0);
        SFC_ASSERT(rxq->added == 0);

        /* Prefix must fit into reserved Rx buffer space */
        if (pinfo->erpl_length > rxq->prefix_size)
                return ENOTSUP;

        unsup_rx_prefix_fields =
                efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

        /* LENGTH and CLASS fields must always be present */
        if ((unsup_rx_prefix_fields &
             ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
              (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
                return ENOTSUP;

        if ((unsup_rx_prefix_fields &
             ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
              (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
                rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
        else
                rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

        if ((unsup_rx_prefix_fields &
             (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
                rxq->flags |= SFC_EF100_RXQ_USER_MARK;
        else
                rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

        rxq->prefix_size = pinfo->erpl_length;
        rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
                                                       rxq->prefix_size);

        sfc_ef100_rx_qrefill(rxq);

        rxq->evq_read_ptr = evq_read_ptr;

        rxq->flags |= SFC_EF100_RXQ_STARTED;
        rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

        if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
                sfc_ef100_rx_qprime(rxq);

        return 0;
}

static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

        *evq_read_ptr = rxq->evq_read_ptr;
}

static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
        __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

        /*
         * It is safe to ignore Rx event since we free all mbufs on
         * queue purge anyway.
         */

        return false;
}

static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
static void
sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
        unsigned int i;
        struct sfc_ef100_rx_sw_desc *rxd;

        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_ring[i & rxq->ptr_mask];
                rte_mbuf_raw_free(rxd->mbuf);
                rxd->mbuf = NULL;
        }

        rxq->completed = rxq->added = 0;
        rxq->ready_pkts = 0;

        rxq->flags &= ~SFC_EF100_RXQ_STARTED;
}

static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
static int
sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
        if (rxq->flags & SFC_EF100_RXQ_STARTED)
                sfc_ef100_rx_qprime(rxq);
        return 0;
}

static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        /* Cannot disarm, just disable rearm */
        rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
        return 0;
}

static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
static unsigned int
sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        /*
         * The datapath keeps track only of added descriptors, since
         * the number of pushed descriptors always equals the number
         * of added descriptors due to enforced alignment.
         */
        return rxq->added;
}

struct sfc_dp_rx sfc_ef100_rx = {
        .dp = {
                .name           = SFC_KVARG_DATAPATH_EF100,
                .type           = SFC_DP_RX,
                .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
        },
        .features               = SFC_DP_RX_FEAT_MULTI_PROCESS |
                                  SFC_DP_RX_FEAT_INTR,
        .dev_offload_capa       = 0,
        .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
                                  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
                                  DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
                                  DEV_RX_OFFLOAD_SCATTER |
                                  DEV_RX_OFFLOAD_RSS_HASH,
        .get_dev_info           = sfc_ef100_rx_get_dev_info,
        .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
        .qcreate                = sfc_ef100_rx_qcreate,
        .qdestroy               = sfc_ef100_rx_qdestroy,
        .qstart                 = sfc_ef100_rx_qstart,
        .qstop                  = sfc_ef100_rx_qstop,
        .qrx_ev                 = sfc_ef100_rx_qrx_ev,
        .qpurge                 = sfc_ef100_rx_qpurge,
        .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
        .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
        .qdesc_status           = sfc_ef100_rx_qdesc_status,
        .intr_enable            = sfc_ef100_rx_intr_enable,
        .intr_disable           = sfc_ef100_rx_intr_disable,
        .get_pushed             = sfc_ef100_rx_get_pushed,
        .pkt_burst              = sfc_ef100_recv_pkts,
};