ethdev: add namespace
[dpdk.git] / drivers / net / sfc / sfc_ef100_rx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2018-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9
10 /* EF100 native datapath implementation */
11
12 #include <stdbool.h>
13
14 #include <rte_byteorder.h>
15 #include <rte_mbuf_ptype.h>
16 #include <rte_mbuf.h>
17 #include <rte_io.h>
18
19 #include "efx_types.h"
20 #include "efx_regs_ef100.h"
21 #include "efx.h"
22
23 #include "sfc.h"
24 #include "sfc_debug.h"
25 #include "sfc_flow_tunnel.h"
26 #include "sfc_tweak.h"
27 #include "sfc_dp_rx.h"
28 #include "sfc_kvargs.h"
29 #include "sfc_ef100.h"
30
31
/* Log an error message tagged with this Rx queue's datapath queue ID */
#define sfc_ef100_rx_err(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

/* Log a debug message tagged with this Rx queue's datapath queue ID */
#define sfc_ef100_rx_debug(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
		   __VA_ARGS__)
38
/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF100 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events on the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
	((_ndesc) - 1 /* head must not step on tail */ - \
	 1 /* Rx error */ - 1 /* flush */)

/** Invalid user mark value when the mark should be treated as unset */
#define SFC_EF100_USER_MARK_INVALID	0
53
/** Software Rx descriptor: tracks the mbuf bound to one HW ring slot */
struct sfc_ef100_rx_sw_desc {
	struct rte_mbuf			*mbuf;	/* buffer posted to the HW ring */
};
57
/** EF100 native datapath Rx queue */
struct sfc_ef100_rxq {
	/* Used on data path */
	unsigned int			flags;
#define SFC_EF100_RXQ_STARTED		0x1
#define SFC_EF100_RXQ_NOT_RUNNING	0x2
#define SFC_EF100_RXQ_EXCEPTION		0x4	/* unexpected event seen */
#define SFC_EF100_RXQ_RSS_HASH		0x10	/* Rx prefix carries RSS hash */
#define SFC_EF100_RXQ_USER_MARK		0x20	/* Rx prefix carries user mark */
#define SFC_EF100_RXQ_FLAG_INTR_EN	0x40	/* re-prime EvQ for interrupts */
#define SFC_EF100_RXQ_INGRESS_MPORT	0x80	/* Rx prefix carries ingress m-port */
#define SFC_EF100_RXQ_USER_FLAG		0x100	/* Rx prefix carries user flag */
	unsigned int			ptr_mask;	/* ring size minus 1 */
	unsigned int			evq_phase_bit_shift;
	unsigned int			ready_pkts;	/* pkts left in current Rx event */
	unsigned int			completed;	/* consumer index (not masked) */
	unsigned int			evq_read_ptr;
	unsigned int			evq_read_ptr_primed;	/* read ptr at last prime */
	volatile efx_qword_t		*evq_hw_ring;
	struct sfc_ef100_rx_sw_desc	*sw_ring;
	uint64_t			rearm_data;	/* mbuf rearm_data image */
	uint16_t			buf_size;
	uint16_t			prefix_size;
	uint32_t			user_mark_mask;

	unsigned int			evq_hw_index;
	volatile void			*evq_prime;	/* EvQ INT_PRIME register */

	/* Used on refill */
	unsigned int			added;		/* producer index (not masked) */
	unsigned int			max_fill_level;
	unsigned int			refill_threshold;
	struct rte_mempool		*refill_mb_pool;
	efx_qword_t			*rxq_hw_ring;
	volatile void			*doorbell;

	/* Datapath receive queue anchor */
	struct sfc_dp_rxq		dp;
};
96
97 static inline struct sfc_ef100_rxq *
98 sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
99 {
100         return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
101 }
102
/*
 * Prime the event queue at the current read pointer (used when Rx
 * interrupts are enabled) and remember the primed position so that
 * re-priming can be skipped if nothing has been consumed since.
 */
static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
	sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
			    rxq->evq_read_ptr & rxq->ptr_mask);
	rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}
110
/*
 * Push the Rx ring doorbell with the new producer index.
 * Ordering of descriptor writes vs the doorbell write is guaranteed by
 * rte_write32() as explained below.
 */
static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
	efx_dword_t dword;

	EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

	/* DMA sync to device is not required */

	/*
	 * rte_write32() has rte_io_wmb() which guarantees that the STORE
	 * operations (i.e. Rx and event descriptor updates) that precede
	 * the rte_io_wmb() call are visible to NIC before the STORE
	 * operations that follow it (i.e. doorbell write).
	 */
	rte_write32(dword.ed_u32[0], rxq->doorbell);
	rxq->dp.dpq.rx_dbells++;

	sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
			   EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
			   added);
}
133
/*
 * Refill the Rx ring with mbufs from the mempool in bulks of
 * SFC_RX_REFILL_BULK and push the doorbell once for everything posted.
 * Does nothing while free space is below the refill threshold.
 */
static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
	const unsigned int ptr_mask = rxq->ptr_mask;
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	unsigned int added = rxq->added;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	do {
		unsigned int id;
		unsigned int i;

		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			struct rte_eth_dev_data *dev_data =
				rte_eth_devices[rxq->dp.dpq.port_id].data;

			/*
			 * It is hardly a safe way to increment counter
			 * from different contexts, but all PMDs do it.
			 */
			dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0, id = added & ptr_mask;
		     i < RTE_DIM(objs);
		     ++i, ++id) {
			struct rte_mbuf *m = objs[i];
			struct sfc_ef100_rx_sw_desc *rxd;
			rte_iova_t phys_addr;

			__rte_mbuf_raw_sanity_check(m);

			SFC_ASSERT((id & ~ptr_mask) == 0);
			rxd = &rxq->sw_ring[id];
			rxd->mbuf = m;

			/*
			 * Avoid writing to mbuf. It is cheaper to do it
			 * when we receive packet and fill in nearby
			 * structure members.
			 */

			phys_addr = rte_mbuf_data_iova_default(m);
			EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
			    ESF_GZ_RX_BUF_ADDR, phys_addr);
		}

		added += RTE_DIM(objs);
	} while (--bulks > 0);

	SFC_ASSERT(rxq->added != added);
	rxq->added = added;
	sfc_ef100_rx_qpush(rxq, added);
}
204
205 static inline uint64_t
206 sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
207 {
208         return EFX_WORD_FIELD(class,
209                               ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
210                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
211                 PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
212 }
213
214 static inline uint64_t
215 sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
216 {
217         return EFX_WORD_FIELD(class,
218                               ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
219                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
220                 PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
221 }
222
/*
 * Decode the packet class word from the Rx prefix into an mbuf packet
 * type and accumulate checksum validity bits into *ol_flags.
 * Returns 0 (unknown ptype) for non-Ethernet L2 classes.
 */
static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
	uint32_t ptype;
	bool no_tunnel = false;

	if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
		     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
		return 0;

	/* L2: number of VLAN tags determines the Ethernet ptype */
	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
	case 0:
		ptype = RTE_PTYPE_L2_ETHER;
		break;
	case 1:
		ptype = RTE_PTYPE_L2_ETHER_VLAN;
		break;
	default:
		ptype = RTE_PTYPE_L2_ETHER_QINQ;
		break;
	}

	/* Tunnel class selects outer encapsulation and outer L4 checksum */
	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
		no_tunnel = true;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
		ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
		ptype |= RTE_PTYPE_TUNNEL_NVGRE;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
		ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	default:
		/*
		 * Driver does not know the tunnel, but it is
		 * still a tunnel and NT_OR_INNER refer to inner
		 * frame.
		 */
		no_tunnel = false;
	}

	if (no_tunnel) {
		bool l4_valid = true;

		/* Non-tunnel packet: fill outer L3/L4 ptypes */
		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_L4_FRAG;
				break;
			}
		}
	} else {
		bool l4_valid = true;

		/* Tunnel packet: outer L3 ptype and outer IPv4 checksum */
		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		}

		/* Inner L3 ptype and inner IPv4 checksum status */
		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= PKT_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
			break;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_INNER_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_INNER_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_INNER_L4_FRAG;
				break;
			}
		}
	}

	return ptype;
}
364
/*
 * Below function relies on the following fields in Rx prefix.
 * Some fields are mandatory, some fields are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
	.erpl_fields	= {
/* Helper to declare a field whose layout name matches the register name */
#define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
	EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

		SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
		EFX_RX_PREFIX_FIELD(INGRESS_MPORT,
				    ESF_GZ_RX_PREFIX_INGRESS_MPORT, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_FLAG, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef	SFC_EF100_RX_PREFIX_FIELD
	}
};
387
/*
 * Fill in mbuf packet type, ol_flags and dynamic fields from the Rx
 * prefix. Returns false if the L2 status is bad and the caller must
 * drop the packet.
 */
static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
				const efx_xword_t *rx_prefix,
				struct rte_mbuf *m)
{
	const efx_word_t *class;
	uint64_t ol_flags = 0;

	/* The CLASS field must be byte-aligned and word-sized to allow the
	 * direct pointer access below.
	 */
	RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
			 sizeof(*class));
	class = (const efx_word_t *)((const uint8_t *)rx_prefix +
		EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
	if (unlikely(EFX_WORD_FIELD(*class,
				    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
		     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
		return false;

	m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

	if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
	    EFX_TEST_XWORD_BIT(rx_prefix[0],
			       ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
		ol_flags |= PKT_RX_RSS_HASH;
		/* EFX_XWORD_FIELD converts little-endian to CPU */
		m->hash.rss = EFX_XWORD_FIELD(rx_prefix[0],
					      ESF_GZ_RX_PREFIX_RSS_HASH);
	}

	if (rxq->flags & SFC_EF100_RXQ_USER_FLAG) {
		uint32_t user_flag;

		user_flag = EFX_XWORD_FIELD(rx_prefix[0],
					    ESF_GZ_RX_PREFIX_USER_FLAG);
		if (user_flag != 0)
			ol_flags |= PKT_RX_FDIR;
	}

	if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
		uint8_t tunnel_mark;
		uint32_t user_mark;
		uint32_t mark;

		/* EFX_XWORD_FIELD converts little-endian to CPU */
		mark = EFX_XWORD_FIELD(rx_prefix[0],
				       ESF_GZ_RX_PREFIX_USER_MARK);

		/* Low bits of the mark carry the flow-rule user mark */
		user_mark = mark & rxq->user_mark_mask;
		if (user_mark != SFC_EF100_USER_MARK_INVALID) {
			ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
			m->hash.fdir.hi = user_mark;
		}

		/* Remaining bits may carry a flow tunnel (ft) mark */
		tunnel_mark = SFC_FT_GET_TUNNEL_MARK(mark);
		if (tunnel_mark != SFC_FT_TUNNEL_MARK_INVALID) {
			sfc_ft_id_t ft_id;

			ft_id = SFC_FT_TUNNEL_MARK_TO_ID(tunnel_mark);

			ol_flags |= sfc_dp_ft_id_valid;
			*RTE_MBUF_DYNFIELD(m, sfc_dp_ft_id_offset,
					   sfc_ft_id_t *) = ft_id;
		}
	}

	if (rxq->flags & SFC_EF100_RXQ_INGRESS_MPORT) {
		ol_flags |= sfc_dp_mport_override;
		*RTE_MBUF_DYNFIELD(m,
			sfc_dp_mport_offset,
			typeof(&((efx_mport_id_t *)0)->id)) =
				EFX_XWORD_FIELD(rx_prefix[0],
						ESF_GZ_RX_PREFIX_INGRESS_MPORT);
	}

	m->ol_flags = ol_flags;
	return true;
}
466
467 static const uint8_t *
468 sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
469 {
470         return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
471 }
472
/*
 * Take the mbuf of the current Rx descriptor, advance 'completed' and
 * prefetch data for up to two subsequent descriptors to hide memory
 * latency on the fast path.
 */
static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
	struct rte_mbuf *m;
	unsigned int id;

	/* mbuf associated with current Rx descriptor */
	m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

	/* completed is already moved to the next one */
	if (unlikely(rxq->completed == rxq->added))
		goto done;

	/*
	 * Prefetch Rx prefix of the next packet.
	 * If the current packet is scattered, the next mbuf is its
	 * fragment and this simply prefetches some data - no harm since
	 * packet rate should not be high if scatter is used.
	 */
	id = rxq->completed & rxq->ptr_mask;
	rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

	if (unlikely(rxq->completed + 1 == rxq->added))
		goto done;

	/*
	 * Prefetch mbuf control structure of the next after next Rx
	 * descriptor.
	 */
	id = (id == rxq->ptr_mask) ? 0 : (id + 1);
	rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

	/*
	 * If the next time we'll need SW Rx descriptor from the next
	 * cache line, try to make sure that we have it in cache.
	 */
	if ((id & 0x7) == 0x7)
		rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
	return m;
}
515
/*
 * Build packets from ready Rx descriptors until either the ready count
 * or the caller's array is exhausted. Chains extra segments when the
 * packet is longer than one buffer (Rx scatter). Returns the new
 * position in the rx_pkts array.
 */
static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
				struct rte_mbuf **rx_pkts,
				struct rte_mbuf ** const rx_pkts_end)
{
	while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
		struct rte_mbuf *pkt;
		struct rte_mbuf *lastseg;
		const efx_xword_t *rx_prefix;
		uint16_t pkt_len;
		uint16_t seg_len;
		bool deliver;

		rxq->ready_pkts--;

		pkt = sfc_ef100_rx_next_mbuf(rxq);
		__rte_mbuf_raw_sanity_check(pkt);

		/* Initialise refcnt/data_off/nb_segs/port in one store */
		RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
				 sizeof(rxq->rearm_data));
		pkt->rearm_data[0] = rxq->rearm_data;

		/* data_off already moved past Rx prefix */
		rx_prefix = (const efx_xword_t *)sfc_ef100_rx_pkt_prefix(pkt);

		pkt_len = EFX_XWORD_FIELD(rx_prefix[0],
					  ESF_GZ_RX_PREFIX_LENGTH);
		SFC_ASSERT(pkt_len > 0);
		rte_pktmbuf_pkt_len(pkt) = pkt_len;

		/* First segment also holds the Rx prefix */
		seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
		rte_pktmbuf_data_len(pkt) = seg_len;

		deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

		/* Chain remaining segments of a scattered packet */
		lastseg = pkt;
		while ((pkt_len -= seg_len) > 0) {
			struct rte_mbuf *seg;

			seg = sfc_ef100_rx_next_mbuf(rxq);
			__rte_mbuf_raw_sanity_check(seg);

			seg->data_off = RTE_PKTMBUF_HEADROOM;

			seg_len = RTE_MIN(pkt_len, rxq->buf_size);
			rte_pktmbuf_data_len(seg) = seg_len;
			rte_pktmbuf_pkt_len(seg) = seg_len;

			pkt->nb_segs++;
			lastseg->next = seg;
			lastseg = seg;
		}

		if (likely(deliver)) {
			*rx_pkts++ = pkt;
			sfc_pkts_bytes_add(&rxq->dp.dpq.stats, 1,
					   rte_pktmbuf_pkt_len(pkt));
		} else {
			/* Bad L2 status: drop the whole chain */
			rte_pktmbuf_free(pkt);
		}
	}

	return rx_pkts;
}
580
/*
 * Fetch the next event from the event queue, using the phase bit to
 * detect presence. On an unexpected (non Rx) event the exception flag
 * is set and the read pointer is NOT advanced so the control path can
 * inspect the offending event; returns false in that case.
 */
static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
	*ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

	if (!sfc_ef100_ev_present(ev,
			(rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
		return false;

	if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
		/*
		 * Do not move read_ptr to keep the event for exception
		 * handling by the control path.
		 */
		rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
		sfc_ef100_rx_err(rxq,
			"RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
			rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
			EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
		return false;
	}

	sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
			   rxq->evq_read_ptr,
			   rxq->evq_read_ptr & rxq->ptr_mask);

	rxq->evq_read_ptr++;
	return true;
}
613
/*
 * Burst receive: deliver packets already parsed on a previous call
 * first, then poll the event queue for new Rx completions, refill the
 * ring and re-prime interrupts if they are enabled. Returns the number
 * of packets stored in rx_pkts.
 */
static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
	struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
	efx_qword_t rx_ev;

	rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

	if (unlikely(rxq->flags &
		     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
		goto done;

	/*
	 * ready_pkts is fully drained above when the array is not yet
	 * full, so plain assignment (not +=) is correct here.
	 */
	while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
		rxq->ready_pkts =
			EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
		rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
							  rx_pkts_end);
	}

	/* It is not a problem if we refill in the case of exception */
	sfc_ef100_rx_qrefill(rxq);

	if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
	    rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
		sfc_ef100_rx_qprime(rxq);

done:
	return nb_pkts - (rx_pkts_end - rx_pkts);
}
644
645 static const uint32_t *
646 sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
647 {
648         static const uint32_t ef100_native_ptypes[] = {
649                 RTE_PTYPE_L2_ETHER,
650                 RTE_PTYPE_L2_ETHER_VLAN,
651                 RTE_PTYPE_L2_ETHER_QINQ,
652                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
653                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
654                 RTE_PTYPE_L4_TCP,
655                 RTE_PTYPE_L4_UDP,
656                 RTE_PTYPE_L4_FRAG,
657                 RTE_PTYPE_TUNNEL_VXLAN,
658                 RTE_PTYPE_TUNNEL_NVGRE,
659                 RTE_PTYPE_TUNNEL_GENEVE,
660                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
661                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
662                 RTE_PTYPE_INNER_L4_TCP,
663                 RTE_PTYPE_INNER_L4_UDP,
664                 RTE_PTYPE_INNER_L4_FRAG,
665                 RTE_PTYPE_UNKNOWN
666         };
667
668         return ef100_native_ptypes;
669 }
670
static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
/* Pending descriptor count is not implemented; always reports zero. */
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
	return 0;
}
677
static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
/* Descriptor status lookup is not supported by this datapath. */
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
			  __rte_unused uint16_t offset)
{
	return -ENOTSUP;
}
685
686
static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
/* Report EF100-specific Rx descriptor limits for ethdev device info. */
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
	/*
	 * Number of descriptors just defines maximum number of pushed
	 * descriptors (fill level).
	 */
	dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
	dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}
698
699
700 static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
701 static int
702 sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
703                            struct sfc_dp_rx_hw_limits *limits,
704                            __rte_unused struct rte_mempool *mb_pool,
705                            unsigned int *rxq_entries,
706                            unsigned int *evq_entries,
707                            unsigned int *rxq_max_fill_level)
708 {
709         /*
710          * rte_ethdev API guarantees that the number meets min, max and
711          * alignment requirements.
712          */
713         if (nb_rx_desc <= limits->rxq_min_entries)
714                 *rxq_entries = limits->rxq_min_entries;
715         else
716                 *rxq_entries = rte_align32pow2(nb_rx_desc);
717
718         *evq_entries = *rxq_entries;
719
720         *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
721                                       SFC_EF100_RXQ_LIMIT(*evq_entries));
722         return 0;
723 }
724
725
/*
 * Build the 64-bit rearm_data image used to initialise refcnt,
 * data_off, nb_segs and port of every received mbuf with a single
 * store on the fast path.
 */
static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
	struct rte_mbuf m;

	memset(&m, 0, sizeof(m));

	rte_mbuf_refcnt_set(&m, 1);
	/* Point data_off past the Rx prefix written by HW */
	m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
	m.nb_segs = 1;
	m.port = port_id;

	/* rearm_data covers structure members filled in above */
	rte_compiler_barrier();
	RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
	return m.rearm_data[0];
}
743
static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
/*
 * Allocate and initialise the EF100 datapath Rx queue.
 * Returns 0 on success or a positive errno on failure; partially
 * acquired resources are released via goto-based cleanup.
 */
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		    const struct rte_pci_addr *pci_addr, int socket_id,
		    const struct sfc_dp_rx_qcreate_info *info,
		    struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_ef100_rxq *rxq;
	int rc;

	/* EvQ and RxQ must have the same size (shared ptr_mask) */
	rc = EINVAL;
	if (info->rxq_entries != info->evq_entries)
		goto fail_rxq_args;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
					 info->rxq_entries,
					 sizeof(*rxq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_ring == NULL)
		goto fail_desc_alloc;

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
	rxq->evq_hw_ring = info->evq_hw_ring;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->prefix_size = info->prefix_size;

	SFC_ASSERT(info->user_mark_mask != 0);
	rxq->user_mark_mask = info->user_mark_mask;

	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;
	rxq->rxq_hw_ring = info->rxq_hw_ring;
	/* Per-queue doorbell register within the VI window */
	rxq->doorbell = (volatile uint8_t *)info->mem_bar +
			ER_GZ_RX_RING_DOORBELL_OFST +
			(info->hw_index << info->vi_window_shift);

	rxq->evq_hw_index = info->evq_hw_index;
	rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
			 info->fcw_offset +
			 ER_GZ_EVQ_INT_PRIME_OFST;

	sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
	return rc;
}
809
static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
/* Release all memory allocated by sfc_ef100_rx_qcreate() for the queue. */
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/* Mbufs are freed on queue purge, not here; only the rings go */
	rte_free(rxq->sw_ring);
	rte_free(rxq);
}
819
static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
/*
 * Start the Rx queue: validate the Rx prefix layout reported by firmware,
 * latch per-queue feature flags based on which optional prefix fields are
 * available, pre-fill the ring with buffers and mark the queue running.
 * Returns 0 on success, ENOTSUP if the prefix layout cannot be handled.
 */
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
		    const efx_rx_prefix_layout_t *pinfo)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	uint32_t unsup_rx_prefix_fields;

	/* Queue must be freshly created or purged */
	SFC_ASSERT(rxq->completed == 0);
	SFC_ASSERT(rxq->added == 0);

	/*
	 * Prefix must fit into reserved Rx buffer space.
	 * Note: rxq->prefix_size still holds the reserved size from
	 * qcreate here; it is overwritten with the actual length below.
	 */
	if (pinfo->erpl_length > rxq->prefix_size)
		return ENOTSUP;

	unsup_rx_prefix_fields =
		efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

	/* LENGTH and CLASS fields must always be present */
	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
	      (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
		return ENOTSUP;

	/* RSS hash delivery requires both the hash and its valid bit */
	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
	      (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
		rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
	else
		rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

	/* User flag field is optional */
	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_USER_FLAG)) == 0)
		rxq->flags |= SFC_EF100_RXQ_USER_FLAG;
	else
		rxq->flags &= ~SFC_EF100_RXQ_USER_FLAG;

	/* User mark field is optional */
	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
		rxq->flags |= SFC_EF100_RXQ_USER_MARK;
	else
		rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

	/* Ingress m-port field is optional */
	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_INGRESS_MPORT)) == 0)
		rxq->flags |= SFC_EF100_RXQ_INGRESS_MPORT;
	else
		rxq->flags &= ~SFC_EF100_RXQ_INGRESS_MPORT;

	/* From now on use the actual prefix length reported by firmware */
	rxq->prefix_size = pinfo->erpl_length;
	rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
						       rxq->prefix_size);

	sfc_ef100_rx_qrefill(rxq);

	rxq->evq_read_ptr = evq_read_ptr;

	rxq->flags |= SFC_EF100_RXQ_STARTED;
	rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

	/* Rearm interrupts if they were requested while the queue was down */
	if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
		sfc_ef100_rx_qprime(rxq);

	return 0;
}
885
static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
/*
 * Stop the Rx queue datapath and report the current event queue read
 * pointer back to the caller so event processing can be resumed later.
 */
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

	*evq_read_ptr = rxq->evq_read_ptr;
}
896
static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
/*
 * Handle an Rx event delivered while the queue is stopped (e.g. during
 * flush). Returns false to indicate the event produced no packet.
 */
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
	__rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/* Must only be called when the queue is not running */
	SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

	/*
	 * It is safe to ignore Rx event since we free all mbufs on
	 * queue purge anyway.
	 */

	return false;
}
912
913 static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
914 static void
915 sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
916 {
917         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
918         unsigned int i;
919         struct sfc_ef100_rx_sw_desc *rxd;
920
921         for (i = rxq->completed; i != rxq->added; ++i) {
922                 rxd = &rxq->sw_ring[i & rxq->ptr_mask];
923                 rte_mbuf_raw_free(rxd->mbuf);
924                 rxd->mbuf = NULL;
925         }
926
927         rxq->completed = rxq->added = 0;
928         rxq->ready_pkts = 0;
929
930         rxq->flags &= ~SFC_EF100_RXQ_STARTED;
931 }
932
933 static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
934 static int
935 sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
936 {
937         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
938
939         rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
940         if (rxq->flags & SFC_EF100_RXQ_STARTED)
941                 sfc_ef100_rx_qprime(rxq);
942         return 0;
943 }
944
static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
/*
 * Stop requesting Rx interrupts for the queue. An already primed event
 * queue may still fire once. Always succeeds (returns 0).
 */
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/* Cannot disarm, just disable rearm */
	rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
	return 0;
}
955
static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
/* Report the number of Rx descriptors pushed to the hardware ring. */
static unsigned int
sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/*
	 * The datapath keeps track only of added descriptors, since
	 * the number of pushed descriptors always equals the number
	 * of added descriptors due to enforced alignment.
	 */
	return rxq->added;
}
969
/* EF100 native Rx datapath operations exported to the generic sfc layer */
struct sfc_dp_rx sfc_ef100_rx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EF100,
		.type		= SFC_DP_RX,
		.hw_fw_caps	= SFC_DP_HW_FW_CAP_EF100,
	},
	/* Datapath-level features */
	.features		= SFC_DP_RX_FEAT_MULTI_PROCESS |
				  SFC_DP_RX_FEAT_FLOW_FLAG |
				  SFC_DP_RX_FEAT_FLOW_MARK |
				  SFC_DP_RX_FEAT_INTR |
				  SFC_DP_RX_FEAT_STATS,
	/* No device-level offloads; all are per-queue */
	.dev_offload_capa	= 0,
	.queue_offload_capa	= RTE_ETH_RX_OFFLOAD_CHECKSUM |
				  RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
				  RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM |
				  RTE_ETH_RX_OFFLOAD_SCATTER |
				  RTE_ETH_RX_OFFLOAD_RSS_HASH,
	.get_dev_info		= sfc_ef100_rx_get_dev_info,
	.qsize_up_rings		= sfc_ef100_rx_qsize_up_rings,
	.qcreate		= sfc_ef100_rx_qcreate,
	.qdestroy		= sfc_ef100_rx_qdestroy,
	.qstart			= sfc_ef100_rx_qstart,
	.qstop			= sfc_ef100_rx_qstop,
	.qrx_ev			= sfc_ef100_rx_qrx_ev,
	.qpurge			= sfc_ef100_rx_qpurge,
	.supported_ptypes_get	= sfc_ef100_supported_ptypes_get,
	.qdesc_npending		= sfc_ef100_rx_qdesc_npending,
	.qdesc_status		= sfc_ef100_rx_qdesc_status,
	.intr_enable		= sfc_ef100_rx_intr_enable,
	.intr_disable		= sfc_ef100_rx_intr_disable,
	.get_pushed		= sfc_ef100_rx_get_pushed,
	.pkt_burst		= sfc_ef100_recv_pkts,
};