net/sfc: fence off 8 bits in Rx mark for tunnel offload
drivers/net/sfc/sfc_ef100_rx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2021 Xilinx, Inc.
4  * Copyright(c) 2018-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9
10 /* EF100 native datapath implementation */
11
12 #include <stdbool.h>
13
14 #include <rte_byteorder.h>
15 #include <rte_mbuf_ptype.h>
16 #include <rte_mbuf.h>
17 #include <rte_io.h>
18
19 #include "efx_types.h"
20 #include "efx_regs_ef100.h"
21 #include "efx.h"
22
23 #include "sfc.h"
24 #include "sfc_debug.h"
25 #include "sfc_flow_tunnel.h"
26 #include "sfc_tweak.h"
27 #include "sfc_dp_rx.h"
28 #include "sfc_kvargs.h"
29 #include "sfc_ef100.h"
30
31
32 #define sfc_ef100_rx_err(_rxq, ...) \
33         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)
34
35 #define sfc_ef100_rx_debug(_rxq, ...) \
36         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
37                    __VA_ARGS__)
38
39 /**
40  * Maximum number of descriptors/buffers in the Rx ring.
41  * It should guarantee that the corresponding event queue never overfills.
42  * The EF100 native datapath uses an event queue of the same size as the Rx queue.
43  * The maximum number of events on the datapath can be estimated as the number of
44  * Rx queue entries (one event per Rx buffer in the worst case) plus
45  * Rx error and flush events.
46  */
47 #define SFC_EF100_RXQ_LIMIT(_ndesc) \
48         ((_ndesc) - 1 /* head must not step on tail */ - \
49          1 /* Rx error */ - 1 /* flush */)
50
51 /** Invalid user mark value when the mark should be treated as unset */
52 #define SFC_EF100_USER_MARK_INVALID     0
53
54 struct sfc_ef100_rx_sw_desc {
55         struct rte_mbuf                 *mbuf;
56 };
57
58 struct sfc_ef100_rxq {
59         /* Used on data path */
60         unsigned int                    flags;
61 #define SFC_EF100_RXQ_STARTED           0x1
62 #define SFC_EF100_RXQ_NOT_RUNNING       0x2
63 #define SFC_EF100_RXQ_EXCEPTION         0x4
64 #define SFC_EF100_RXQ_RSS_HASH          0x10
65 #define SFC_EF100_RXQ_USER_MARK         0x20
66 #define SFC_EF100_RXQ_FLAG_INTR_EN      0x40
67 #define SFC_EF100_RXQ_INGRESS_MPORT     0x80
68 #define SFC_EF100_RXQ_USER_FLAG         0x100
69         unsigned int                    ptr_mask;
70         unsigned int                    evq_phase_bit_shift;
71         unsigned int                    ready_pkts;
72         unsigned int                    completed;
73         unsigned int                    evq_read_ptr;
74         unsigned int                    evq_read_ptr_primed;
75         volatile efx_qword_t            *evq_hw_ring;
76         struct sfc_ef100_rx_sw_desc     *sw_ring;
77         uint64_t                        rearm_data;
78         uint16_t                        buf_size;
79         uint16_t                        prefix_size;
80         uint32_t                        user_mark_mask;
81
82         unsigned int                    evq_hw_index;
83         volatile void                   *evq_prime;
84
85         /* Used on refill */
86         unsigned int                    added;
87         unsigned int                    max_fill_level;
88         unsigned int                    refill_threshold;
89         struct rte_mempool              *refill_mb_pool;
90         efx_qword_t                     *rxq_hw_ring;
91         volatile void                   *doorbell;
92
93         /* Datapath receive queue anchor */
94         struct sfc_dp_rxq               dp;
95 };
96
97 static inline struct sfc_ef100_rxq *
98 sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
99 {
100         return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
101 }
102
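/*
 * Re-arm the event queue interrupt by writing the current EvQ read
 * pointer to the prime register; remember the primed position to
 * avoid redundant re-arming.
 */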
103 static void
104 sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
105 {
106         sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
107                             rxq->evq_read_ptr & rxq->ptr_mask);
108         rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
109 }
110
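/*
 * Push the new Rx ring producer index to the hardware through the
 * RxQ doorbell register.
 */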
111 static inline void
112 sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
113 {
114         efx_dword_t dword;
115
116         EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);
117
118         /* DMA sync to device is not required */
119
120         /*
121          * rte_write32() has rte_io_wmb() which guarantees that the STORE
122          * operations (i.e. Rx and event descriptor updates) that precede
123          * the rte_io_wmb() call are visible to NIC before the STORE
124          * the rte_io_wmb() call are visible to the NIC before the STORE
125          */
126         rte_write32(dword.ed_u32[0], rxq->doorbell);
127         rxq->dp.dpq.rx_dbells++;
128
129         sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
130                            EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
131                            added);
132 }
133
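/*
 * Refill the Rx ring with mbufs from the mempool in bulks of
 * SFC_RX_REFILL_BULK, fill in the hardware descriptors with buffer
 * addresses and push the doorbell once for everything added.
 */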
134 static void
135 sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
136 {
137         const unsigned int ptr_mask = rxq->ptr_mask;
138         unsigned int free_space;
139         unsigned int bulks;
140         void *objs[SFC_RX_REFILL_BULK];
141         unsigned int added = rxq->added;
142
143         free_space = rxq->max_fill_level - (added - rxq->completed);
144
145         if (free_space < rxq->refill_threshold)
146                 return;
147
148         bulks = free_space / RTE_DIM(objs);
149         /* refill_threshold guarantees that bulks is positive */
150         SFC_ASSERT(bulks > 0);
151
152         do {
153                 unsigned int id;
154                 unsigned int i;
155
156                 if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
157                                                   RTE_DIM(objs)) < 0)) {
158                         struct rte_eth_dev_data *dev_data =
159                                 rte_eth_devices[rxq->dp.dpq.port_id].data;
160
161                         /*
162                          * It is hardly a safe way to increment the counter
163                          * from different contexts, but all PMDs do it.
164                          */
165                         dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
166                         /* Return if we have posted nothing yet */
167                         if (added == rxq->added)
168                                 return;
169                         /* Push posted */
170                         break;
171                 }
172
173                 for (i = 0, id = added & ptr_mask;
174                      i < RTE_DIM(objs);
175                      ++i, ++id) {
176                         struct rte_mbuf *m = objs[i];
177                         struct sfc_ef100_rx_sw_desc *rxd;
178                         rte_iova_t phys_addr;
179
180                         __rte_mbuf_raw_sanity_check(m);
181
182                         SFC_ASSERT((id & ~ptr_mask) == 0);
183                         rxd = &rxq->sw_ring[id];
184                         rxd->mbuf = m;
185
186                         /*
187                          * Avoid writing to the mbuf. It is cheaper to do it
188                          * when we receive the packet and fill in nearby
189                          * structure members.
190                          */
191
192                         phys_addr = rte_mbuf_data_iova_default(m);
193                         EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
194                             ESF_GZ_RX_BUF_ADDR, phys_addr);
195                 }
196
197                 added += RTE_DIM(objs);
198         } while (--bulks > 0);
199
200         SFC_ASSERT(rxq->added != added);
201         rxq->added = added;
202         sfc_ef100_rx_qpush(rxq, added);
203 }
204
205 static inline uint64_t
206 sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
207 {
208         return EFX_WORD_FIELD(class,
209                               ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
210                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
211                 PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
212 }
213
214 static inline uint64_t
215 sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
216 {
217         return EFX_WORD_FIELD(class,
218                               ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
219                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
220                 PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
221 }
222
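/*
 * Decode the CLASS word of the Rx prefix into an mbuf packet type and
 * checksum offload flags, for both plain and tunnelled packets.
 */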
223 static uint32_t
224 sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
225 {
226         uint32_t ptype;
227         bool no_tunnel = false;
228
229         if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
230                      ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
231                 return 0;
232
233         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
234         case 0:
235                 ptype = RTE_PTYPE_L2_ETHER;
236                 break;
237         case 1:
238                 ptype = RTE_PTYPE_L2_ETHER_VLAN;
239                 break;
240         default:
241                 ptype = RTE_PTYPE_L2_ETHER_QINQ;
242                 break;
243         }
244
245         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
246         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
247                 no_tunnel = true;
248                 break;
249         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
250                 ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
251                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
252                 break;
253         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
254                 ptype |= RTE_PTYPE_TUNNEL_NVGRE;
255                 break;
256         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
257                 ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
258                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
259                 break;
260         default:
261                 /*
262                  * The driver does not know the tunnel, but it is
263                  * still a tunnel and NT_OR_INNER refers to the inner
264                  * frame.
265                  */
266                 no_tunnel = false;
267         }
268
269         if (no_tunnel) {
270                 bool l4_valid = true;
271
272                 switch (EFX_WORD_FIELD(class,
273                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
274                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
275                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
276                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
277                         break;
278                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
279                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
280                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
281                         break;
282                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
283                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
284                         break;
285                 default:
286                         l4_valid = false;
287                 }
288
289                 if (l4_valid) {
290                         switch (EFX_WORD_FIELD(class,
291                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
292                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
293                                 ptype |= RTE_PTYPE_L4_TCP;
294                                 *ol_flags |=
295                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
296                                 break;
297                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
298                                 ptype |= RTE_PTYPE_L4_UDP;
299                                 *ol_flags |=
300                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
301                                 break;
302                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
303                                 ptype |= RTE_PTYPE_L4_FRAG;
304                                 break;
305                         }
306                 }
307         } else {
308                 bool l4_valid = true;
309
310                 switch (EFX_WORD_FIELD(class,
311                         ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
312                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
313                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
314                         break;
315                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
316                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
317                         *ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
318                         break;
319                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
320                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
321                         break;
322                 }
323
324                 switch (EFX_WORD_FIELD(class,
325                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
326                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
327                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
328                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
329                         break;
330                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
331                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
332                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
333                         break;
334                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
335                         ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
336                         break;
337                 default:
338                         l4_valid = false;
339                         break;
340                 }
341
342                 if (l4_valid) {
343                         switch (EFX_WORD_FIELD(class,
344                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
345                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
346                                 ptype |= RTE_PTYPE_INNER_L4_TCP;
347                                 *ol_flags |=
348                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
349                                 break;
350                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
351                                 ptype |= RTE_PTYPE_INNER_L4_UDP;
352                                 *ol_flags |=
353                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
354                                 break;
355                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
356                                 ptype |= RTE_PTYPE_INNER_L4_FRAG;
357                                 break;
358                         }
359                 }
360         }
361
362         return ptype;
363 }
364
365 /*
366  * The function below relies on the following fields in the Rx prefix.
367  * Some fields are mandatory, others are optional.
368  * See sfc_ef100_rx_qstart() below.
369  */
370 static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
371         .erpl_fields    = {
372 #define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
373         EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)
374
375                 SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
376                 SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
377                 SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
378                 EFX_RX_PREFIX_FIELD(INGRESS_MPORT,
379                                     ESF_GZ_RX_PREFIX_INGRESS_MPORT, B_FALSE),
380                 SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
381                 SFC_EF100_RX_PREFIX_FIELD(USER_FLAG, B_FALSE),
382                 SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),
383
384 #undef  SFC_EF100_RX_PREFIX_FIELD
385         }
386 };
387
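/*
 * Convert the Rx prefix into mbuf offload information: packet type,
 * RSS hash, user flag, user mark and ingress m-port.
 * Returns false if the L2 status is not OK and the packet should not
 * be delivered.
 */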
388 static bool
389 sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
390                                 const efx_xword_t *rx_prefix,
391                                 struct rte_mbuf *m)
392 {
393         const efx_word_t *class;
394         uint64_t ol_flags = 0;
395
396         RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
397         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
398         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
399                          sizeof(*class));
400         class = (const efx_word_t *)((const uint8_t *)rx_prefix +
401                 EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
402         if (unlikely(EFX_WORD_FIELD(*class,
403                                     ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
404                      ESE_GZ_RH_HCLASS_L2_STATUS_OK))
405                 return false;
406
407         m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);
408
409         if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
410             EFX_TEST_XWORD_BIT(rx_prefix[0],
411                                ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
412                 ol_flags |= PKT_RX_RSS_HASH;
413                 /* EFX_XWORD_FIELD converts little-endian to CPU */
414                 m->hash.rss = EFX_XWORD_FIELD(rx_prefix[0],
415                                               ESF_GZ_RX_PREFIX_RSS_HASH);
416         }
417
418         if (rxq->flags & SFC_EF100_RXQ_USER_FLAG) {
419                 uint32_t user_flag;
420
421                 user_flag = EFX_XWORD_FIELD(rx_prefix[0],
422                                             ESF_GZ_RX_PREFIX_USER_FLAG);
423                 if (user_flag != 0)
424                         ol_flags |= PKT_RX_FDIR;
425         }
426
427         if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
428                 uint32_t user_mark;
429                 uint32_t mark;
430
431                 /* EFX_XWORD_FIELD converts little-endian to CPU */
432                 mark = EFX_XWORD_FIELD(rx_prefix[0],
433                                        ESF_GZ_RX_PREFIX_USER_MARK);
434
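                /*
                 * Only the bits covered by user_mark_mask belong to the
                 * user; the remaining bits of the prefix mark are fenced
                 * off for tunnel offload use.
                 */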
435                 user_mark = mark & rxq->user_mark_mask;
436                 if (user_mark != SFC_EF100_USER_MARK_INVALID) {
437                         ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
438                         m->hash.fdir.hi = user_mark;
439                 }
440         }
441
442         if (rxq->flags & SFC_EF100_RXQ_INGRESS_MPORT) {
443                 ol_flags |= sfc_dp_mport_override;
444                 *RTE_MBUF_DYNFIELD(m,
445                         sfc_dp_mport_offset,
446                         typeof(&((efx_mport_id_t *)0)->id)) =
447                                 EFX_XWORD_FIELD(rx_prefix[0],
448                                                 ESF_GZ_RX_PREFIX_INGRESS_MPORT);
449         }
450
451         m->ol_flags = ol_flags;
452         return true;
453 }
454
455 static const uint8_t *
456 sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
457 {
458         return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
459 }
460
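/*
 * Take the mbuf of the currently completed Rx descriptor and prefetch
 * data needed to process the descriptors that follow.
 */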
461 static struct rte_mbuf *
462 sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
463 {
464         struct rte_mbuf *m;
465         unsigned int id;
466
467         /* mbuf associated with current Rx descriptor */
468         m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;
469
470         /* completed has already been advanced to the next descriptor */
471         if (unlikely(rxq->completed == rxq->added))
472                 goto done;
473
474         /*
475          * Prefetch the Rx prefix of the next packet.
476          * If the current packet is scattered and the next mbuf is its
477          * fragment, this simply prefetches some data - no harm, since the
478          * packet rate should not be high when scatter is used.
479          */
480         id = rxq->completed & rxq->ptr_mask;
481         rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));
482
483         if (unlikely(rxq->completed + 1 == rxq->added))
484                 goto done;
485
486         /*
487          * Prefetch the mbuf control structure of the Rx descriptor after
488          * the next one.
489          */
490         id = (id == rxq->ptr_mask) ? 0 : (id + 1);
491         rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);
492
493         /*
494          * If the SW Rx descriptor needed next time is in the next cache
495          * line, try to make sure that it is already in cache.
496          */
497         if ((id & 0x7) == 0x7)
498                 rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);
499
500 done:
501         return m;
502 }
503
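/*
 * Build mbufs (multi-segment chains when Rx scatter is in use) for
 * packets already announced by Rx events and store them in rx_pkts
 * up to rx_pkts_end.
 */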
504 static struct rte_mbuf **
505 sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
506                                 struct rte_mbuf **rx_pkts,
507                                 struct rte_mbuf ** const rx_pkts_end)
508 {
509         while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
510                 struct rte_mbuf *pkt;
511                 struct rte_mbuf *lastseg;
512                 const efx_xword_t *rx_prefix;
513                 uint16_t pkt_len;
514                 uint16_t seg_len;
515                 bool deliver;
516
517                 rxq->ready_pkts--;
518
519                 pkt = sfc_ef100_rx_next_mbuf(rxq);
520                 __rte_mbuf_raw_sanity_check(pkt);
521
522                 RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
523                                  sizeof(rxq->rearm_data));
524                 pkt->rearm_data[0] = rxq->rearm_data;
525
526                 /* data_off already moved past Rx prefix */
527                 rx_prefix = (const efx_xword_t *)sfc_ef100_rx_pkt_prefix(pkt);
528
529                 pkt_len = EFX_XWORD_FIELD(rx_prefix[0],
530                                           ESF_GZ_RX_PREFIX_LENGTH);
531                 SFC_ASSERT(pkt_len > 0);
532                 rte_pktmbuf_pkt_len(pkt) = pkt_len;
533
534                 seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
535                 rte_pktmbuf_data_len(pkt) = seg_len;
536
537                 deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);
538
539                 lastseg = pkt;
540                 while ((pkt_len -= seg_len) > 0) {
541                         struct rte_mbuf *seg;
542
543                         seg = sfc_ef100_rx_next_mbuf(rxq);
544                         __rte_mbuf_raw_sanity_check(seg);
545
546                         seg->data_off = RTE_PKTMBUF_HEADROOM;
547
548                         seg_len = RTE_MIN(pkt_len, rxq->buf_size);
549                         rte_pktmbuf_data_len(seg) = seg_len;
550                         rte_pktmbuf_pkt_len(seg) = seg_len;
551
552                         pkt->nb_segs++;
553                         lastseg->next = seg;
554                         lastseg = seg;
555                 }
556
557                 if (likely(deliver)) {
558                         *rx_pkts++ = pkt;
559                         sfc_pkts_bytes_add(&rxq->dp.dpq.stats, 1,
560                                            rte_pktmbuf_pkt_len(pkt));
561                 } else {
562                         rte_pktmbuf_free(pkt);
563                 }
564         }
565
566         return rx_pkts;
567 }
568
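/*
 * Read the next event from the event queue. The phase bit derived from
 * the read pointer shows whether the entry is valid. Any event other
 * than RX_PKTS is left unread and reported as an exception.
 */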
569 static bool
570 sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
571 {
572         *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];
573
574         if (!sfc_ef100_ev_present(ev,
575                         (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
576                 return false;
577
578         if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
579                 /*
580                  * Do not move read_ptr to keep the event for exception
581                  * handling by the control path.
582                  */
583                 rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
584                 sfc_ef100_rx_err(rxq,
585                         "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
586                         rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
587                         EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
588                         EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
589                 return false;
590         }
591
592         sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
593                            EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
594                            EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
595                            rxq->evq_read_ptr,
596                            rxq->evq_read_ptr & rxq->ptr_mask);
597
598         rxq->evq_read_ptr++;
599         return true;
600 }
601
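/*
 * Burst receive: drain packets already parsed from previous events,
 * process new RX_PKTS events, refill the Rx ring and re-prime the
 * event queue if interrupts are enabled.
 */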
602 static uint16_t
603 sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
604 {
605         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
606         struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
607         efx_qword_t rx_ev;
608
609         rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);
610
611         if (unlikely(rxq->flags &
612                      (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
613                 goto done;
614
615         while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
616                 rxq->ready_pkts =
617                         EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
618                 rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
619                                                           rx_pkts_end);
620         }
621
622         /* It is not a problem if we refill in the exception case */
623         sfc_ef100_rx_qrefill(rxq);
624
625         if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
626             rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
627                 sfc_ef100_rx_qprime(rxq);
628
629 done:
630         return nb_pkts - (rx_pkts_end - rx_pkts);
631 }
632
633 static const uint32_t *
634 sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
635 {
636         static const uint32_t ef100_native_ptypes[] = {
637                 RTE_PTYPE_L2_ETHER,
638                 RTE_PTYPE_L2_ETHER_VLAN,
639                 RTE_PTYPE_L2_ETHER_QINQ,
640                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
641                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
642                 RTE_PTYPE_L4_TCP,
643                 RTE_PTYPE_L4_UDP,
644                 RTE_PTYPE_L4_FRAG,
645                 RTE_PTYPE_TUNNEL_VXLAN,
646                 RTE_PTYPE_TUNNEL_NVGRE,
647                 RTE_PTYPE_TUNNEL_GENEVE,
648                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
649                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
650                 RTE_PTYPE_INNER_L4_TCP,
651                 RTE_PTYPE_INNER_L4_UDP,
652                 RTE_PTYPE_INNER_L4_FRAG,
653                 RTE_PTYPE_UNKNOWN
654         };
655
656         return ef100_native_ptypes;
657 }
658
659 static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
660 static unsigned int
661 sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
662 {
663         return 0;
664 }
665
666 static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
667 static int
668 sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
669                           __rte_unused uint16_t offset)
670 {
671         return -ENOTSUP;
672 }
673
674
675 static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
676 static void
677 sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
678 {
679         /*
680          * The number of descriptors just defines the maximum number of
681          * pushed descriptors (fill level).
682          */
683         dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
684         dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
685 }
686
687
688 static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
689 static int
690 sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
691                            struct sfc_dp_rx_hw_limits *limits,
692                            __rte_unused struct rte_mempool *mb_pool,
693                            unsigned int *rxq_entries,
694                            unsigned int *evq_entries,
695                            unsigned int *rxq_max_fill_level)
696 {
697         /*
698          * rte_ethdev API guarantees that the number meets min, max and
699          * alignment requirements.
700          */
701         if (nb_rx_desc <= limits->rxq_min_entries)
702                 *rxq_entries = limits->rxq_min_entries;
703         else
704                 *rxq_entries = rte_align32pow2(nb_rx_desc);
705
706         *evq_entries = *rxq_entries;
707
708         *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
709                                       SFC_EF100_RXQ_LIMIT(*evq_entries));
710         return 0;
711 }
712
713
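/*
 * Build the 64-bit rearm_data template (data_off, refcnt, nb_segs and
 * port) which is written into every received mbuf with a single store.
 */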
714 static uint64_t
715 sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
716 {
717         struct rte_mbuf m;
718
719         memset(&m, 0, sizeof(m));
720
721         rte_mbuf_refcnt_set(&m, 1);
722         m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
723         m.nb_segs = 1;
724         m.port = port_id;
725
726         /* rearm_data covers structure members filled in above */
727         rte_compiler_barrier();
728         RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
729         return m.rearm_data[0];
730 }
731
732 static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
733 static int
734 sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
735                     const struct rte_pci_addr *pci_addr, int socket_id,
736                     const struct sfc_dp_rx_qcreate_info *info,
737                     struct sfc_dp_rxq **dp_rxqp)
738 {
739         struct sfc_ef100_rxq *rxq;
740         int rc;
741
742         rc = EINVAL;
743         if (info->rxq_entries != info->evq_entries)
744                 goto fail_rxq_args;
745
746         rc = ENOMEM;
747         rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
748                                  RTE_CACHE_LINE_SIZE, socket_id);
749         if (rxq == NULL)
750                 goto fail_rxq_alloc;
751
752         sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
753
754         rc = ENOMEM;
755         rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
756                                          info->rxq_entries,
757                                          sizeof(*rxq->sw_ring),
758                                          RTE_CACHE_LINE_SIZE, socket_id);
759         if (rxq->sw_ring == NULL)
760                 goto fail_desc_alloc;
761
762         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
763         rxq->ptr_mask = info->rxq_entries - 1;
764         rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
765         rxq->evq_hw_ring = info->evq_hw_ring;
766         rxq->max_fill_level = info->max_fill_level;
767         rxq->refill_threshold = info->refill_threshold;
768         rxq->prefix_size = info->prefix_size;
769
770         SFC_ASSERT(info->user_mark_mask != 0);
771         rxq->user_mark_mask = info->user_mark_mask;
772
773         rxq->buf_size = info->buf_size;
774         rxq->refill_mb_pool = info->refill_mb_pool;
775         rxq->rxq_hw_ring = info->rxq_hw_ring;
776         rxq->doorbell = (volatile uint8_t *)info->mem_bar +
777                         ER_GZ_RX_RING_DOORBELL_OFST +
778                         (info->hw_index << info->vi_window_shift);
779
780         rxq->evq_hw_index = info->evq_hw_index;
781         rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
782                          info->fcw_offset +
783                          ER_GZ_EVQ_INT_PRIME_OFST;
784
785         sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
786
787         *dp_rxqp = &rxq->dp;
788         return 0;
789
790 fail_desc_alloc:
791         rte_free(rxq);
792
793 fail_rxq_alloc:
794 fail_rxq_args:
795         return rc;
796 }
797
798 static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
799 static void
800 sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
801 {
802         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
803
804         rte_free(rxq->sw_ring);
805         rte_free(rxq);
806 }
807
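/*
 * Start the Rx queue: verify that the Rx prefix layout provides the
 * mandatory fields, enable the optional features which are present,
 * fill the ring and prime the event queue if interrupts are enabled.
 */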
808 static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
809 static int
810 sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
811                     const efx_rx_prefix_layout_t *pinfo)
812 {
813         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
814         uint32_t unsup_rx_prefix_fields;
815
816         SFC_ASSERT(rxq->completed == 0);
817         SFC_ASSERT(rxq->added == 0);
818
819         /* Prefix must fit into reserved Rx buffer space */
820         if (pinfo->erpl_length > rxq->prefix_size)
821                 return ENOTSUP;
822
823         unsup_rx_prefix_fields =
824                 efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);
825
826         /* LENGTH and CLASS fields must always be present */
827         if ((unsup_rx_prefix_fields &
828              ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
829               (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
830                 return ENOTSUP;
831
832         if ((unsup_rx_prefix_fields &
833              ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
834               (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
835                 rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
836         else
837                 rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;
838
839         if ((unsup_rx_prefix_fields &
840              (1U << EFX_RX_PREFIX_FIELD_USER_FLAG)) == 0)
841                 rxq->flags |= SFC_EF100_RXQ_USER_FLAG;
842         else
843                 rxq->flags &= ~SFC_EF100_RXQ_USER_FLAG;
844
845         if ((unsup_rx_prefix_fields &
846              (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
847                 rxq->flags |= SFC_EF100_RXQ_USER_MARK;
848         else
849                 rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;
850
851         if ((unsup_rx_prefix_fields &
852              (1U << EFX_RX_PREFIX_FIELD_INGRESS_MPORT)) == 0)
853                 rxq->flags |= SFC_EF100_RXQ_INGRESS_MPORT;
854         else
855                 rxq->flags &= ~SFC_EF100_RXQ_INGRESS_MPORT;
856
857         rxq->prefix_size = pinfo->erpl_length;
858         rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
859                                                        rxq->prefix_size);
860
861         sfc_ef100_rx_qrefill(rxq);
862
863         rxq->evq_read_ptr = evq_read_ptr;
864
865         rxq->flags |= SFC_EF100_RXQ_STARTED;
866         rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);
867
868         if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
869                 sfc_ef100_rx_qprime(rxq);
870
871         return 0;
872 }
873
874 static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
875 static void
876 sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
877 {
878         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
879
880         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
881
882         *evq_read_ptr = rxq->evq_read_ptr;
883 }
884
885 static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
886 static bool
887 sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
888 {
889         __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
890
891         SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);
892
893         /*
894          * It is safe to ignore the Rx event since we free all mbufs on
895          * queue purge anyway.
896          */
897
898         return false;
899 }
900
901 static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
902 static void
903 sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
904 {
905         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
906         unsigned int i;
907         struct sfc_ef100_rx_sw_desc *rxd;
908
909         for (i = rxq->completed; i != rxq->added; ++i) {
910                 rxd = &rxq->sw_ring[i & rxq->ptr_mask];
911                 rte_mbuf_raw_free(rxd->mbuf);
912                 rxd->mbuf = NULL;
913         }
914
915         rxq->completed = rxq->added = 0;
916         rxq->ready_pkts = 0;
917
918         rxq->flags &= ~SFC_EF100_RXQ_STARTED;
919 }
920
921 static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
922 static int
923 sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
924 {
925         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
926
927         rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
928         if (rxq->flags & SFC_EF100_RXQ_STARTED)
929                 sfc_ef100_rx_qprime(rxq);
930         return 0;
931 }
932
933 static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
934 static int
935 sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
936 {
937         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
938
939         /* Cannot disarm, just disable rearm */
940         rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
941         return 0;
942 }
943
944 static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
945 static unsigned int
946 sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
947 {
948         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
949
950         /*
951          * The datapath keeps track only of added descriptors, since
952          * the number of pushed descriptors always equals the number
953          * of added descriptors due to enforced alignment.
954          */
955         return rxq->added;
956 }
957
958 struct sfc_dp_rx sfc_ef100_rx = {
959         .dp = {
960                 .name           = SFC_KVARG_DATAPATH_EF100,
961                 .type           = SFC_DP_RX,
962                 .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
963         },
964         .features               = SFC_DP_RX_FEAT_MULTI_PROCESS |
965                                   SFC_DP_RX_FEAT_FLOW_FLAG |
966                                   SFC_DP_RX_FEAT_FLOW_MARK |
967                                   SFC_DP_RX_FEAT_INTR |
968                                   SFC_DP_RX_FEAT_STATS,
969         .dev_offload_capa       = 0,
970         .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
971                                   DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
972                                   DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
973                                   DEV_RX_OFFLOAD_SCATTER |
974                                   DEV_RX_OFFLOAD_RSS_HASH,
975         .get_dev_info           = sfc_ef100_rx_get_dev_info,
976         .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
977         .qcreate                = sfc_ef100_rx_qcreate,
978         .qdestroy               = sfc_ef100_rx_qdestroy,
979         .qstart                 = sfc_ef100_rx_qstart,
980         .qstop                  = sfc_ef100_rx_qstop,
981         .qrx_ev                 = sfc_ef100_rx_qrx_ev,
982         .qpurge                 = sfc_ef100_rx_qpurge,
983         .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
984         .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
985         .qdesc_status           = sfc_ef100_rx_qdesc_status,
986         .intr_enable            = sfc_ef100_rx_intr_enable,
987         .intr_disable           = sfc_ef100_rx_intr_disable,
988         .get_pushed             = sfc_ef100_rx_get_pushed,
989         .pkt_burst              = sfc_ef100_recv_pkts,
990 };