net/sfc: support user mark and flag Rx for EF100
[dpdk.git] drivers/net/sfc/sfc_ef100_rx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2019-2020 Xilinx, Inc.
4  * Copyright(c) 2018-2019 Solarflare Communications Inc.
5  *
6  * This software was jointly developed between OKTET Labs (under contract
7  * for Solarflare) and Solarflare Communications, Inc.
8  */
9
10 /* EF100 native datapath implementation */
11
12 #include <stdbool.h>
13
14 #include <rte_byteorder.h>
15 #include <rte_mbuf_ptype.h>
16 #include <rte_mbuf.h>
17 #include <rte_io.h>
18
19 #include "efx_types.h"
20 #include "efx_regs_ef100.h"
21 #include "efx.h"
22
23 #include "sfc_debug.h"
24 #include "sfc_tweak.h"
25 #include "sfc_dp_rx.h"
26 #include "sfc_kvargs.h"
27 #include "sfc_ef100.h"
28
29
30 #define sfc_ef100_rx_err(_rxq, ...) \
31         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)
32
33 #define sfc_ef100_rx_debug(_rxq, ...) \
34         SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
35                    __VA_ARGS__)
36
37 /**
38  * Maximum number of descriptors/buffers in the Rx ring.
39  * It should guarantee that the corresponding event queue never overfills.
40  * The EF100 native datapath uses an event queue of the same size as the
41  * Rx queue. The maximum number of events on the datapath can be estimated
42  * as the number of Rx queue entries (one event per Rx buffer in the worst
43  * case) plus Rx error and flush events.
44  */
45 #define SFC_EF100_RXQ_LIMIT(_ndesc) \
46         ((_ndesc) - 1 /* head must not step on tail */ - \
47          1 /* Rx error */ - 1 /* flush */)
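/*
 * Illustration (values are hypothetical, not from the sources): for a ring of
 * 512 entries, SFC_EF100_RXQ_LIMIT(512) == 512 - 1 - 1 - 1 == 509, i.e. at
 * most 509 buffers may be posted at any time, which keeps head from stepping
 * on tail and leaves room for the Rx error and flush events.
 */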
48
49 struct sfc_ef100_rx_sw_desc {
50         struct rte_mbuf                 *mbuf;
51 };
52
53 struct sfc_ef100_rxq {
54         /* Used on data path */
55         unsigned int                    flags;
56 #define SFC_EF100_RXQ_STARTED           0x1
57 #define SFC_EF100_RXQ_NOT_RUNNING       0x2
58 #define SFC_EF100_RXQ_EXCEPTION         0x4
59 #define SFC_EF100_RXQ_RSS_HASH          0x10
60 #define SFC_EF100_RXQ_USER_MARK         0x20
61         unsigned int                    ptr_mask;
62         unsigned int                    evq_phase_bit_shift;
63         unsigned int                    ready_pkts;
64         unsigned int                    completed;
65         unsigned int                    evq_read_ptr;
66         volatile efx_qword_t            *evq_hw_ring;
67         struct sfc_ef100_rx_sw_desc     *sw_ring;
68         uint64_t                        rearm_data;
69         uint16_t                        buf_size;
70         uint16_t                        prefix_size;
71
72         /* Used on refill */
73         unsigned int                    added;
74         unsigned int                    max_fill_level;
75         unsigned int                    refill_threshold;
76         struct rte_mempool              *refill_mb_pool;
77         efx_qword_t                     *rxq_hw_ring;
78         volatile void                   *doorbell;
79
80         /* Datapath receive queue anchor */
81         struct sfc_dp_rxq               dp;
82 };
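/*
 * Note: ptr_mask relies on the ring size being a power of two, so Rx ring and
 * event queue indices (the two are required to be equal in size, see
 * sfc_ef100_rx_qcreate()) can be wrapped with "index & ptr_mask" instead of a
 * modulo; sfc_ef100_rx_qsize_up_rings() below rounds the requested size up
 * with rte_align32pow2() to keep this invariant.
 */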
83
84 static inline struct sfc_ef100_rxq *
85 sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
86 {
87         return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
88 }
89
90 static inline void
91 sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
92 {
93         efx_dword_t dword;
94
95         EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);
96
97         /* DMA sync to device is not required */
98
99         /*
100          * rte_write32() has rte_io_wmb() which guarantees that the STORE
101          * operations (i.e. Rx and event descriptor updates) that precede
102          * the rte_io_wmb() call are visible to NIC before the STORE
103          * operations that follow it (i.e. doorbell write).
104          */
105         rte_write32(dword.ed_u32[0], rxq->doorbell);
106
107         sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
108                            EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
109                            added);
110 }
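/*
 * Usage note: the doorbell is written with the producer index wrapped to the
 * ring ("added & rxq->ptr_mask"); the added/completed counters themselves are
 * free-running and are only masked at the point of use.
 */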
111
112 static void
113 sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
114 {
115         const unsigned int ptr_mask = rxq->ptr_mask;
116         unsigned int free_space;
117         unsigned int bulks;
118         void *objs[SFC_RX_REFILL_BULK];
119         unsigned int added = rxq->added;
120
121         free_space = rxq->max_fill_level - (added - rxq->completed);
122
123         if (free_space < rxq->refill_threshold)
124                 return;
125
126         bulks = free_space / RTE_DIM(objs);
127         /* refill_threshold guarantees that bulks is positive */
128         SFC_ASSERT(bulks > 0);
129
130         do {
131                 unsigned int id;
132                 unsigned int i;
133
134                 if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
135                                                   RTE_DIM(objs)) < 0)) {
136                         struct rte_eth_dev_data *dev_data =
137                                 rte_eth_devices[rxq->dp.dpq.port_id].data;
138
139                         /*
140                          * It is hardly a safe way to increment a counter
141                          * from different contexts, but all PMDs do it.
142                          */
143                         dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
144                         /* Return if we have posted nothing yet */
145                         if (added == rxq->added)
146                                 return;
147                         /* Push posted */
148                         break;
149                 }
150
151                 for (i = 0, id = added & ptr_mask;
152                      i < RTE_DIM(objs);
153                      ++i, ++id) {
154                         struct rte_mbuf *m = objs[i];
155                         struct sfc_ef100_rx_sw_desc *rxd;
156                         rte_iova_t phys_addr;
157
158                         MBUF_RAW_ALLOC_CHECK(m);
159
160                         SFC_ASSERT((id & ~ptr_mask) == 0);
161                         rxd = &rxq->sw_ring[id];
162                         rxd->mbuf = m;
163
164                         /*
165                          * Avoid writing to mbuf. It is cheaper to do it
166                          * when we receive the packet and fill in nearby
167                          * structure members.
168                          */
169
170                         phys_addr = rte_mbuf_data_iova_default(m);
171                         EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
172                             ESF_GZ_RX_BUF_ADDR, phys_addr);
173                 }
174
175                 added += RTE_DIM(objs);
176         } while (--bulks > 0);
177
178         SFC_ASSERT(rxq->added != added);
179         rxq->added = added;
180         sfc_ef100_rx_qpush(rxq, added);
181 }
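/*
 * Refill summary: buffers are allocated in bulks of SFC_RX_REFILL_BULK mbufs,
 * descriptors are written directly into the hardware ring, and the doorbell
 * is pushed once at the end. If the mempool runs dry part way through, the
 * descriptors posted so far are still pushed so the NIC can keep receiving;
 * if nothing was posted at all, the function returns without touching the
 * doorbell.
 */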
182
183 static inline uint64_t
184 sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
185 {
186         return EFX_WORD_FIELD(class,
187                               ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
188                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
189                 PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
190 }
191
192 static inline uint64_t
193 sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
194 {
195         return EFX_WORD_FIELD(class,
196                               ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
197                 ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
198                 PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
199 }
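/*
 * The two helpers above map the L4 checksum class from the Rx prefix to mbuf
 * offload flags: NT_OR_INNER covers the non-tunnel or inner frame
 * (PKT_RX_L4_CKSUM_*), TUN_OUTER covers the outer frame of a recognised
 * tunnel (PKT_RX_OUTER_L4_CKSUM_*).
 */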
200
201 static uint32_t
202 sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
203 {
204         uint32_t ptype;
205         bool no_tunnel = false;
206
207         if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
208                      ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
209                 return 0;
210
211         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
212         case 0:
213                 ptype = RTE_PTYPE_L2_ETHER;
214                 break;
215         case 1:
216                 ptype = RTE_PTYPE_L2_ETHER_VLAN;
217                 break;
218         default:
219                 ptype = RTE_PTYPE_L2_ETHER_QINQ;
220                 break;
221         }
222
223         switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
224         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
225                 no_tunnel = true;
226                 break;
227         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
228                 ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
229                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
230                 break;
231         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
232                 ptype |= RTE_PTYPE_TUNNEL_NVGRE;
233                 break;
234         case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
235                 ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
236                 *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
237                 break;
238         default:
239                 /*
240                  * The driver does not know the tunnel, but it is
241                  * still a tunnel and NT_OR_INNER refers to the inner
242                  * frame.
243                  */
244                 no_tunnel = false;
245         }
246
247         if (no_tunnel) {
248                 bool l4_valid = true;
249
250                 switch (EFX_WORD_FIELD(class,
251                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
252                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
253                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
254                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
255                         break;
256                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
257                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
258                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
259                         break;
260                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
261                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
262                         break;
263                 default:
264                         l4_valid = false;
265                 }
266
267                 if (l4_valid) {
268                         switch (EFX_WORD_FIELD(class,
269                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
270                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
271                                 ptype |= RTE_PTYPE_L4_TCP;
272                                 *ol_flags |=
273                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
274                                 break;
275                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
276                                 ptype |= RTE_PTYPE_L4_UDP;
277                                 *ol_flags |=
278                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
279                                 break;
280                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
281                                 ptype |= RTE_PTYPE_L4_FRAG;
282                                 break;
283                         }
284                 }
285         } else {
286                 bool l4_valid = true;
287
288                 switch (EFX_WORD_FIELD(class,
289                         ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
290                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
291                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
292                         break;
293                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
294                         ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
295                         *ol_flags |= PKT_RX_EIP_CKSUM_BAD;
296                         break;
297                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
298                         ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
299                         break;
300                 }
301
302                 switch (EFX_WORD_FIELD(class,
303                         ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
304                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
305                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
306                         *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
307                         break;
308                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
309                         ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
310                         *ol_flags |= PKT_RX_IP_CKSUM_BAD;
311                         break;
312                 case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
313                         ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
314                         break;
315                 default:
316                         l4_valid = false;
317                         break;
318                 }
319
320                 if (l4_valid) {
321                         switch (EFX_WORD_FIELD(class,
322                                 ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
323                         case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
324                                 ptype |= RTE_PTYPE_INNER_L4_TCP;
325                                 *ol_flags |=
326                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
327                                 break;
328                         case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
329                                 ptype |= RTE_PTYPE_INNER_L4_UDP;
330                                 *ol_flags |=
331                                         sfc_ef100_rx_nt_or_inner_l4_csum(class);
332                                 break;
333                         case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
334                                 ptype |= RTE_PTYPE_INNER_L4_FRAG;
335                                 break;
336                         }
337                 }
338         }
339
340         return ptype;
341 }
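/*
 * Decode example (hypothetical packet): a VLAN-tagged VXLAN packet carrying
 * TCP in IPv4 over IPv4, with all checksums good, would be reported as
 *   RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP |
 *   RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
 *   RTE_PTYPE_INNER_L4_TCP
 * with PKT_RX_OUTER_L4_CKSUM_GOOD, PKT_RX_IP_CKSUM_GOOD and
 * PKT_RX_L4_CKSUM_GOOD accumulated in ol_flags.
 */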
342
343 /*
344  * The function below relies on the following fields in the Rx prefix.
345  * Some fields are mandatory, some fields are optional.
346  * See sfc_ef100_rx_qstart() below.
347  */
348 static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
349         .erpl_fields    = {
350 #define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
351         EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)
352
353                 SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
354                 SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
355                 SFC_EF100_RX_PREFIX_FIELD(USER_FLAG, B_FALSE),
356                 SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
357                 SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
358                 SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),
359
360 #undef  SFC_EF100_RX_PREFIX_FIELD
361         }
362 };
363
364 static bool
365 sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
366                                 const efx_oword_t *rx_prefix,
367                                 struct rte_mbuf *m)
368 {
369         const efx_word_t *class;
370         uint64_t ol_flags = 0;
371
372         RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
373         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
374         RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
375                          sizeof(*class));
376         class = (const efx_word_t *)((const uint8_t *)rx_prefix +
377                 EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
378         if (unlikely(EFX_WORD_FIELD(*class,
379                                     ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
380                      ESE_GZ_RH_HCLASS_L2_STATUS_OK))
381                 return false;
382
383         m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);
384
385         if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
386             EFX_TEST_OWORD_BIT(rx_prefix[0],
387                                ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
388                 ol_flags |= PKT_RX_RSS_HASH;
389                 /* EFX_OWORD_FIELD converts little-endian to CPU */
390                 m->hash.rss = EFX_OWORD_FIELD(rx_prefix[0],
391                                               ESF_GZ_RX_PREFIX_RSS_HASH);
392         }
393
394         if ((rxq->flags & SFC_EF100_RXQ_USER_MARK) &&
395             EFX_TEST_OWORD_BIT(rx_prefix[0], ESF_GZ_RX_PREFIX_USER_FLAG_LBN)) {
396                 ol_flags |= PKT_RX_FDIR_ID;
397                 /* EFX_OWORD_FIELD converts little-endian to CPU */
398                 m->hash.fdir.hi = EFX_OWORD_FIELD(rx_prefix[0],
399                                                   ESF_GZ_RX_PREFIX_USER_MARK);
400         }
401
402         m->ol_flags = ol_flags;
403         return true;
404 }
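/*
 * Returning false from the function above (bad L2 status in the prefix) makes
 * sfc_ef100_rx_process_ready_pkts() drop the packet instead of delivering it;
 * see the "deliver" flag there.
 */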
405
406 static const uint8_t *
407 sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
408 {
409         return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
410 }
411
412 static struct rte_mbuf *
413 sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
414 {
415         struct rte_mbuf *m;
416         unsigned int id;
417
418         /* mbuf associated with current Rx descriptor */
419         m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;
420
421         /* completed has already been advanced to the next descriptor */
422         if (unlikely(rxq->completed == rxq->added))
423                 goto done;
424
425         /*
426          * Prefetch Rx prefix of the next packet.
427          * If the current packet is scattered and the next mbuf is its
428          * fragment, this simply prefetches some data - no harm since the
429          * packet rate should not be high if scatter is used.
430          */
431         id = rxq->completed & rxq->ptr_mask;
432         rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));
433
434         if (unlikely(rxq->completed + 1 == rxq->added))
435                 goto done;
436
437         /*
438          * Prefetch mbuf control structure of the next after next Rx
439          * descriptor.
440          */
441         id = (id == rxq->ptr_mask) ? 0 : (id + 1);
442         rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);
443
444         /*
445          * If the SW Rx descriptor needed next time lies in the next
446          * cache line, try to make sure that it is already in cache.
447          */
448         if ((id & 0x7) == 0x7)
449                 rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);
450
451 done:
452         return m;
453 }
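/*
 * The "(id & 0x7) == 0x7" heuristic above assumes that one SW Rx descriptor
 * is a single mbuf pointer (8 bytes on 64-bit platforms) and that cache lines
 * are 64 bytes, so 8 descriptors share a cache line and the next line is
 * prefetched when the last descriptor of the current line is about to be used.
 */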
454
455 static struct rte_mbuf **
456 sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
457                                 struct rte_mbuf **rx_pkts,
458                                 struct rte_mbuf ** const rx_pkts_end)
459 {
460         while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
461                 struct rte_mbuf *pkt;
462                 struct rte_mbuf *lastseg;
463                 const efx_oword_t *rx_prefix;
464                 uint16_t pkt_len;
465                 uint16_t seg_len;
466                 bool deliver;
467
468                 rxq->ready_pkts--;
469
470                 pkt = sfc_ef100_rx_next_mbuf(rxq);
471                 MBUF_RAW_ALLOC_CHECK(pkt);
472
473                 RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
474                                  sizeof(rxq->rearm_data));
475                 pkt->rearm_data[0] = rxq->rearm_data;
476
477                 /* data_off already moved past Rx prefix */
478                 rx_prefix = (const efx_oword_t *)sfc_ef100_rx_pkt_prefix(pkt);
479
480                 pkt_len = EFX_OWORD_FIELD(rx_prefix[0],
481                                           ESF_GZ_RX_PREFIX_LENGTH);
482                 SFC_ASSERT(pkt_len > 0);
483                 rte_pktmbuf_pkt_len(pkt) = pkt_len;
484
485                 seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
486                 rte_pktmbuf_data_len(pkt) = seg_len;
487
488                 deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);
489
490                 lastseg = pkt;
491                 while ((pkt_len -= seg_len) > 0) {
492                         struct rte_mbuf *seg;
493
494                         seg = sfc_ef100_rx_next_mbuf(rxq);
495                         MBUF_RAW_ALLOC_CHECK(seg);
496
497                         seg->data_off = RTE_PKTMBUF_HEADROOM;
498
499                         seg_len = RTE_MIN(pkt_len, rxq->buf_size);
500                         rte_pktmbuf_data_len(seg) = seg_len;
501                         rte_pktmbuf_pkt_len(seg) = seg_len;
502
503                         pkt->nb_segs++;
504                         lastseg->next = seg;
505                         lastseg = seg;
506                 }
507
508                 if (likely(deliver))
509                         *rx_pkts++ = pkt;
510                 else
511                         rte_pktmbuf_free(pkt);
512         }
513
514         return rx_pkts;
515 }
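/*
 * Scatter reassembly above: the first segment of a packet carries the Rx
 * prefix (data_off set via rearm_data already skips it), while continuation
 * segments start at RTE_PKTMBUF_HEADROOM and may use the whole buffer, hence
 * the different seg_len limits for the head and the tail segments.
 */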
516
517 static bool
518 sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
519 {
520         *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];
521
522         if (!sfc_ef100_ev_present(ev,
523                         (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
524                 return false;
525
526         if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
527                 /*
528                  * Do not move read_ptr to keep the event for exception
529                  * handling by the control path.
530                  */
531                 rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
532                 sfc_ef100_rx_err(rxq,
533                         "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
534                         rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
535                         EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
536                         EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
537                 return false;
538         }
539
540         sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
541                            EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
542                            EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
543                            rxq->evq_read_ptr,
544                            rxq->evq_read_ptr & rxq->ptr_mask);
545
546         rxq->evq_read_ptr++;
547         return true;
548 }
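/*
 * Event presence is detected with a phase bit: since the event queue size is
 * a power of two, ((evq_read_ptr >> evq_phase_bit_shift) & 1) flips every
 * time the read pointer wraps, and an event slot is treated as valid only
 * while its phase matches. For example, with a 512-entry event queue
 * evq_phase_bit_shift == 9, so the expected phase toggles every 512 events.
 */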
549
550 static uint16_t
551 sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
552 {
553         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
554         struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
555         efx_qword_t rx_ev;
556
557         rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);
558
559         if (unlikely(rxq->flags &
560                      (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
561                 goto done;
562
563         while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
564                 rxq->ready_pkts =
565                         EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
566                 rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
567                                                           rx_pkts_end);
568         }
569
570         /* It is not a problem if we refill in the case of an exception */
571         sfc_ef100_rx_qrefill(rxq);
572
573 done:
574         return nb_pkts - (rx_pkts_end - rx_pkts);
575 }
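/*
 * Return value arithmetic: rx_pkts has been advanced past the packets that
 * were written, so nb_pkts - (rx_pkts_end - rx_pkts) is exactly the number of
 * mbufs stored into the caller's array (e.g. 32 - (32 - 10) == 10 packets).
 */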
576
577 static const uint32_t *
578 sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
579 {
580         static const uint32_t ef100_native_ptypes[] = {
581                 RTE_PTYPE_L2_ETHER,
582                 RTE_PTYPE_L2_ETHER_VLAN,
583                 RTE_PTYPE_L2_ETHER_QINQ,
584                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
585                 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
586                 RTE_PTYPE_L4_TCP,
587                 RTE_PTYPE_L4_UDP,
588                 RTE_PTYPE_L4_FRAG,
589                 RTE_PTYPE_TUNNEL_VXLAN,
590                 RTE_PTYPE_TUNNEL_NVGRE,
591                 RTE_PTYPE_TUNNEL_GENEVE,
592                 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
593                 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
594                 RTE_PTYPE_INNER_L4_TCP,
595                 RTE_PTYPE_INNER_L4_UDP,
596                 RTE_PTYPE_INNER_L4_FRAG,
597                 RTE_PTYPE_UNKNOWN
598         };
599
600         return ef100_native_ptypes;
601 }
602
603 static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
604 static unsigned int
605 sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
606 {
607         return 0;
608 }
609
610 static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
611 static int
612 sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
613                           __rte_unused uint16_t offset)
614 {
615         return -ENOTSUP;
616 }
617
618
619 static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
620 static void
621 sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
622 {
623         /*
624          * The number of descriptors just defines the maximum number of
625          * pushed descriptors (fill level).
626          */
627         dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
628         dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
629 }
630
631
632 static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
633 static int
634 sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
635                            struct sfc_dp_rx_hw_limits *limits,
636                            __rte_unused struct rte_mempool *mb_pool,
637                            unsigned int *rxq_entries,
638                            unsigned int *evq_entries,
639                            unsigned int *rxq_max_fill_level)
640 {
641         /*
642          * rte_ethdev API guarantees that the number meets min, max and
643          * alignment requirements.
644          */
645         if (nb_rx_desc <= limits->rxq_min_entries)
646                 *rxq_entries = limits->rxq_min_entries;
647         else
648                 *rxq_entries = rte_align32pow2(nb_rx_desc);
649
650         *evq_entries = *rxq_entries;
651
652         *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
653                                       SFC_EF100_RXQ_LIMIT(*evq_entries));
654         return 0;
655 }
656
657
658 static uint64_t
659 sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
660 {
661         struct rte_mbuf m;
662
663         memset(&m, 0, sizeof(m));
664
665         rte_mbuf_refcnt_set(&m, 1);
666         m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
667         m.nb_segs = 1;
668         m.port = port_id;
669
670         /* rearm_data covers structure members filled in above */
671         rte_compiler_barrier();
672         RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
673         return m.rearm_data[0];
674 }
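/*
 * The value built above is the 8-byte "rearm" word of a template mbuf,
 * covering data_off, refcnt, nb_segs and port; on the data path it is stored
 * into each received mbuf with a single 64-bit write
 * (pkt->rearm_data[0] = rxq->rearm_data) instead of four separate stores.
 */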
675
676 static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
677 static int
678 sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
679                     const struct rte_pci_addr *pci_addr, int socket_id,
680                     const struct sfc_dp_rx_qcreate_info *info,
681                     struct sfc_dp_rxq **dp_rxqp)
682 {
683         struct sfc_ef100_rxq *rxq;
684         int rc;
685
686         rc = EINVAL;
687         if (info->rxq_entries != info->evq_entries)
688                 goto fail_rxq_args;
689
690         rc = ENOMEM;
691         rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
692                                  RTE_CACHE_LINE_SIZE, socket_id);
693         if (rxq == NULL)
694                 goto fail_rxq_alloc;
695
696         sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);
697
698         rc = ENOMEM;
699         rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
700                                          info->rxq_entries,
701                                          sizeof(*rxq->sw_ring),
702                                          RTE_CACHE_LINE_SIZE, socket_id);
703         if (rxq->sw_ring == NULL)
704                 goto fail_desc_alloc;
705
706         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
707         rxq->ptr_mask = info->rxq_entries - 1;
708         rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
709         rxq->evq_hw_ring = info->evq_hw_ring;
710         rxq->max_fill_level = info->max_fill_level;
711         rxq->refill_threshold = info->refill_threshold;
712         rxq->prefix_size = info->prefix_size;
713         rxq->buf_size = info->buf_size;
714         rxq->refill_mb_pool = info->refill_mb_pool;
715         rxq->rxq_hw_ring = info->rxq_hw_ring;
716         rxq->doorbell = (volatile uint8_t *)info->mem_bar +
717                         ER_GZ_RX_RING_DOORBELL_OFST +
718                         (info->hw_index << info->vi_window_shift);
719
720         sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
721
722         *dp_rxqp = &rxq->dp;
723         return 0;
724
725 fail_desc_alloc:
726         rte_free(rxq);
727
728 fail_rxq_alloc:
729 fail_rxq_args:
730         return rc;
731 }
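/*
 * Doorbell address computed in qcreate: mem_bar + ER_GZ_RX_RING_DOORBELL_OFST
 * + (hw_index << vi_window_shift), i.e. each queue presumably lives in its
 * own VI register window and hw_index selects the window within the BAR.
 */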
732
733 static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
734 static void
735 sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
736 {
737         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
738
739         rte_free(rxq->sw_ring);
740         rte_free(rxq);
741 }
742
743 static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
744 static int
745 sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
746                     const efx_rx_prefix_layout_t *pinfo)
747 {
748         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
749         uint32_t unsup_rx_prefix_fields;
750
751         SFC_ASSERT(rxq->completed == 0);
752         SFC_ASSERT(rxq->added == 0);
753
754         /* Prefix must fit into reserved Rx buffer space */
755         if (pinfo->erpl_length > rxq->prefix_size)
756                 return ENOTSUP;
757
758         unsup_rx_prefix_fields =
759                 efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);
760
761         /* The LENGTH and CLASS fields must always be present */
762         if ((unsup_rx_prefix_fields &
763              ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
764               (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
765                 return ENOTSUP;
766
767         if ((unsup_rx_prefix_fields &
768              ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
769               (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
770                 rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
771         else
772                 rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;
773
774         if ((unsup_rx_prefix_fields &
775              ((1U << EFX_RX_PREFIX_FIELD_USER_FLAG) |
776               (1U << EFX_RX_PREFIX_FIELD_USER_MARK))) == 0)
777                 rxq->flags |= SFC_EF100_RXQ_USER_MARK;
778         else
779                 rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;
780
781         rxq->prefix_size = pinfo->erpl_length;
782         rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
783                                                        rxq->prefix_size);
784
785         sfc_ef100_rx_qrefill(rxq);
786
787         rxq->evq_read_ptr = evq_read_ptr;
788
789         rxq->flags |= SFC_EF100_RXQ_STARTED;
790         rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);
791
792         return 0;
793 }
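/*
 * Prefix negotiation in qstart: LENGTH and CLASS are mandatory, so their
 * absence fails the start with ENOTSUP; RSS hash and user mark delivery are
 * enabled only when both the value field and its companion field
 * (RSS_HASH_VALID, USER_FLAG) are present in the layout supplied by the
 * control path.
 */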
794
795 static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
796 static void
797 sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
798 {
799         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
800
801         rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
802
803         *evq_read_ptr = rxq->evq_read_ptr;
804 }
805
806 static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
807 static bool
808 sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
809 {
810         __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
811
812         SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);
813
814         /*
815          * It is safe to ignore the Rx event since we free all mbufs on
816          * queue purge anyway.
817          */
818
819         return false;
820 }
821
822 static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
823 static void
824 sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
825 {
826         struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
827         unsigned int i;
828         struct sfc_ef100_rx_sw_desc *rxd;
829
830         for (i = rxq->completed; i != rxq->added; ++i) {
831                 rxd = &rxq->sw_ring[i & rxq->ptr_mask];
832                 rte_mbuf_raw_free(rxd->mbuf);
833                 rxd->mbuf = NULL;
834         }
835
836         rxq->completed = rxq->added = 0;
837         rxq->ready_pkts = 0;
838
839         rxq->flags &= ~SFC_EF100_RXQ_STARTED;
840 }
841
842 struct sfc_dp_rx sfc_ef100_rx = {
843         .dp = {
844                 .name           = SFC_KVARG_DATAPATH_EF100,
845                 .type           = SFC_DP_RX,
846                 .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
847         },
848         .features               = SFC_DP_RX_FEAT_MULTI_PROCESS,
849         .dev_offload_capa       = 0,
850         .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
851                                   DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
852                                   DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
853                                   DEV_RX_OFFLOAD_SCATTER |
854                                   DEV_RX_OFFLOAD_RSS_HASH,
855         .get_dev_info           = sfc_ef100_rx_get_dev_info,
856         .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
857         .qcreate                = sfc_ef100_rx_qcreate,
858         .qdestroy               = sfc_ef100_rx_qdestroy,
859         .qstart                 = sfc_ef100_rx_qstart,
860         .qstop                  = sfc_ef100_rx_qstop,
861         .qrx_ev                 = sfc_ef100_rx_qrx_ev,
862         .qpurge                 = sfc_ef100_rx_qpurge,
863         .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
864         .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
865         .qdesc_status           = sfc_ef100_rx_qdesc_status,
866         .pkt_burst              = sfc_ef100_recv_pkts,
867 };