/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2018-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF100 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx_types.h"
#include "efx_regs_ef100.h"
#include "efx.h"

#include "sfc_debug.h"
#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"


#define sfc_ef100_rx_err(_rxq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

#define sfc_ef100_rx_debug(_rxq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
                   __VA_ARGS__)

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF100 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events in the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
        ((_ndesc) - 1 /* head must not step on tail */ - \
         1 /* Rx error */ - 1 /* flush */)
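
/*
 * Worked example (illustrative only): with a 512-entry Rx ring the limit is
 * 512 - 1 - 1 - 1 = 509 buffers that may be outstanding at any time.
 */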

/** Invalid user mark value when the mark should be treated as unset */
#define SFC_EF100_USER_MARK_INVALID     0

struct sfc_ef100_rx_sw_desc {
        struct rte_mbuf                 *mbuf;
};

struct sfc_ef100_rxq {
        /* Used on data path */
        unsigned int                    flags;
#define SFC_EF100_RXQ_STARTED           0x1
#define SFC_EF100_RXQ_NOT_RUNNING       0x2
#define SFC_EF100_RXQ_EXCEPTION         0x4
#define SFC_EF100_RXQ_RSS_HASH          0x10
#define SFC_EF100_RXQ_USER_MARK         0x20
#define SFC_EF100_RXQ_FLAG_INTR_EN      0x40
#define SFC_EF100_RXQ_INGRESS_MPORT     0x80
        unsigned int                    ptr_mask;
        unsigned int                    evq_phase_bit_shift;
        unsigned int                    ready_pkts;
        unsigned int                    completed;
        unsigned int                    evq_read_ptr;
        unsigned int                    evq_read_ptr_primed;
        volatile efx_qword_t            *evq_hw_ring;
        struct sfc_ef100_rx_sw_desc     *sw_ring;
        uint64_t                        rearm_data;
        uint16_t                        buf_size;
        uint16_t                        prefix_size;

        unsigned int                    evq_hw_index;
        volatile void                   *evq_prime;

        /* Used on refill */
        unsigned int                    added;
        unsigned int                    max_fill_level;
        unsigned int                    refill_threshold;
        struct rte_mempool              *refill_mb_pool;
        efx_qword_t                     *rxq_hw_ring;
        volatile void                   *doorbell;

        /* Datapath receive queue anchor */
        struct sfc_dp_rxq               dp;
};

static inline struct sfc_ef100_rxq *
sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
        return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
}

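/*
 * Re-arm ("prime") the event queue interrupt at the current read pointer so
 * that the NIC will raise an interrupt for the next event it delivers.
 */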
static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
        sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
                            rxq->evq_read_ptr & rxq->ptr_mask);
        rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}

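/*
 * Ring the Rx doorbell with the new producer index so that the NIC can see
 * the freshly posted Rx buffers.
 */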
static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
        efx_dword_t dword;

        EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

        /* DMA sync to device is not required */

        /*
         * rte_write32() has rte_io_wmb() which guarantees that the STORE
         * operations (i.e. Rx and event descriptor updates) that precede
         * the rte_io_wmb() call are visible to the NIC before the STORE
         * operations that follow it (i.e. the doorbell write).
         */
        rte_write32(dword.ed_u32[0], rxq->doorbell);
        rxq->dp.dpq.rx_dbells++;

        sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
                           EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
                           added);
}

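/*
 * Refill the Rx ring in bulks of SFC_RX_REFILL_BULK mbufs taken from the
 * mempool: write each buffer's IOVA into the hardware descriptor ring and
 * push the doorbell once at the end. A bulk is assumed never to cross the
 * ring wrap point (the ring size is a multiple of the bulk size), which is
 * what the SFC_ASSERT() on the descriptor index below relies on.
 */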
static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
        const unsigned int ptr_mask = rxq->ptr_mask;
        unsigned int free_space;
        unsigned int bulks;
        void *objs[SFC_RX_REFILL_BULK];
        unsigned int added = rxq->added;

        free_space = rxq->max_fill_level - (added - rxq->completed);

        if (free_space < rxq->refill_threshold)
                return;

        bulks = free_space / RTE_DIM(objs);
        /* refill_threshold guarantees that bulks is positive */
        SFC_ASSERT(bulks > 0);

        do {
                unsigned int id;
                unsigned int i;

                if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
                                                  RTE_DIM(objs)) < 0)) {
                        struct rte_eth_dev_data *dev_data =
                                rte_eth_devices[rxq->dp.dpq.port_id].data;

                        /*
                         * It is hardly a safe way to increment a counter
                         * from different contexts, but all PMDs do it.
                         */
                        dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
                        /* Return if we have posted nothing yet */
                        if (added == rxq->added)
                                return;
                        /* Push posted */
                        break;
                }

                for (i = 0, id = added & ptr_mask;
                     i < RTE_DIM(objs);
                     ++i, ++id) {
                        struct rte_mbuf *m = objs[i];
                        struct sfc_ef100_rx_sw_desc *rxd;
                        rte_iova_t phys_addr;

                        __rte_mbuf_raw_sanity_check(m);

                        SFC_ASSERT((id & ~ptr_mask) == 0);
                        rxd = &rxq->sw_ring[id];
                        rxd->mbuf = m;

                        /*
                         * Avoid writing to the mbuf. It is cheaper to do it
                         * when we receive the packet and fill in nearby
                         * structure members.
                         */

                        phys_addr = rte_mbuf_data_iova_default(m);
                        EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
                            ESF_GZ_RX_BUF_ADDR, phys_addr);
                }

                added += RTE_DIM(objs);
        } while (--bulks > 0);

        SFC_ASSERT(rxq->added != added);
        rxq->added = added;
        sfc_ef100_rx_qpush(rxq, added);
}

static inline uint64_t
sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
{
        return EFX_WORD_FIELD(class,
                              ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
                ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
                PKT_RX_L4_CKSUM_GOOD : PKT_RX_L4_CKSUM_BAD;
}

static inline uint64_t
sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
{
        return EFX_WORD_FIELD(class,
                              ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
                ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
                PKT_RX_OUTER_L4_CKSUM_GOOD : PKT_RX_OUTER_L4_CKSUM_BAD;
}

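/*
 * Decode the CLASS word of the Rx prefix into an mbuf packet type and
 * checksum offload flags. Unknown L2 classes yield packet type 0; tunnelled
 * frames additionally report outer and inner L3/L4 classification.
 */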
static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
        uint32_t ptype;
        bool no_tunnel = false;

        if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
                     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
                return 0;

        switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
        case 0:
                ptype = RTE_PTYPE_L2_ETHER;
                break;
        case 1:
                ptype = RTE_PTYPE_L2_ETHER_VLAN;
                break;
        default:
                ptype = RTE_PTYPE_L2_ETHER_QINQ;
                break;
        }

        switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
                no_tunnel = true;
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
                ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
                *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
                ptype |= RTE_PTYPE_TUNNEL_NVGRE;
                break;
        case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
                ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
                *ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
                break;
        default:
                /*
                 * The driver does not know the tunnel, but it is
                 * still a tunnel and NT_OR_INNER refers to the inner
                 * frame.
                 */
                no_tunnel = false;
        }

        if (no_tunnel) {
                bool l4_valid = true;

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
                        break;
                default:
                        l4_valid = false;
                }

                if (l4_valid) {
                        switch (EFX_WORD_FIELD(class,
                                ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
                        case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
                                ptype |= RTE_PTYPE_L4_TCP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
                                ptype |= RTE_PTYPE_L4_UDP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
                                ptype |= RTE_PTYPE_L4_FRAG;
                                break;
                        }
                }
        } else {
                bool l4_valid = true;

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
                        break;
                }

                switch (EFX_WORD_FIELD(class,
                        ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
                        ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_GOOD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
                        ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
                        *ol_flags |= PKT_RX_IP_CKSUM_BAD;
                        break;
                case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
                        ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
                        break;
                default:
                        l4_valid = false;
                        break;
                }

                if (l4_valid) {
                        switch (EFX_WORD_FIELD(class,
                                ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
                        case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
                                ptype |= RTE_PTYPE_INNER_L4_TCP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
                                ptype |= RTE_PTYPE_INNER_L4_UDP;
                                *ol_flags |=
                                        sfc_ef100_rx_nt_or_inner_l4_csum(class);
                                break;
                        case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
                                ptype |= RTE_PTYPE_INNER_L4_FRAG;
                                break;
                        }
                }
        }

        return ptype;
}

/*
 * The function below relies on the following fields in the Rx prefix.
 * Some fields are mandatory, some fields are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
        .erpl_fields    = {
#define SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
        EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

                SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
                EFX_RX_PREFIX_FIELD(INGRESS_MPORT,
                                    ESF_GZ_RX_PREFIX_INGRESS_MPORT, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
                SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef  SFC_EF100_RX_PREFIX_FIELD
        }
};

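/*
 * Parse the Rx prefix into mbuf metadata: packet type, checksum flags,
 * RSS hash, user mark and ingress m-port (each only if the corresponding
 * queue flag is set). Returns false when the L2 status is bad so that the
 * caller can drop the packet.
 */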
static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
                                const efx_xword_t *rx_prefix,
                                struct rte_mbuf *m)
{
        const efx_word_t *class;
        uint64_t ol_flags = 0;

        RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
        RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
        RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
                         sizeof(*class));
        class = (const efx_word_t *)((const uint8_t *)rx_prefix +
                EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
        if (unlikely(EFX_WORD_FIELD(*class,
                                    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
                     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
                return false;

        m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

        if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
            EFX_TEST_XWORD_BIT(rx_prefix[0],
                               ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
                ol_flags |= PKT_RX_RSS_HASH;
                /* EFX_XWORD_FIELD converts little-endian to CPU */
                m->hash.rss = EFX_XWORD_FIELD(rx_prefix[0],
                                              ESF_GZ_RX_PREFIX_RSS_HASH);
        }

        if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
                uint32_t user_mark;

                /* EFX_XWORD_FIELD converts little-endian to CPU */
                user_mark = EFX_XWORD_FIELD(rx_prefix[0],
                                            ESF_GZ_RX_PREFIX_USER_MARK);
                if (user_mark != SFC_EF100_USER_MARK_INVALID) {
                        ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
                        m->hash.fdir.hi = user_mark;
                }
        }

        if (rxq->flags & SFC_EF100_RXQ_INGRESS_MPORT) {
                ol_flags |= sfc_dp_mport_override;
                *RTE_MBUF_DYNFIELD(m,
                        sfc_dp_mport_offset,
                        typeof(&((efx_mport_id_t *)0)->id)) =
                                EFX_XWORD_FIELD(rx_prefix[0],
                                                ESF_GZ_RX_PREFIX_INGRESS_MPORT);
        }

        m->ol_flags = ol_flags;
        return true;
}

static const uint8_t *
sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
{
        return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
}

static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
        struct rte_mbuf *m;
        unsigned int id;

        /* mbuf associated with current Rx descriptor */
        m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

        /* completed has already been advanced to the next descriptor */
        if (unlikely(rxq->completed == rxq->added))
                goto done;

        /*
         * Prefetch the Rx prefix of the next packet.
         * If the current packet is scattered and the next mbuf is its
         * fragment, this simply prefetches some data - no harm since the
         * packet rate should not be high when scatter is used.
         */
        id = rxq->completed & rxq->ptr_mask;
        rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

        if (unlikely(rxq->completed + 1 == rxq->added))
                goto done;

        /*
         * Prefetch mbuf control structure of the next after next Rx
         * descriptor.
         */
        id = (id == rxq->ptr_mask) ? 0 : (id + 1);
        rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

        /*
         * If next time we will need an SW Rx descriptor from the next
         * cache line, try to make sure that it is already in cache.
         */
        if ((id & 0x7) == 0x7)
                rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
        return m;
}

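/*
 * Deliver packets that the NIC has already reported as ready. The first
 * mbuf of each packet carries the Rx prefix: the total packet length is
 * taken from it and further segments are chained until that length is
 * consumed (Rx scatter).
 */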
static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
                                struct rte_mbuf **rx_pkts,
                                struct rte_mbuf ** const rx_pkts_end)
{
        while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
                struct rte_mbuf *pkt;
                struct rte_mbuf *lastseg;
                const efx_xword_t *rx_prefix;
                uint16_t pkt_len;
                uint16_t seg_len;
                bool deliver;

                rxq->ready_pkts--;

                pkt = sfc_ef100_rx_next_mbuf(rxq);
                __rte_mbuf_raw_sanity_check(pkt);

                RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
                                 sizeof(rxq->rearm_data));
                pkt->rearm_data[0] = rxq->rearm_data;

                /* data_off already moved past Rx prefix */
                rx_prefix = (const efx_xword_t *)sfc_ef100_rx_pkt_prefix(pkt);

                pkt_len = EFX_XWORD_FIELD(rx_prefix[0],
                                          ESF_GZ_RX_PREFIX_LENGTH);
                SFC_ASSERT(pkt_len > 0);
                rte_pktmbuf_pkt_len(pkt) = pkt_len;

                seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
                rte_pktmbuf_data_len(pkt) = seg_len;

                deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

                lastseg = pkt;
                while ((pkt_len -= seg_len) > 0) {
                        struct rte_mbuf *seg;

                        seg = sfc_ef100_rx_next_mbuf(rxq);
                        __rte_mbuf_raw_sanity_check(seg);

                        seg->data_off = RTE_PKTMBUF_HEADROOM;

                        seg_len = RTE_MIN(pkt_len, rxq->buf_size);
                        rte_pktmbuf_data_len(seg) = seg_len;
                        rte_pktmbuf_pkt_len(seg) = seg_len;

                        pkt->nb_segs++;
                        lastseg->next = seg;
                        lastseg = seg;
                }

                if (likely(deliver)) {
                        *rx_pkts++ = pkt;
                        sfc_pkts_bytes_add(&rxq->dp.dpq.stats, 1,
                                           rte_pktmbuf_pkt_len(pkt));
                } else {
                        rte_pktmbuf_free(pkt);
                }
        }

        return rx_pkts;
}

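/*
 * Check whether a new event is present at the current EVQ read pointer.
 * Validity is detected via the phase bit: the expected phase is derived
 * from the read pointer (evq_read_ptr >> evq_phase_bit_shift) and flips on
 * every wrap of the event queue, so stale entries are never misread.
 */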
static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
        *ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

        if (!sfc_ef100_ev_present(ev,
                        (rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
                return false;

        if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
                /*
                 * Do not move read_ptr to keep the event for exception
                 * handling by the control path.
                 */
                rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
                sfc_ef100_rx_err(rxq,
                        "RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
                        rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
                        EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
                        EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
                return false;
        }

        sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
                           EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
                           EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
                           rxq->evq_read_ptr,
                           rxq->evq_read_ptr & rxq->ptr_mask);

        rxq->evq_read_ptr++;
        return true;
}

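/*
 * Burst receive: deliver packets already known to be ready, then poll the
 * event queue for RX_PKTS events (each reports how many packets became
 * ready), refill the Rx ring and re-prime the event queue if Rx interrupts
 * are enabled.
 */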
static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
        struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
        efx_qword_t rx_ev;

        rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

        if (unlikely(rxq->flags &
                     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
                goto done;

        while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
                rxq->ready_pkts =
                        EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
                rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
                                                          rx_pkts_end);
        }

        /* It is not a problem if we refill in the case of exception */
        sfc_ef100_rx_qrefill(rxq);

        if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
            rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
                sfc_ef100_rx_qprime(rxq);

done:
        return nb_pkts - (rx_pkts_end - rx_pkts);
}

static const uint32_t *
sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
        static const uint32_t ef100_native_ptypes[] = {
                RTE_PTYPE_L2_ETHER,
                RTE_PTYPE_L2_ETHER_VLAN,
                RTE_PTYPE_L2_ETHER_QINQ,
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_L4_TCP,
                RTE_PTYPE_L4_UDP,
                RTE_PTYPE_L4_FRAG,
                RTE_PTYPE_TUNNEL_VXLAN,
                RTE_PTYPE_TUNNEL_NVGRE,
                RTE_PTYPE_TUNNEL_GENEVE,
                RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
                RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
                RTE_PTYPE_INNER_L4_TCP,
                RTE_PTYPE_INNER_L4_UDP,
                RTE_PTYPE_INNER_L4_FRAG,
                RTE_PTYPE_UNKNOWN
        };

        return ef100_native_ptypes;
}

static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
        return 0;
}

static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
                          __rte_unused uint16_t offset)
{
        return -ENOTSUP;
}


static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
        /*
         * Number of descriptors just defines maximum number of pushed
         * descriptors (fill level).
         */
        dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
        dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}


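/*
 * Size the Rx and event rings. Illustrative example: for nb_rx_desc = 1000
 * the Rx ring is rounded up to 1024 entries, the event queue gets the same
 * size and the maximum fill level is min(1000, SFC_EF100_RXQ_LIMIT(1024)) =
 * min(1000, 1021) = 1000 buffers.
 */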
static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
static int
sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
                           struct sfc_dp_rx_hw_limits *limits,
                           __rte_unused struct rte_mempool *mb_pool,
                           unsigned int *rxq_entries,
                           unsigned int *evq_entries,
                           unsigned int *rxq_max_fill_level)
{
        /*
         * rte_ethdev API guarantees that the number meets min, max and
         * alignment requirements.
         */
        if (nb_rx_desc <= limits->rxq_min_entries)
                *rxq_entries = limits->rxq_min_entries;
        else
                *rxq_entries = rte_align32pow2(nb_rx_desc);

        *evq_entries = *rxq_entries;

        *rxq_max_fill_level = RTE_MIN(nb_rx_desc,
                                      SFC_EF100_RXQ_LIMIT(*evq_entries));
        return 0;
}


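/*
 * Build a 64-bit snapshot of the mbuf fields covered by the rearm_data
 * marker (data_off, refcnt, nb_segs and port in this DPDK version), so the
 * receive path can reinitialise them with a single 64-bit store per packet.
 */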
static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
        struct rte_mbuf m;

        memset(&m, 0, sizeof(m));

        rte_mbuf_refcnt_set(&m, 1);
        m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
        m.nb_segs = 1;
        m.port = port_id;

        /* rearm_data covers structure members filled in above */
        rte_compiler_barrier();
        RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
        return m.rearm_data[0];
}

static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
                    const struct rte_pci_addr *pci_addr, int socket_id,
                    const struct sfc_dp_rx_qcreate_info *info,
                    struct sfc_dp_rxq **dp_rxqp)
{
        struct sfc_ef100_rxq *rxq;
        int rc;

        rc = EINVAL;
        if (info->rxq_entries != info->evq_entries)
                goto fail_rxq_args;

        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq == NULL)
                goto fail_rxq_alloc;

        sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

        rc = ENOMEM;
        rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
                                         info->rxq_entries,
                                         sizeof(*rxq->sw_ring),
                                         RTE_CACHE_LINE_SIZE, socket_id);
        if (rxq->sw_ring == NULL)
                goto fail_desc_alloc;

        rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
        rxq->ptr_mask = info->rxq_entries - 1;
        rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
        rxq->evq_hw_ring = info->evq_hw_ring;
        rxq->max_fill_level = info->max_fill_level;
        rxq->refill_threshold = info->refill_threshold;
        rxq->prefix_size = info->prefix_size;
        rxq->buf_size = info->buf_size;
        rxq->refill_mb_pool = info->refill_mb_pool;
        rxq->rxq_hw_ring = info->rxq_hw_ring;
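        /*
         * Each virtual interface (VI) has its own window of registers in
         * the memory BAR; the Rx doorbell lives at a fixed offset within
         * the window selected by the queue's hardware index.
         */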
        rxq->doorbell = (volatile uint8_t *)info->mem_bar +
                        ER_GZ_RX_RING_DOORBELL_OFST +
                        (info->hw_index << info->vi_window_shift);

        rxq->evq_hw_index = info->evq_hw_index;
        rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
                         info->fcw_offset +
                         ER_GZ_EVQ_INT_PRIME_OFST;

        sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

        *dp_rxqp = &rxq->dp;
        return 0;

fail_desc_alloc:
        rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
        return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rte_free(rxq->sw_ring);
        rte_free(rxq);
}

static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
                    const efx_rx_prefix_layout_t *pinfo)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
        uint32_t unsup_rx_prefix_fields;

        SFC_ASSERT(rxq->completed == 0);
        SFC_ASSERT(rxq->added == 0);

        /* Prefix must fit into reserved Rx buffer space */
        if (pinfo->erpl_length > rxq->prefix_size)
                return ENOTSUP;

        unsup_rx_prefix_fields =
                efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

        /* The LENGTH and CLASS fields must always be present */
        if ((unsup_rx_prefix_fields &
             ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
              (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
                return ENOTSUP;

        if ((unsup_rx_prefix_fields &
             ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
              (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
                rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
        else
                rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

        if ((unsup_rx_prefix_fields &
             (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
                rxq->flags |= SFC_EF100_RXQ_USER_MARK;
        else
                rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

        if ((unsup_rx_prefix_fields &
             (1U << EFX_RX_PREFIX_FIELD_INGRESS_MPORT)) == 0)
                rxq->flags |= SFC_EF100_RXQ_INGRESS_MPORT;
        else
                rxq->flags &= ~SFC_EF100_RXQ_INGRESS_MPORT;

        rxq->prefix_size = pinfo->erpl_length;
        rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
                                                       rxq->prefix_size);

        sfc_ef100_rx_qrefill(rxq);

        rxq->evq_read_ptr = evq_read_ptr;

        rxq->flags |= SFC_EF100_RXQ_STARTED;
        rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

        if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
                sfc_ef100_rx_qprime(rxq);

        return 0;
}

static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

        *evq_read_ptr = rxq->evq_read_ptr;
}

static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
        __rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

        /*
         * It is safe to ignore Rx event since we free all mbufs on
         * queue purge anyway.
         */

        return false;
}

static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
static void
sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
        unsigned int i;
        struct sfc_ef100_rx_sw_desc *rxd;

        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_ring[i & rxq->ptr_mask];
                rte_mbuf_raw_free(rxd->mbuf);
                rxd->mbuf = NULL;
        }

        rxq->completed = rxq->added = 0;
        rxq->ready_pkts = 0;

        rxq->flags &= ~SFC_EF100_RXQ_STARTED;
}

static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
static int
sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
        if (rxq->flags & SFC_EF100_RXQ_STARTED)
                sfc_ef100_rx_qprime(rxq);
        return 0;
}

static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        /* Cannot disarm, just disable rearm */
        rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
        return 0;
}

static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
static unsigned int
sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
{
        struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

        /*
         * The datapath keeps track only of added descriptors, since
         * the number of pushed descriptors always equals the number
         * of added descriptors due to enforced alignment.
         */
        return rxq->added;
}

struct sfc_dp_rx sfc_ef100_rx = {
        .dp = {
                .name           = SFC_KVARG_DATAPATH_EF100,
                .type           = SFC_DP_RX,
                .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF100,
        },
        .features               = SFC_DP_RX_FEAT_MULTI_PROCESS |
                                  SFC_DP_RX_FEAT_INTR |
                                  SFC_DP_RX_FEAT_STATS,
        .dev_offload_capa       = 0,
        .queue_offload_capa     = DEV_RX_OFFLOAD_CHECKSUM |
                                  DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
                                  DEV_RX_OFFLOAD_OUTER_UDP_CKSUM |
                                  DEV_RX_OFFLOAD_SCATTER |
                                  DEV_RX_OFFLOAD_RSS_HASH,
        .get_dev_info           = sfc_ef100_rx_get_dev_info,
        .qsize_up_rings         = sfc_ef100_rx_qsize_up_rings,
        .qcreate                = sfc_ef100_rx_qcreate,
        .qdestroy               = sfc_ef100_rx_qdestroy,
        .qstart                 = sfc_ef100_rx_qstart,
        .qstop                  = sfc_ef100_rx_qstop,
        .qrx_ev                 = sfc_ef100_rx_qrx_ev,
        .qpurge                 = sfc_ef100_rx_qpurge,
        .supported_ptypes_get   = sfc_ef100_supported_ptypes_get,
        .qdesc_npending         = sfc_ef100_rx_qdesc_npending,
        .qdesc_status           = sfc_ef100_rx_qdesc_status,
        .intr_enable            = sfc_ef100_rx_intr_enable,
        .intr_disable           = sfc_ef100_rx_intr_disable,
        .get_pushed             = sfc_ef100_rx_get_pushed,
        .pkt_burst              = sfc_ef100_recv_pkts,
};