drivers/net/sfc/sfc_mae_counter.c
/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2020-2021 Xilinx, Inc.
 */

#include <rte_common.h>
#include <rte_service_component.h>

#include "efx.h"
#include "efx_regs_counters_pkt_format.h"

#include "sfc_ev.h"
#include "sfc.h"
#include "sfc_rx.h"
#include "sfc_mae_counter.h"
#include "sfc_service.h"

/**
 * Approximate maximum number of counters per packet.
 * In fact, the maximum depends on the per-counter data offset, which is
 * specified in the counter packet header.
 */
#define SFC_MAE_COUNTERS_PER_PACKET_MAX \
        ((SFC_MAE_COUNTER_STREAM_PACKET_SIZE - \
          ER_RX_SL_PACKETISER_HEADER_WORD_SIZE) / \
          ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)

/**
 * Minimum number of Rx buffers in counters only Rx queue.
 */
#define SFC_MAE_COUNTER_RXQ_BUFS_MIN \
        (SFC_COUNTER_RXQ_RX_DESC_COUNT - SFC_COUNTER_RXQ_REFILL_LEVEL)

/**
 * Approximate number of counter updates which fit in counters only Rx queue.
 * The number is inaccurate since SFC_MAE_COUNTERS_PER_PACKET_MAX is
 * inaccurate (see above). However, it gives a rough idea of the number of
 * counter updates which can fit in the Rx queue after an empty poll.
 *
 * The define is not actually used, but documents the calculation.
 */
#define SFC_MAE_COUNTERS_RXQ_SPACE \
        (SFC_MAE_COUNTER_RXQ_BUFS_MIN * SFC_MAE_COUNTERS_PER_PACKET_MAX)

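/*
 * Pick a service lcore for counter polling: prefer the adapter's NUMA
 * node and fall back to any node, warning when the preference cannot
 * be satisfied.
 */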
static uint32_t
sfc_mae_counter_get_service_lcore(struct sfc_adapter *sa)
{
        uint32_t cid;

        cid = sfc_get_service_lcore(sa->socket_id);
        if (cid != RTE_MAX_LCORE)
                return cid;

        if (sa->socket_id != SOCKET_ID_ANY)
                cid = sfc_get_service_lcore(SOCKET_ID_ANY);

        if (cid == RTE_MAX_LCORE) {
                sfc_warn(sa, "failed to get service lcore for counter service");
        } else if (sa->socket_id != SOCKET_ID_ANY) {
                sfc_warn(sa,
                        "failed to get service lcore for counter service at socket %d, but got at socket %u",
                        sa->socket_id, rte_lcore_to_socket_id(cid));
        }
        return cid;
}

bool
sfc_mae_counter_rxq_required(struct sfc_adapter *sa)
{
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);

        if (encp->enc_mae_supported == B_FALSE)
                return false;

        return true;
}

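/*
 * Allocate one MAE counter in the firmware, record its reset baseline
 * and generation count, and mark the corresponding host slot as in use.
 */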
int
sfc_mae_counter_enable(struct sfc_adapter *sa,
                       struct sfc_mae_counter_id *counterp)
{
        struct sfc_mae_counter_registry *reg = &sa->mae.counter_registry;
        struct sfc_mae_counters *counters = &reg->counters;
        struct sfc_mae_counter *p;
        efx_counter_t mae_counter;
        uint32_t generation_count;
        uint32_t unused;
        int rc;

        /*
         * The actual count of counters allocated is ignored since a failure
         * to allocate a single counter is indicated by a non-zero return code.
         */
        rc = efx_mae_counters_alloc(sa->nic, 1, &unused, &mae_counter,
                                    &generation_count);
        if (rc != 0) {
                sfc_err(sa, "failed to alloc MAE counter: %s",
                        rte_strerror(rc));
                goto fail_mae_counter_alloc;
        }

        if (mae_counter.id >= counters->n_mae_counters) {
                /*
                 * A counter ID is expected to be within the range from 0 to
                 * the maximum number of counters, so that it always fits
                 * into the array pre-allocated for the maximum counter count.
                 */
                sfc_err(sa, "MAE counter ID is out of expected range");
                rc = EFAULT;
                goto fail_counter_id_range;
        }

        counterp->mae_id = mae_counter;

        p = &counters->mae_counters[mae_counter.id];

        /*
         * Ordering is relaxed since it is the only operation on counter value.
         * And it does not depend on different stores/loads in other threads.
         * Paired with relaxed ordering in counter increment.
         */
        __atomic_store(&p->reset.pkts_bytes.int128,
                       &p->value.pkts_bytes.int128, __ATOMIC_RELAXED);
        p->generation_count = generation_count;

        p->ft_group_hit_counter = counterp->ft_group_hit_counter;

        /*
         * The flag is set at the very end of add operation and reset
         * at the beginning of delete operation. Release ordering is
         * paired with acquire ordering on load in counter increment operation.
         */
        __atomic_store_n(&p->inuse, true, __ATOMIC_RELEASE);

        sfc_info(sa, "enabled MAE counter #%u with reset pkts=%" PRIu64
                 " bytes=%" PRIu64, mae_counter.id,
                 p->reset.pkts, p->reset.bytes);

        return 0;

fail_counter_id_range:
        (void)efx_mae_counters_free(sa->nic, 1, &unused, &mae_counter, NULL);

fail_mae_counter_alloc:
        sfc_log_init(sa, "failed: %s", rte_strerror(rc));
        return rc;
}

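/*
 * Mark the counter slot as unused and free the MAE counter in the
 * firmware. The counter ID is invalidated regardless of the outcome
 * of the free operation.
 */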
int
sfc_mae_counter_disable(struct sfc_adapter *sa,
                        struct sfc_mae_counter_id *counter)
{
        struct sfc_mae_counter_registry *reg = &sa->mae.counter_registry;
        struct sfc_mae_counters *counters = &reg->counters;
        struct sfc_mae_counter *p;
        uint32_t unused;
        int rc;

        if (counter->mae_id.id == EFX_MAE_RSRC_ID_INVALID)
                return 0;

        SFC_ASSERT(counter->mae_id.id < counters->n_mae_counters);
        /*
         * The flag is set at the very end of add operation and reset
         * at the beginning of delete operation. Release ordering is
         * paired with acquire ordering on load in counter increment operation.
         */
        p = &counters->mae_counters[counter->mae_id.id];
        __atomic_store_n(&p->inuse, false, __ATOMIC_RELEASE);

        rc = efx_mae_counters_free(sa->nic, 1, &unused, &counter->mae_id, NULL);
        if (rc != 0)
                sfc_err(sa, "failed to free MAE counter %u: %s",
                        counter->mae_id.id, rte_strerror(rc));

        sfc_info(sa, "disabled MAE counter #%u with reset pkts=%" PRIu64
                 " bytes=%" PRIu64, counter->mae_id.id,
                 p->reset.pkts, p->reset.bytes);

        /*
         * Do this regardless of the efx_mae_counters_free() return value.
         * If there is an error, the resulting resource leakage is bad, but
         * nothing sensible can be done in this case.
         */
        counter->mae_id.id = EFX_MAE_RSRC_ID_INVALID;

        return rc;
}

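/*
 * Apply a single counter update from a counter stream packet. Updates
 * for counters which are not in use or which carry a stale generation
 * count (counter freed and reallocated) are discarded and accounted
 * in xstats.
 */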
static void
sfc_mae_counter_increment(struct sfc_adapter *sa,
                          struct sfc_mae_counters *counters,
                          uint32_t mae_counter_id,
                          uint32_t generation_count,
                          uint64_t pkts, uint64_t bytes)
{
        struct sfc_mae_counter *p = &counters->mae_counters[mae_counter_id];
        struct sfc_mae_counters_xstats *xstats = &counters->xstats;
        union sfc_pkts_bytes cnt_val;
        bool inuse;

        /*
         * Acquire ordering is paired with release ordering in counter add
         * and delete operations.
         */
        __atomic_load(&p->inuse, &inuse, __ATOMIC_ACQUIRE);
        if (!inuse) {
                /*
                 * Two possible cases:
                 * 1) The counter has just been allocated. A counter update
                 *    arriving too early cannot be processed properly.
                 * 2) Stale update of a freed and not reallocated counter.
                 *    There is no point in processing that update.
                 */
                xstats->not_inuse_update++;
                return;
        }

        if (unlikely(generation_count < p->generation_count)) {
                /*
                 * It is a stale update for the reallocated counter
                 * (i.e., freed and the same ID allocated again).
                 */
                xstats->realloc_update++;
                return;
        }

        cnt_val.pkts = p->value.pkts + pkts;
        cnt_val.bytes = p->value.bytes + bytes;

        /*
         * Ordering is relaxed since it is the only operation on counter value.
         * And it does not depend on different stores/loads in other threads.
         * Paired with relaxed ordering on counter reset.
         */
        __atomic_store(&p->value.pkts_bytes,
                       &cnt_val.pkts_bytes, __ATOMIC_RELAXED);

        if (p->ft_group_hit_counter != NULL) {
                uint64_t ft_group_hit_counter;

                ft_group_hit_counter = *p->ft_group_hit_counter + pkts;
                __atomic_store_n(p->ft_group_hit_counter, ft_group_hit_counter,
                                 __ATOMIC_RELAXED);
        }

        sfc_info(sa, "update MAE counter #%u: pkts+%" PRIu64 "=%" PRIu64
                 ", bytes+%" PRIu64 "=%" PRIu64, mae_counter_id,
                 pkts, cnt_val.pkts, bytes, cnt_val.bytes);
}

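/*
 * Validate the packetiser header of a counter stream packet and apply
 * every counter update it carries. Packets which fail validation are
 * dropped with an error message.
 */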
static void
sfc_mae_parse_counter_packet(struct sfc_adapter *sa,
                             struct sfc_mae_counter_registry *counter_registry,
                             const struct rte_mbuf *m)
{
        uint32_t generation_count;
        const efx_xword_t *hdr;
        const efx_oword_t *counters_data;
        unsigned int version;
        unsigned int id;
        unsigned int header_offset;
        unsigned int payload_offset;
        unsigned int counter_count;
        unsigned int required_len;
        unsigned int i;

        if (unlikely(m->nb_segs != 1)) {
                sfc_err(sa, "unexpectedly scattered MAE counters packet (%u segments)",
                        m->nb_segs);
                return;
        }

        if (unlikely(m->data_len < ER_RX_SL_PACKETISER_HEADER_WORD_SIZE)) {
                sfc_err(sa, "too short MAE counters packet (%u bytes)",
                        m->data_len);
                return;
        }

        /*
         * The generation count is located in the Rx prefix in the USER_MARK
         * field which is written into hash.fdir.hi field of an mbuf. See
         * SF-123581-TC SmartNIC Datapath Offloads section 4.7.5 Counters.
         */
        generation_count = m->hash.fdir.hi;

        hdr = rte_pktmbuf_mtod(m, const efx_xword_t *);

        version = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_VERSION);
        if (unlikely(version != ERF_SC_PACKETISER_HEADER_VERSION_2)) {
                sfc_err(sa, "unexpected MAE counters packet version %u",
                        version);
                return;
        }

        id = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_IDENTIFIER);
        if (unlikely(id != ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR)) {
                sfc_err(sa, "unexpected MAE counters source identifier %u", id);
                return;
        }

        /* Packet layout definitions assume a fixed header offset in fact */
        header_offset =
                EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_HEADER_OFFSET);
        if (unlikely(header_offset !=
                     ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT)) {
                sfc_err(sa, "unexpected MAE counters packet header offset %u",
                        header_offset);
                return;
        }

        payload_offset =
                EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_PAYLOAD_OFFSET);

        counter_count = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_COUNT);

        required_len = payload_offset +
                        counter_count * sizeof(counters_data[0]);
        if (unlikely(required_len > m->data_len)) {
                sfc_err(sa, "truncated MAE counters packet: %u counters, packet length is %u vs %u required",
                        counter_count, m->data_len, required_len);
                /*
                 * In theory it is possible to process the available counters
                 * data, but such a condition is really unexpected and it is
                 * better to treat the entire packet as corrupted.
                 */
                return;
        }

        /* Ensure that counters data is 32-bit aligned */
        if (unlikely(payload_offset % sizeof(uint32_t) != 0)) {
                sfc_err(sa, "unsupported MAE counters payload offset %u, must be 32-bit aligned",
                        payload_offset);
                return;
        }
        RTE_BUILD_BUG_ON(sizeof(counters_data[0]) !=
                        ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE);

        counters_data =
                rte_pktmbuf_mtod_offset(m, const efx_oword_t *, payload_offset);

        sfc_info(sa, "update %u MAE counters with gc=%u",
                 counter_count, generation_count);

        for (i = 0; i < counter_count; ++i) {
                uint32_t packet_count_lo;
                uint32_t packet_count_hi;
                uint32_t byte_count_lo;
                uint32_t byte_count_hi;

                /*
                 * Use 32-bit field accessors below since counters data
                 * is not 64-bit aligned.
                 * 32-bit alignment is checked above taking into account
                 * that start of packet data is 32-bit aligned
                 * (cache-line size aligned in fact).
                 */
                packet_count_lo =
                        EFX_OWORD_FIELD32(counters_data[i],
                                ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO);
                packet_count_hi =
                        EFX_OWORD_FIELD32(counters_data[i],
                                ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_HI);
                byte_count_lo =
                        EFX_OWORD_FIELD32(counters_data[i],
                                ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO);
                byte_count_hi =
                        EFX_OWORD_FIELD32(counters_data[i],
                                ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_HI);
                sfc_mae_counter_increment(sa,
                        &counter_registry->counters,
                        EFX_OWORD_FIELD32(counters_data[i],
                                ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX),
                        generation_count,
                        (uint64_t)packet_count_lo |
                        ((uint64_t)packet_count_hi <<
                         ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO_WIDTH),
                        (uint64_t)byte_count_lo |
                        ((uint64_t)byte_count_hi <<
                         ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO_WIDTH));
        }
}

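/*
 * Poll a burst of counter stream packets from the dedicated Rx queue
 * and parse them. If the stream uses credits, give credits back to the
 * firmware once enough buffers have been pushed to the queue.
 */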
static int32_t
sfc_mae_counter_poll_packets(struct sfc_adapter *sa)
{
        struct sfc_mae_counter_registry *counter_registry =
                &sa->mae.counter_registry;
        struct rte_mbuf *mbufs[SFC_MAE_COUNTER_RX_BURST];
        unsigned int pushed_diff;
        unsigned int pushed;
        unsigned int i;
        uint16_t n;
        int rc;

        n = counter_registry->rx_pkt_burst(counter_registry->rx_dp, mbufs,
                                           SFC_MAE_COUNTER_RX_BURST);

        for (i = 0; i < n; i++)
                sfc_mae_parse_counter_packet(sa, counter_registry, mbufs[i]);

        rte_pktmbuf_free_bulk(mbufs, n);

        if (!counter_registry->use_credits)
                return n;

        pushed = sfc_rx_get_pushed(sa, counter_registry->rx_dp);
        pushed_diff = pushed - counter_registry->pushed_n_buffers;

        if (pushed_diff >= SFC_COUNTER_RXQ_REFILL_LEVEL) {
                rc = efx_mae_counters_stream_give_credits(sa->nic, pushed_diff);
                if (rc == 0) {
                        counter_registry->pushed_n_buffers = pushed;
                } else {
                        /*
                         * FIXME: counters might be important for the
                         * application. Handle the error in order to recover
                         * from the failure
                         */
                        SFC_GENERIC_LOG(DEBUG, "Give credits failed: %s",
                                        rte_strerror(rc));
                }
        }

        return n;
}

static int32_t
sfc_mae_counter_service_routine(void *arg)
{
        struct sfc_adapter *sa = arg;

        /*
         * We cannot propagate any errors and we don't need to know
         * the number of packets we've received.
         */
        (void)sfc_mae_counter_poll_packets(sa);

        return 0;
}

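/*
 * Polling loop used when no service lcore is available: poll the
 * counter Rx queue in a control thread and sleep briefly whenever the
 * queue turns out to be empty.
 */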
static void *
sfc_mae_counter_thread(void *data)
{
        struct sfc_adapter *sa = data;
        struct sfc_mae_counter_registry *counter_registry =
                &sa->mae.counter_registry;
        int32_t rc;

        while (__atomic_load_n(&counter_registry->polling.thread.run,
                               __ATOMIC_ACQUIRE)) {
                rc = sfc_mae_counter_poll_packets(sa);
                if (rc == 0) {
                        /*
                         * The queue is empty. Do not burn CPU.
                         * An empty queue has just enough space for about
                         * SFC_MAE_COUNTERS_RXQ_SPACE counter updates which is
                         * more than 100K, so we can sleep a bit. The queue uses
                         * a credit-based flow control anyway, so firmware will
                         * not enqueue more counter updates until the host
                         * supplies it with additional credits. The counters are
                         * 48 bits wide, so the timeout need only be short
                         * enough to ensure that the counter values do not
                         * overflow before the next counter update. Also we
                         * should not delay counter updates for a long time,
                         * otherwise the application may decide that the flow
                         * is idle and should be removed.
                         */
                        rte_delay_ms(1);
                }
        }

        return NULL;
}

static void
sfc_mae_counter_service_unregister(struct sfc_adapter *sa)
{
        struct sfc_mae_counter_registry *registry =
                &sa->mae.counter_registry;
        const unsigned int wait_ms = 10000;
        unsigned int i;

        rte_service_runstate_set(registry->polling.service.id, 0);
        rte_service_component_runstate_set(registry->polling.service.id, 0);

        /*
         * Wait for the counter routine to finish the last iteration.
         * Give up on timeout.
         */
        for (i = 0; i < wait_ms; i++) {
                if (rte_service_may_be_active(registry->polling.service.id) == 0)
                        break;

                rte_delay_ms(1);
        }
        if (i == wait_ms)
                sfc_warn(sa, "failed to wait for counter service to stop");

        rte_service_map_lcore_set(registry->polling.service.id,
                                  registry->polling.service.core_id, 0);

        rte_service_component_unregister(registry->polling.service.id);
}

static struct sfc_rxq_info *
sfc_counter_rxq_info_get(struct sfc_adapter *sa)
{
        return &sfc_sa2shared(sa)->rxq_info[sa->counter_rxq.sw_index];
}

static void
sfc_mae_counter_registry_prepare(struct sfc_mae_counter_registry *registry,
                                 struct sfc_adapter *sa,
                                 uint32_t counter_stream_flags)
{
        registry->rx_pkt_burst = sa->eth_dev->rx_pkt_burst;
        registry->rx_dp = sfc_counter_rxq_info_get(sa)->dp;
        registry->pushed_n_buffers = 0;
        registry->use_credits = counter_stream_flags &
                EFX_MAE_COUNTERS_STREAM_OUT_USES_CREDITS;
}

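/*
 * Set up counter polling as a DPDK service: pick a service lcore and
 * start it if needed, register the service component, map it to the
 * lcore and switch both runstates to running.
 */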
static int
sfc_mae_counter_service_register(struct sfc_adapter *sa,
                                 uint32_t counter_stream_flags)
{
        struct rte_service_spec service;
        char counter_service_name[sizeof(service.name)] = "counter_service";
        struct sfc_mae_counter_registry *counter_registry =
                &sa->mae.counter_registry;
        uint32_t cid;
        uint32_t sid;
        int rc;

        sfc_log_init(sa, "entry");

        /* Prepare service info */
        memset(&service, 0, sizeof(service));
        rte_strscpy(service.name, counter_service_name, sizeof(service.name));
        service.socket_id = sa->socket_id;
        service.callback = sfc_mae_counter_service_routine;
        service.callback_userdata = sa;
        sfc_mae_counter_registry_prepare(counter_registry, sa,
                                         counter_stream_flags);

        cid = sfc_get_service_lcore(sa->socket_id);
        if (cid == RTE_MAX_LCORE && sa->socket_id != SOCKET_ID_ANY) {
                /* Warn and try to allocate on any NUMA node */
                sfc_warn(sa,
                        "failed to get service lcore for counter service at socket %d",
                        sa->socket_id);

                cid = sfc_get_service_lcore(SOCKET_ID_ANY);
        }
        if (cid == RTE_MAX_LCORE) {
                rc = ENOTSUP;
                sfc_err(sa, "failed to get service lcore for counter service");
                goto fail_get_service_lcore;
        }

        /* Service core may be in "stopped" state, start it */
        rc = rte_service_lcore_start(cid);
        if (rc != 0 && rc != -EALREADY) {
                sfc_err(sa, "failed to start service core for counter service: %s",
                        rte_strerror(-rc));
                rc = ENOTSUP;
                goto fail_start_core;
        }

        /* Register counter service */
        rc = rte_service_component_register(&service, &sid);
        if (rc != 0) {
                rc = ENOEXEC;
                sfc_err(sa, "failed to register counter service component");
                goto fail_register;
        }

        /* Map the service with the service core */
        rc = rte_service_map_lcore_set(sid, cid, 1);
        if (rc != 0) {
                rc = -rc;
                sfc_err(sa, "failed to map lcore for counter service: %s",
                        rte_strerror(rc));
                goto fail_map_lcore;
        }

        /* Run the service */
        rc = rte_service_component_runstate_set(sid, 1);
        if (rc < 0) {
                rc = -rc;
                sfc_err(sa, "failed to run counter service component: %s",
                        rte_strerror(rc));
                goto fail_component_runstate_set;
        }
        rc = rte_service_runstate_set(sid, 1);
        if (rc < 0) {
                rc = -rc;
                sfc_err(sa, "failed to run counter service");
                goto fail_runstate_set;
        }

        counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_SERVICE;
        counter_registry->polling.service.core_id = cid;
        counter_registry->polling.service.id = sid;

        sfc_log_init(sa, "done");

        return 0;

fail_runstate_set:
        rte_service_component_runstate_set(sid, 0);

fail_component_runstate_set:
        rte_service_map_lcore_set(sid, cid, 0);

fail_map_lcore:
        rte_service_component_unregister(sid);

fail_register:
fail_start_core:
fail_get_service_lcore:
        sfc_log_init(sa, "failed: %s", rte_strerror(rc));

        return rc;
}

static void
sfc_mae_counter_thread_stop(struct sfc_adapter *sa)
{
        struct sfc_mae_counter_registry *counter_registry =
                &sa->mae.counter_registry;
        int rc;

        /* Ensure that flag is set before attempting to join thread */
        __atomic_store_n(&counter_registry->polling.thread.run, false,
                         __ATOMIC_RELEASE);

        rc = pthread_join(counter_registry->polling.thread.id, NULL);
        if (rc != 0)
                sfc_err(sa, "failed to join the MAE counter polling thread");

        counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_OFF;
}

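/*
 * Fall back to a dedicated control thread for counter polling when no
 * service lcore can be used.
 */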
static int
sfc_mae_counter_thread_spawn(struct sfc_adapter *sa,
                             uint32_t counter_stream_flags)
{
        struct sfc_mae_counter_registry *counter_registry =
                &sa->mae.counter_registry;
        int rc;

        sfc_log_init(sa, "entry");

        sfc_mae_counter_registry_prepare(counter_registry, sa,
                                         counter_stream_flags);

        counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_THREAD;
        counter_registry->polling.thread.run = true;

        rc = rte_ctrl_thread_create(&sa->mae.counter_registry.polling.thread.id,
                                    "mae_counter_thread", NULL,
                                    sfc_mae_counter_thread, sa);

        return rc;
}

int
sfc_mae_counters_init(struct sfc_mae_counters *counters,
                      uint32_t nb_counters_max)
{
        int rc;

        SFC_GENERIC_LOG(DEBUG, "%s: entry", __func__);

        counters->mae_counters = rte_zmalloc("sfc_mae_counters",
                sizeof(*counters->mae_counters) * nb_counters_max, 0);
        if (counters->mae_counters == NULL) {
                rc = ENOMEM;
                SFC_GENERIC_LOG(ERR, "%s: failed: %s", __func__,
                                rte_strerror(rc));
                return rc;
        }

        counters->n_mae_counters = nb_counters_max;

        SFC_GENERIC_LOG(DEBUG, "%s: done", __func__);

        return 0;
}

void
sfc_mae_counters_fini(struct sfc_mae_counters *counters)
{
        rte_free(counters->mae_counters);
        counters->mae_counters = NULL;
}

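/*
 * Create the mbuf mempool for the counters only Rx queue and remember
 * the queue software index. Nothing is done if the counter Rx queue is
 * not allocated for this adapter.
 */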
int
sfc_mae_counter_rxq_attach(struct sfc_adapter *sa)
{
        struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
        char name[RTE_MEMPOOL_NAMESIZE];
        struct rte_mempool *mp;
        unsigned int n_elements;
        unsigned int cache_size;
        /* The mempool is internal and private area is not required */
        const uint16_t priv_size = 0;
        const uint16_t data_room_size = RTE_PKTMBUF_HEADROOM +
                SFC_MAE_COUNTER_STREAM_PACKET_SIZE;
        int rc;

        sfc_log_init(sa, "entry");

        if (!sas->counters_rxq_allocated) {
                sfc_log_init(sa, "counter queue is not supported - skip");
                return 0;
        }

        /*
         * At least one element in the ring is always unused to distinguish
         * between empty and full ring cases.
         */
        n_elements = SFC_COUNTER_RXQ_RX_DESC_COUNT - 1;

        /*
         * The cache must have sufficient space to put received buffers
         * before they are reused on refill.
         */
        cache_size = rte_align32pow2(SFC_COUNTER_RXQ_REFILL_LEVEL +
                                     SFC_MAE_COUNTER_RX_BURST - 1);

        if (snprintf(name, sizeof(name), "counter_rxq-pool-%u", sas->port_id) >=
            (int)sizeof(name)) {
                sfc_err(sa, "failed: counter RxQ mempool name is too long");
                rc = ENAMETOOLONG;
                goto fail_long_name;
        }

        /*
         * It could be a single-producer single-consumer ring mempool which
         * requires minimal barriers. However, the cache size and refill/burst
         * policy are aligned, therefore it does not matter which mempool
         * backend is chosen since the backend is unused.
         */
        mp = rte_pktmbuf_pool_create(name, n_elements, cache_size,
                                     priv_size, data_room_size, sa->socket_id);
        if (mp == NULL) {
                sfc_err(sa, "failed to create counter RxQ mempool");
                rc = rte_errno;
                goto fail_mp_create;
        }

        sa->counter_rxq.sw_index = sfc_counters_rxq_sw_index(sas);
        sa->counter_rxq.mp = mp;
        sa->counter_rxq.state |= SFC_COUNTER_RXQ_ATTACHED;

        sfc_log_init(sa, "done");

        return 0;

fail_mp_create:
fail_long_name:
        sfc_log_init(sa, "failed: %s", rte_strerror(rc));

        return rc;
}

void
sfc_mae_counter_rxq_detach(struct sfc_adapter *sa)
{
        struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);

        sfc_log_init(sa, "entry");

        if (!sas->counters_rxq_allocated) {
                sfc_log_init(sa, "counter queue is not supported - skip");
                return;
        }

        if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED) == 0) {
                sfc_log_init(sa, "counter queue is not attached - skip");
                return;
        }

        rte_mempool_free(sa->counter_rxq.mp);
        sa->counter_rxq.mp = NULL;
        sa->counter_rxq.state &= ~SFC_COUNTER_RXQ_ATTACHED;

        sfc_log_init(sa, "done");
}

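/*
 * Initialise the counters only Rx queue with drop enable and the
 * USER_MARK flag which is required to deliver the generation count in
 * the Rx prefix.
 */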
int
sfc_mae_counter_rxq_init(struct sfc_adapter *sa)
{
        struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
        const struct rte_eth_rxconf rxconf = {
                .rx_free_thresh = SFC_COUNTER_RXQ_REFILL_LEVEL,
                .rx_drop_en = 1,
        };
        uint16_t nb_rx_desc = SFC_COUNTER_RXQ_RX_DESC_COUNT;
        int rc;

        sfc_log_init(sa, "entry");

        if (!sas->counters_rxq_allocated) {
                sfc_log_init(sa, "counter queue is not supported - skip");
                return 0;
        }

        if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED) == 0) {
                sfc_log_init(sa, "counter queue is not attached - skip");
                return 0;
        }

        nb_rx_desc = RTE_MIN(nb_rx_desc, sa->rxq_max_entries);
        nb_rx_desc = RTE_MAX(nb_rx_desc, sa->rxq_min_entries);

        rc = sfc_rx_qinit_info(sa, sa->counter_rxq.sw_index,
                               EFX_RXQ_FLAG_USER_MARK);
        if (rc != 0)
                goto fail_counter_rxq_init_info;

        rc = sfc_rx_qinit(sa, sa->counter_rxq.sw_index, nb_rx_desc,
                          sa->socket_id, &rxconf, sa->counter_rxq.mp);
        if (rc != 0) {
                sfc_err(sa, "failed to init counter RxQ");
                goto fail_counter_rxq_init;
        }

        sa->counter_rxq.state |= SFC_COUNTER_RXQ_INITIALIZED;

        sfc_log_init(sa, "done");

        return 0;

fail_counter_rxq_init:
fail_counter_rxq_init_info:
        sfc_log_init(sa, "failed: %s", rte_strerror(rc));

        return rc;
}

void
sfc_mae_counter_rxq_fini(struct sfc_adapter *sa)
{
        struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);

        sfc_log_init(sa, "entry");

        if (!sas->counters_rxq_allocated) {
                sfc_log_init(sa, "counter queue is not supported - skip");
                return;
        }

        if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0) {
                sfc_log_init(sa, "counter queue is not initialized - skip");
                return;
        }

        sfc_rx_qfini(sa, sa->counter_rxq.sw_index);

        sfc_log_init(sa, "done");
}

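/*
 * Stop counter polling (service or control thread) and the MAE
 * counters stream if it is running.
 */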
void
sfc_mae_counter_stop(struct sfc_adapter *sa)
{
        struct sfc_mae *mae = &sa->mae;

        sfc_log_init(sa, "entry");

        if (!mae->counter_rxq_running) {
                sfc_log_init(sa, "counter queue is not running - skip");
                return;
        }

        SFC_ASSERT(mae->counter_registry.polling_mode !=
                        SFC_MAE_COUNTER_POLLING_OFF);

        if (mae->counter_registry.polling_mode ==
                        SFC_MAE_COUNTER_POLLING_SERVICE)
                sfc_mae_counter_service_unregister(sa);
        else
                sfc_mae_counter_thread_stop(sa);

        efx_mae_counters_stream_stop(sa->nic, sa->counter_rxq.sw_index, NULL);

        mae->counter_rxq_running = false;

        sfc_log_init(sa, "done");
}

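/*
 * Start the MAE counters stream on the counter Rx queue and set up
 * polling: a DPDK service if a service lcore is available, otherwise a
 * control thread.
 */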
int
sfc_mae_counter_start(struct sfc_adapter *sa)
{
        struct sfc_mae *mae = &sa->mae;
        uint32_t flags;
        int rc;

        SFC_ASSERT(sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED);

        if (mae->counter_rxq_running)
                return 0;

        sfc_log_init(sa, "entry");

        rc = efx_mae_counters_stream_start(sa->nic, sa->counter_rxq.sw_index,
                                           SFC_MAE_COUNTER_STREAM_PACKET_SIZE,
                                           0 /* No flags required */, &flags);
        if (rc != 0) {
                sfc_err(sa, "failed to start MAE counters stream: %s",
                        rte_strerror(rc));
                goto fail_counter_stream;
        }

        sfc_log_init(sa, "stream start flags: 0x%x", flags);

        if (sfc_mae_counter_get_service_lcore(sa) != RTE_MAX_LCORE) {
                rc = sfc_mae_counter_service_register(sa, flags);
                if (rc != 0)
                        goto fail_service_register;
        } else {
                rc = sfc_mae_counter_thread_spawn(sa, flags);
                if (rc != 0)
                        goto fail_thread_spawn;
        }

        mae->counter_rxq_running = true;

        return 0;

fail_service_register:
fail_thread_spawn:
        efx_mae_counters_stream_stop(sa->nic, sa->counter_rxq.sw_index, NULL);

fail_counter_stream:
        sfc_log_init(sa, "failed: %s", rte_strerror(rc));

        return rc;
}

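/*
 * Report flow query data for a counter relative to its reset baseline.
 * For counters associated with a tunnel offload group, the group hit
 * counter is folded into the hit count and no byte count is reported.
 */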
int
sfc_mae_counter_get(struct sfc_mae_counters *counters,
                    const struct sfc_mae_counter_id *counter,
                    struct rte_flow_query_count *data)
{
        struct sfc_flow_tunnel *ft = counter->ft;
        uint64_t non_reset_jump_hit_counter;
        struct sfc_mae_counter *p;
        union sfc_pkts_bytes value;

        SFC_ASSERT(counter->mae_id.id < counters->n_mae_counters);
        p = &counters->mae_counters[counter->mae_id.id];

        /*
         * Ordering is relaxed since it is the only operation on counter value.
         * And it does not depend on different stores/loads in other threads.
         * Paired with relaxed ordering in counter increment.
         */
        value.pkts_bytes.int128 = __atomic_load_n(&p->value.pkts_bytes.int128,
                                                  __ATOMIC_RELAXED);

        data->hits_set = 1;
        data->hits = value.pkts - p->reset.pkts;

        if (ft != NULL) {
                data->hits += ft->group_hit_counter;
                non_reset_jump_hit_counter = data->hits;
                data->hits -= ft->reset_jump_hit_counter;
        } else {
                data->bytes_set = 1;
                data->bytes = value.bytes - p->reset.bytes;
        }

        if (data->reset != 0) {
                if (ft != NULL) {
                        ft->reset_jump_hit_counter = non_reset_jump_hit_counter;
                } else {
                        p->reset.pkts = value.pkts;
                        p->reset.bytes = value.bytes;
                }
        }

        return 0;
}

bool
sfc_mae_counter_stream_enabled(struct sfc_adapter *sa)
{
        if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0 ||
            sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
                return B_FALSE;
        else
                return B_TRUE;
}