lib/librte_distributor/rte_distributor.c (dpdk.git)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdio.h>
#include <sys/queue.h>
#include <string.h>
#include <rte_mbuf.h>
#include <rte_memory.h>
#include <rte_cycles.h>
#include <rte_memzone.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_eal_memconfig.h>
#include <rte_pause.h>
#include <rte_tailq.h>

#include "rte_distributor.h"
#include "rte_distributor_single.h"
#include "distributor_private.h"

TAILQ_HEAD(rte_dist_burst_list, rte_distributor);

static struct rte_tailq_elem rte_dist_burst_tailq = {
	.name = "RTE_DIST_BURST",
};
EAL_REGISTER_TAILQ(rte_dist_burst_tailq)

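/*
 * Handshake overview (as implemented below): each worker shares two cache
 * lines with the distributor. On bufptr64 (distributor -> worker) the worker
 * sets the GET_BUF flag once it has consumed the line and wants more packets;
 * the distributor fills the line (marking entries with VALID_BUF) and clears
 * GET_BUF to publish them. On retptr64 (worker -> distributor) the worker
 * writes returning packets marked with RETURN_BUF and sets GET_BUF to hand
 * the line over; the distributor harvests the returns and zeroes the line.
 */
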
/**** APIs called by workers ****/

/**** Burst Packet APIs called by workers ****/

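/*
 * Example (sketch, not part of this library): a worker lcore typically sits
 * in a loop around rte_distributor_get_pkt(), handing back the previous
 * burst while requesting the next one, e.g.
 *
 *	struct rte_mbuf *bufs[RTE_DIST_BURST_SIZE];
 *	unsigned int num = 0;
 *	while (!quit) {
 *		num = rte_distributor_get_pkt(d, worker_id, bufs, bufs, num);
 *		... do work on the num packets in bufs ...
 *	}
 *
 * where d, worker_id and quit are application-provided.
 */
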
void
rte_distributor_request_pkt(struct rte_distributor *d,
		unsigned int worker_id, struct rte_mbuf **oldpkt,
		unsigned int count)
{
	struct rte_distributor_buffer *buf = &(d->bufs[worker_id]);
	unsigned int i;

	volatile int64_t *retptr64;

	if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) {
		rte_distributor_request_pkt_single(d->d_single,
			worker_id, oldpkt[0]);
		return;
	}

	retptr64 = &(buf->retptr64[0]);
	/* Spin while the handshake bits are set (the distributor clears them).
	 * Sync with worker on GET_BUF flag.
	 */
	while (unlikely(__atomic_load_n(retptr64, __ATOMIC_ACQUIRE)
			& RTE_DISTRIB_GET_BUF)) {
		rte_pause();
		uint64_t t = rte_rdtsc()+100;

		while (rte_rdtsc() < t)
			rte_pause();
	}

	/*
	 * If we get here, the distributor has just cleared the handshake
	 * bits. Populate the retptrs with the returning packets.
	 */

	for (i = count; i < RTE_DIST_BURST_SIZE; i++)
		buf->retptr64[i] = 0;

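	/*
	 * Layout note (see distributor_private.h for the exact flag values):
	 * each 64-bit slot holds an mbuf pointer shifted up by
	 * RTE_DISTRIB_FLAG_BITS, leaving the low bits free for the
	 * handshake/marker flags (GET_BUF, RETURN_BUF, VALID_BUF).
	 */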
	/* Set Return bit for each packet returned */
	for (i = count; i-- > 0; )
		buf->retptr64[i] =
			(((int64_t)(uintptr_t)(oldpkt[i])) <<
			RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;

	/*
	 * Finally, set the GET_BUF flag to signal to the distributor that
	 * the cache line is ready for processing.
	 * Sync with distributor to release retptrs.
	 */
	__atomic_store_n(retptr64, *retptr64 | RTE_DISTRIB_GET_BUF,
			__ATOMIC_RELEASE);
}

int
rte_distributor_poll_pkt(struct rte_distributor *d,
		unsigned int worker_id, struct rte_mbuf **pkts)
{
	struct rte_distributor_buffer *buf = &d->bufs[worker_id];
	uint64_t ret;
	int count = 0;
	unsigned int i;

	if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) {
		pkts[0] = rte_distributor_poll_pkt_single(d->d_single,
			worker_id);
		return (pkts[0]) ? 1 : 0;
	}

	/* If the GET_BUF bit is still set, the distributor has not yet
	 * published new packets, so return.
	 * Sync with distributor to acquire bufptrs.
	 */
	if (__atomic_load_n(&(buf->bufptr64[0]), __ATOMIC_ACQUIRE)
		& RTE_DISTRIB_GET_BUF)
		return -1;

	/* since bufptr64 is signed, this should be an arithmetic shift */
	for (i = 0; i < RTE_DIST_BURST_SIZE; i++) {
		if (likely(buf->bufptr64[i] & RTE_DISTRIB_VALID_BUF)) {
			ret = buf->bufptr64[i] >> RTE_DISTRIB_FLAG_BITS;
			pkts[count++] = (struct rte_mbuf *)((uintptr_t)(ret));
		}
	}

	/*
	 * Now that the contents of the cache line have been copied into an
	 * array of mbuf pointers, set the GET_BUF bit again so the
	 * distributor can start filling the next cache line while we work
	 * on this one.
	 * Sync with distributor on GET_BUF flag. Release bufptrs.
	 */
	__atomic_store_n(&(buf->bufptr64[0]),
		buf->bufptr64[0] | RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);

	return count;
}

int
rte_distributor_get_pkt(struct rte_distributor *d,
		unsigned int worker_id, struct rte_mbuf **pkts,
		struct rte_mbuf **oldpkt, unsigned int return_count)
{
	int count;

	if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) {
		if (return_count <= 1) {
			pkts[0] = rte_distributor_get_pkt_single(d->d_single,
				worker_id, oldpkt[0]);
			return (pkts[0]) ? 1 : 0;
		} else
			return -EINVAL;
	}

	rte_distributor_request_pkt(d, worker_id, oldpkt, return_count);

	count = rte_distributor_poll_pkt(d, worker_id, pkts);
	while (count == -1) {
		uint64_t t = rte_rdtsc() + 100;

		while (rte_rdtsc() < t)
			rte_pause();

		count = rte_distributor_poll_pkt(d, worker_id, pkts);
	}
	return count;
}

int
rte_distributor_return_pkt(struct rte_distributor *d,
		unsigned int worker_id, struct rte_mbuf **oldpkt, int num)
{
	struct rte_distributor_buffer *buf = &d->bufs[worker_id];
	unsigned int i;

	if (unlikely(d->alg_type == RTE_DIST_ALG_SINGLE)) {
		if (num == 1)
			return rte_distributor_return_pkt_single(d->d_single,
				worker_id, oldpkt[0]);
		else
			return -EINVAL;
	}

	/* Sync with distributor to acquire retptrs */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);
	for (i = 0; i < RTE_DIST_BURST_SIZE; i++)
		/* Switch off the return bit first */
		buf->retptr64[i] &= ~RTE_DISTRIB_RETURN_BUF;

	for (i = num; i-- > 0; )
		buf->retptr64[i] = (((int64_t)(uintptr_t)oldpkt[i]) <<
			RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF;

	/* Set the GET_BUF bit even if we got no returns.
	 * Sync with distributor on GET_BUF flag. Release retptrs.
	 */
	__atomic_store_n(&(buf->retptr64[0]),
		buf->retptr64[0] | RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);

	return 0;
}

/**** APIs called on distributor core ****/

/* stores a packet returned from a worker inside the returns array */
static inline void
store_return(uintptr_t oldbuf, struct rte_distributor *d,
		unsigned int *ret_start, unsigned int *ret_count)
{
	if (!oldbuf)
		return;
	/* store returns in a circular buffer */
	d->returns.mbufs[(*ret_start + *ret_count) & RTE_DISTRIB_RETURNS_MASK]
			= (void *)oldbuf;
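	/*
	 * Branchless ring-buffer update: while the ring is not full, only
	 * the count grows; once count reaches RTE_DISTRIB_RETURNS_MASK the
	 * start index advances instead, so the newest return overwrites the
	 * oldest one.
	 */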
	*ret_start += (*ret_count == RTE_DISTRIB_RETURNS_MASK);
	*ret_count += (*ret_count != RTE_DISTRIB_RETURNS_MASK);
}

/*
 * Match the flow_ids (tags) of the incoming packets to the flow_ids
 * of the inflight packets (both inflight on the workers and in each worker
 * backlog). This will then allow us to pin those packets to the relevant
 * workers to give us our atomic flow pinning.
 */
void
find_match_scalar(struct rte_distributor *d,
			uint16_t *data_ptr,
			uint16_t *output_ptr)
{
	struct rte_distributor_backlog *bl;
	uint16_t i, j, w;

	/*
	 * Function overview:
	 * 1. Loop through all worker IDs
	 * 2. Compare the current inflights to the incoming tags
	 * 3. Compare the current backlog to the incoming tags
	 * 4. Add any matches to the output
	 */

	for (j = 0 ; j < RTE_DIST_BURST_SIZE; j++)
		output_ptr[j] = 0;

	for (i = 0; i < d->num_workers; i++) {
		bl = &d->backlog[i];

		for (j = 0; j < RTE_DIST_BURST_SIZE ; j++)
			for (w = 0; w < RTE_DIST_BURST_SIZE; w++)
				if (d->in_flight_tags[i][j] == data_ptr[w]) {
					output_ptr[j] = i+1;
					break;
				}
		for (j = 0; j < RTE_DIST_BURST_SIZE; j++)
			for (w = 0; w < RTE_DIST_BURST_SIZE; w++)
				if (bl->tags[j] == data_ptr[w]) {
					output_ptr[j] = i+1;
					break;
				}
	}

	/*
	 * At this stage, the output contains 8 16-bit values, with
	 * each non-zero value holding the worker ID (plus one) to which
	 * the corresponding flow is pinned.
	 */
}

/*
 * When the handshake bits indicate that there are packets coming
 * back from the worker, this function is called to copy and store
 * the valid returned pointers (store_return).
 */
static unsigned int
handle_returns(struct rte_distributor *d, unsigned int wkr)
{
	struct rte_distributor_buffer *buf = &(d->bufs[wkr]);
	uintptr_t oldbuf;
	unsigned int ret_start = d->returns.start,
			ret_count = d->returns.count;
	unsigned int count = 0;
	unsigned int i;

	/* Sync on GET_BUF flag. Acquire retptrs. */
	if (__atomic_load_n(&(buf->retptr64[0]), __ATOMIC_ACQUIRE)
		& RTE_DISTRIB_GET_BUF) {
		for (i = 0; i < RTE_DIST_BURST_SIZE; i++) {
			if (buf->retptr64[i] & RTE_DISTRIB_RETURN_BUF) {
				oldbuf = ((uintptr_t)(buf->retptr64[i] >>
					RTE_DISTRIB_FLAG_BITS));
				/* store returns in a circular buffer */
				store_return(oldbuf, d, &ret_start, &ret_count);
				count++;
				buf->retptr64[i] &= ~RTE_DISTRIB_RETURN_BUF;
			}
		}
		d->returns.start = ret_start;
		d->returns.count = ret_count;
		/* Clear the line so the worker can populate it with more
		 * returns.
		 * Sync with worker on GET_BUF flag. Release retptrs.
		 */
		__atomic_store_n(&(buf->retptr64[0]), 0, __ATOMIC_RELEASE);
	}
	return count;
}

/*
 * This function releases a burst (cache line) to a worker.
 * It is called from the process function when a cacheline is
 * full to make room for more packets for that worker, or when
 * all packets have been assigned to bursts and need to be flushed
 * to the workers.
 * It also needs to wait for any outstanding packets from the worker
 * before sending out new packets.
 */
static unsigned int
release(struct rte_distributor *d, unsigned int wkr)
{
	struct rte_distributor_buffer *buf = &(d->bufs[wkr]);
	unsigned int i;

	/* Sync with worker on GET_BUF flag */
	while (!(__atomic_load_n(&(d->bufs[wkr].bufptr64[0]), __ATOMIC_ACQUIRE)
		& RTE_DISTRIB_GET_BUF))
		rte_pause();

	handle_returns(d, wkr);

	buf->count = 0;

	for (i = 0; i < d->backlog[wkr].count; i++) {
		d->bufs[wkr].bufptr64[i] = d->backlog[wkr].pkts[i] |
				RTE_DISTRIB_GET_BUF | RTE_DISTRIB_VALID_BUF;
		d->in_flight_tags[wkr][i] = d->backlog[wkr].tags[i];
	}
	buf->count = i;
	for ( ; i < RTE_DIST_BURST_SIZE ; i++) {
		buf->bufptr64[i] = RTE_DISTRIB_GET_BUF;
		d->in_flight_tags[wkr][i] = 0;
	}

	d->backlog[wkr].count = 0;

	/* Clear the GET_BUF bit.
	 * Sync with worker on GET_BUF flag. Release bufptrs.
	 */
	__atomic_store_n(&(buf->bufptr64[0]),
		buf->bufptr64[0] & ~RTE_DISTRIB_GET_BUF, __ATOMIC_RELEASE);
	return buf->count;
}

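/*
 * Example (sketch, not part of this library): on the distributor lcore,
 * rte_distributor_process() is typically fed from an rx loop and paired
 * with rte_distributor_returned_pkts(), e.g.
 *
 *	struct rte_mbuf *bufs[BURST], *ret_bufs[BURST];
 *	uint16_t nb_rx = rte_eth_rx_burst(port_id, 0, bufs, BURST);
 *	rte_distributor_process(d, bufs, nb_rx);
 *	int nb_ret = rte_distributor_returned_pkts(d, ret_bufs, BURST);
 *
 * BURST, port_id and the mbuf arrays above are application-defined.
 */
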
/* process a set of packets to distribute them to workers */
int
rte_distributor_process(struct rte_distributor *d,
		struct rte_mbuf **mbufs, unsigned int num_mbufs)
{
	unsigned int next_idx = 0;
	static unsigned int wkr;
	struct rte_mbuf *next_mb = NULL;
	int64_t next_value = 0;
	uint16_t new_tag = 0;
	uint16_t flows[RTE_DIST_BURST_SIZE] __rte_cache_aligned;
	unsigned int i, j, w, wid;

	if (d->alg_type == RTE_DIST_ALG_SINGLE) {
		/* Call the old API */
		return rte_distributor_process_single(d->d_single,
			mbufs, num_mbufs);
	}

	if (unlikely(num_mbufs == 0)) {
		/* Flush out all non-full cache-lines to workers. */
		for (wid = 0 ; wid < d->num_workers; wid++) {
			/* Sync with worker on GET_BUF flag. */
			if (__atomic_load_n(&(d->bufs[wid].bufptr64[0]),
				__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF) {
				release(d, wid);
				handle_returns(d, wid);
			}
		}
		return 0;
	}

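	/*
	 * Main distribution loop: take up to RTE_DIST_BURST_SIZE packets at
	 * a time, derive a non-zero flow tag for each, match the tags
	 * against the in-flight and backlog tags of every worker, then
	 * append each packet either to the backlog of the worker its flow
	 * is pinned to or to the current worker's backlog, releasing a
	 * backlog to its worker whenever it fills up.
	 */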
	while (next_idx < num_mbufs) {
		uint16_t matches[RTE_DIST_BURST_SIZE];
		unsigned int pkts;

		/* Sync with worker on GET_BUF flag. */
		if (__atomic_load_n(&(d->bufs[wkr].bufptr64[0]),
			__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF)
			d->bufs[wkr].count = 0;

		if ((num_mbufs - next_idx) < RTE_DIST_BURST_SIZE)
			pkts = num_mbufs - next_idx;
		else
			pkts = RTE_DIST_BURST_SIZE;

		for (i = 0; i < pkts; i++) {
			if (mbufs[next_idx + i]) {
				/* flows have to be non-zero */
				flows[i] = mbufs[next_idx + i]->hash.usr | 1;
			} else
				flows[i] = 0;
		}
		for (; i < RTE_DIST_BURST_SIZE; i++)
			flows[i] = 0;

		switch (d->dist_match_fn) {
		case RTE_DIST_MATCH_VECTOR:
			find_match_vec(d, &flows[0], &matches[0]);
			break;
		default:
			find_match_scalar(d, &flows[0], &matches[0]);
		}

		/*
		 * The matches array now contains the intended worker ID (+1)
		 * for each incoming packet. Any zeroes need to be assigned
		 * workers.
		 */

		for (j = 0; j < pkts; j++) {

			next_mb = mbufs[next_idx++];
			next_value = (((int64_t)(uintptr_t)next_mb) <<
					RTE_DISTRIB_FLAG_BITS);
			/*
			 * The user is advised to set the tag value for each
			 * mbuf before calling rte_distributor_process().
			 * User-defined tags are used to identify flows or
			 * sessions.
			 */
			/* flows MUST be non-zero */
			new_tag = (uint16_t)(next_mb->hash.usr) | 1;

			/*
			 * Uncommenting the next line will cause the find_match
			 * function to be optimized out, making this function
			 * do parallel (non-atomic) distribution
			 */
			/* matches[j] = 0; */

			if (matches[j]) {
				struct rte_distributor_backlog *bl =
						&d->backlog[matches[j]-1];
				if (unlikely(bl->count ==
						RTE_DIST_BURST_SIZE)) {
					release(d, matches[j]-1);
				}

				/* Add to worker that already has flow */
				unsigned int idx = bl->count++;

				bl->tags[idx] = new_tag;
				bl->pkts[idx] = next_value;

			} else {
				struct rte_distributor_backlog *bl =
						&d->backlog[wkr];
				if (unlikely(bl->count ==
						RTE_DIST_BURST_SIZE)) {
					release(d, wkr);
				}

				/* Add to current worker */
				unsigned int idx = bl->count++;

				bl->tags[idx] = new_tag;
				bl->pkts[idx] = next_value;
				/*
				 * Now that we've just added an unpinned flow
				 * to a worker, we need to ensure that all
				 * other packets with that same flow will go
				 * to the same worker in this burst.
				 */
				for (w = j; w < pkts; w++)
					if (flows[w] == new_tag)
						matches[w] = wkr+1;
			}
		}
		wkr++;
		if (wkr >= d->num_workers)
			wkr = 0;
	}

	/* Flush out all non-full cache-lines to workers. */
	for (wid = 0 ; wid < d->num_workers; wid++)
		/* Sync with worker on GET_BUF flag. */
		if ((__atomic_load_n(&(d->bufs[wid].bufptr64[0]),
			__ATOMIC_ACQUIRE) & RTE_DISTRIB_GET_BUF))
			release(d, wid);

	return num_mbufs;
}

/* return to the caller the packets returned from workers */
int
rte_distributor_returned_pkts(struct rte_distributor *d,
		struct rte_mbuf **mbufs, unsigned int max_mbufs)
{
	struct rte_distributor_returned_pkts *returns = &d->returns;
	unsigned int retval = (max_mbufs < returns->count) ?
			max_mbufs : returns->count;
	unsigned int i;

	if (d->alg_type == RTE_DIST_ALG_SINGLE) {
		/* Call the old API */
		return rte_distributor_returned_pkts_single(d->d_single,
				mbufs, max_mbufs);
	}

	for (i = 0; i < retval; i++) {
		unsigned int idx = (returns->start + i) &
				RTE_DISTRIB_RETURNS_MASK;

		mbufs[i] = returns->mbufs[idx];
	}
	returns->start += i;
	returns->count -= i;

	return retval;
}

/*
 * Return the number of packets in-flight in a distributor, i.e. packets
 * being worked on or queued up in a backlog.
 */
static inline unsigned int
total_outstanding(const struct rte_distributor *d)
{
	unsigned int wkr, total_outstanding = 0;

	for (wkr = 0; wkr < d->num_workers; wkr++)
		total_outstanding += d->backlog[wkr].count;

	return total_outstanding;
}

/*
 * Flush the distributor, so that there are no outstanding packets in flight or
 * queued up.
 */
int
rte_distributor_flush(struct rte_distributor *d)
{
	unsigned int flushed;
	unsigned int wkr;

	if (d->alg_type == RTE_DIST_ALG_SINGLE) {
		/* Call the old API */
		return rte_distributor_flush_single(d->d_single);
	}

	flushed = total_outstanding(d);

	while (total_outstanding(d) > 0)
		rte_distributor_process(d, NULL, 0);

	/* wait 10ms to allow all workers to drain the packets */
	rte_delay_us(10000);

	/*
	 * Send empty burst to all workers to allow them to exit
	 * gracefully, should they need to.
	 */
	rte_distributor_process(d, NULL, 0);

	for (wkr = 0; wkr < d->num_workers; wkr++)
		handle_returns(d, wkr);

	return flushed;
}

/* clears the internal returns array in the distributor */
void
rte_distributor_clear_returns(struct rte_distributor *d)
{
	unsigned int wkr;

	if (d->alg_type == RTE_DIST_ALG_SINGLE) {
		/* Call the old API */
		rte_distributor_clear_returns_single(d->d_single);
		return;
	}

	/* throw away returns, so workers can exit */
	for (wkr = 0; wkr < d->num_workers; wkr++)
		/* Sync with worker. Release retptrs. */
		__atomic_store_n(&(d->bufs[wkr].retptr64[0]), 0,
				__ATOMIC_RELEASE);
}

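/*
 * Example (sketch, not part of this library): an application would
 * typically create a burst-mode distributor along the lines of
 *
 *	struct rte_distributor *d = rte_distributor_create("flow_dist",
 *			rte_socket_id(), num_workers, RTE_DIST_ALG_BURST);
 *
 * where "flow_dist" and num_workers are application-chosen values.
 */
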
/* creates a distributor instance */
struct rte_distributor *
rte_distributor_create(const char *name,
		unsigned int socket_id,
		unsigned int num_workers,
		unsigned int alg_type)
{
	struct rte_distributor *d;
	struct rte_dist_burst_list *dist_burst_list;
	char mz_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;
	unsigned int i;

	/* TODO Reorganise function properly around RTE_DIST_ALG_SINGLE/BURST */

	/* compilation-time checks */
	RTE_BUILD_BUG_ON((sizeof(*d) & RTE_CACHE_LINE_MASK) != 0);
	RTE_BUILD_BUG_ON((RTE_DISTRIB_MAX_WORKERS & 7) != 0);

	if (name == NULL || num_workers >=
		(unsigned int)RTE_MIN(RTE_DISTRIB_MAX_WORKERS, RTE_MAX_LCORE)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (alg_type == RTE_DIST_ALG_SINGLE) {
		d = malloc(sizeof(struct rte_distributor));
		if (d == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}
		d->d_single = rte_distributor_create_single(name,
				socket_id, num_workers);
		if (d->d_single == NULL) {
			free(d);
			/* rte_errno will have been set */
			return NULL;
		}
		d->alg_type = alg_type;
		return d;
	}

	snprintf(mz_name, sizeof(mz_name), RTE_DISTRIB_PREFIX"%s", name);
	mz = rte_memzone_reserve(mz_name, sizeof(*d), socket_id, NO_FLAGS);
	if (mz == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	d = mz->addr;
	strlcpy(d->name, name, sizeof(d->name));
	d->num_workers = num_workers;
	d->alg_type = alg_type;

	d->dist_match_fn = RTE_DIST_MATCH_SCALAR;
#if defined(RTE_ARCH_X86)
	d->dist_match_fn = RTE_DIST_MATCH_VECTOR;
#endif

	/*
	 * Set up the backlog tags so they're pointing at the second cache
	 * line for performance during flow matching
	 */
	for (i = 0 ; i < num_workers ; i++)
		d->backlog[i].tags = &d->in_flight_tags[i][RTE_DIST_BURST_SIZE];

	dist_burst_list = RTE_TAILQ_CAST(rte_dist_burst_tailq.head,
					  rte_dist_burst_list);

	rte_mcfg_tailq_write_lock();
	TAILQ_INSERT_TAIL(dist_burst_list, d, next);
	rte_mcfg_tailq_write_unlock();

	return d;
}