mbuf: support dynamic fields and flags
[dpdk.git] / lib / librte_mbuf / rte_mbuf_dyn.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 6WIND S.A.
3  */
4
5 #include <sys/queue.h>
6 #include <stdint.h>
7 #include <limits.h>
8
9 #include <rte_common.h>
10 #include <rte_eal.h>
11 #include <rte_eal_memconfig.h>
12 #include <rte_tailq.h>
13 #include <rte_errno.h>
14 #include <rte_malloc.h>
15 #include <rte_string_fns.h>
16 #include <rte_mbuf.h>
17 #include <rte_mbuf_dyn.h>
18
/* Name of the memzone reserved (primary) or looked up (secondary) to hold
 * the dynamic field/flag allocation state.
 */
#define RTE_MBUF_DYN_MZNAME "rte_mbuf_dyn"
20
21 struct mbuf_dynfield_elt {
22         TAILQ_ENTRY(mbuf_dynfield_elt) next;
23         struct rte_mbuf_dynfield params;
24         size_t offset;
25 };
26 TAILQ_HEAD(mbuf_dynfield_list, rte_tailq_entry);
27
28 static struct rte_tailq_elem mbuf_dynfield_tailq = {
29         .name = "RTE_MBUF_DYNFIELD",
30 };
31 EAL_REGISTER_TAILQ(mbuf_dynfield_tailq);
32
33 struct mbuf_dynflag_elt {
34         TAILQ_ENTRY(mbuf_dynflag_elt) next;
35         struct rte_mbuf_dynflag params;
36         unsigned int bitnum;
37 };
38 TAILQ_HEAD(mbuf_dynflag_list, rte_tailq_entry);
39
40 static struct rte_tailq_elem mbuf_dynflag_tailq = {
41         .name = "RTE_MBUF_DYNFLAG",
42 };
43 EAL_REGISTER_TAILQ(mbuf_dynflag_tailq);
44
45 struct mbuf_dyn_shm {
46         /**
47          * For each mbuf byte, free_space[i] != 0 if space is free.
48          * The value is the size of the biggest aligned element that
49          * can fit in the zone.
50          */
51         uint8_t free_space[sizeof(struct rte_mbuf)];
52         /** Bitfield of available flags. */
53         uint64_t free_flags;
54 };
55 static struct mbuf_dyn_shm *shm;
56
57 /* Set the value of free_space[] according to the size and alignment of
58  * the free areas. This helps to select the best place when reserving a
59  * dynamic field. Assume tailq is locked.
60  */
61 static void
62 process_score(void)
63 {
64         size_t off, align, size, i;
65
66         /* first, erase previous info */
67         for (i = 0; i < sizeof(struct rte_mbuf); i++) {
68                 if (shm->free_space[i])
69                         shm->free_space[i] = 1;
70         }
71
72         for (off = 0; off < sizeof(struct rte_mbuf); off++) {
73                 /* get the size of the free zone */
74                 for (size = 0; shm->free_space[off + size]; size++)
75                         ;
76                 if (size == 0)
77                         continue;
78
79                 /* get the alignment of biggest object that can fit in
80                  * the zone at this offset.
81                  */
82                 for (align = 1;
83                      (off % (align << 1)) == 0 && (align << 1) <= size;
84                      align <<= 1)
85                         ;
86
87                 /* save it in free_space[] */
88                 for (i = off; i < off + size; i++)
89                         shm->free_space[i] = RTE_MAX(align, shm->free_space[i]);
90         }
91 }
92
/* Flag every byte of the given struct rte_mbuf member as free (value 1)
 * in shm->free_space[].
 */
#define mark_free(field)						\
	memset(shm->free_space + offsetof(struct rte_mbuf, field),	\
		1, RTE_SIZEOF_FIELD(struct rte_mbuf, field))
97
98 /* Allocate and initialize the shared memory. Assume tailq is locked */
99 static int
100 init_shared_mem(void)
101 {
102         const struct rte_memzone *mz;
103         uint64_t mask;
104
105         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
106                 mz = rte_memzone_reserve_aligned(RTE_MBUF_DYN_MZNAME,
107                                                 sizeof(struct mbuf_dyn_shm),
108                                                 SOCKET_ID_ANY, 0,
109                                                 RTE_CACHE_LINE_SIZE);
110         } else {
111                 mz = rte_memzone_lookup(RTE_MBUF_DYN_MZNAME);
112         }
113         if (mz == NULL)
114                 return -1;
115
116         shm = mz->addr;
117
118         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
119                 /* init free_space, keep it sync'd with
120                  * rte_mbuf_dynfield_copy().
121                  */
122                 memset(shm, 0, sizeof(*shm));
123                 mark_free(dynfield1);
124
125                 /* init free_flags */
126                 for (mask = PKT_FIRST_FREE; mask <= PKT_LAST_FREE; mask <<= 1)
127                         shm->free_flags |= mask;
128
129                 process_score();
130         }
131
132         return 0;
133 }
134
135 /* check if this offset can be used */
136 static int
137 check_offset(size_t offset, size_t size, size_t align)
138 {
139         size_t i;
140
141         if ((offset & (align - 1)) != 0)
142                 return -1;
143         if (offset + size > sizeof(struct rte_mbuf))
144                 return -1;
145
146         for (i = 0; i < size; i++) {
147                 if (!shm->free_space[i + offset])
148                         return -1;
149         }
150
151         return 0;
152 }
153
154 /* assume tailq is locked */
155 static struct mbuf_dynfield_elt *
156 __mbuf_dynfield_lookup(const char *name)
157 {
158         struct mbuf_dynfield_list *mbuf_dynfield_list;
159         struct mbuf_dynfield_elt *mbuf_dynfield;
160         struct rte_tailq_entry *te;
161
162         mbuf_dynfield_list = RTE_TAILQ_CAST(
163                 mbuf_dynfield_tailq.head, mbuf_dynfield_list);
164
165         TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
166                 mbuf_dynfield = (struct mbuf_dynfield_elt *)te->data;
167                 if (strcmp(name, mbuf_dynfield->params.name) == 0)
168                         break;
169         }
170
171         if (te == NULL) {
172                 rte_errno = ENOENT;
173                 return NULL;
174         }
175
176         return mbuf_dynfield;
177 }
178
179 int
180 rte_mbuf_dynfield_lookup(const char *name, struct rte_mbuf_dynfield *params)
181 {
182         struct mbuf_dynfield_elt *mbuf_dynfield;
183
184         if (shm == NULL) {
185                 rte_errno = ENOENT;
186                 return -1;
187         }
188
189         rte_mcfg_tailq_read_lock();
190         mbuf_dynfield = __mbuf_dynfield_lookup(name);
191         rte_mcfg_tailq_read_unlock();
192
193         if (mbuf_dynfield == NULL) {
194                 rte_errno = ENOENT;
195                 return -1;
196         }
197
198         if (params != NULL)
199                 memcpy(params, &mbuf_dynfield->params, sizeof(*params));
200
201         return mbuf_dynfield->offset;
202 }
203
204 static int mbuf_dynfield_cmp(const struct rte_mbuf_dynfield *params1,
205                 const struct rte_mbuf_dynfield *params2)
206 {
207         if (strcmp(params1->name, params2->name))
208                 return -1;
209         if (params1->size != params2->size)
210                 return -1;
211         if (params1->align != params2->align)
212                 return -1;
213         if (params1->flags != params2->flags)
214                 return -1;
215         return 0;
216 }
217
218 /* assume tailq is locked */
219 static int
220 __rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
221                                 size_t req)
222 {
223         struct mbuf_dynfield_list *mbuf_dynfield_list;
224         struct mbuf_dynfield_elt *mbuf_dynfield = NULL;
225         struct rte_tailq_entry *te = NULL;
226         unsigned int best_zone = UINT_MAX;
227         size_t i, offset;
228         int ret;
229
230         if (shm == NULL && init_shared_mem() < 0)
231                 return -1;
232
233         mbuf_dynfield = __mbuf_dynfield_lookup(params->name);
234         if (mbuf_dynfield != NULL) {
235                 if (req != SIZE_MAX && req != mbuf_dynfield->offset) {
236                         rte_errno = EEXIST;
237                         return -1;
238                 }
239                 if (mbuf_dynfield_cmp(params, &mbuf_dynfield->params) < 0) {
240                         rte_errno = EEXIST;
241                         return -1;
242                 }
243                 return mbuf_dynfield->offset;
244         }
245
246         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
247                 rte_errno = EPERM;
248                 return -1;
249         }
250
251         if (req == SIZE_MAX) {
252                 /* Find the best place to put this field: we search the
253                  * lowest value of shm->free_space[offset]: the zones
254                  * containing room for larger fields are kept for later.
255                  */
256                 for (offset = 0;
257                      offset < sizeof(struct rte_mbuf);
258                      offset++) {
259                         if (check_offset(offset, params->size,
260                                                 params->align) == 0 &&
261                                         shm->free_space[offset] < best_zone) {
262                                 best_zone = shm->free_space[offset];
263                                 req = offset;
264                         }
265                 }
266                 if (req == SIZE_MAX) {
267                         rte_errno = ENOENT;
268                         return -1;
269                 }
270         } else {
271                 if (check_offset(req, params->size, params->align) < 0) {
272                         rte_errno = EBUSY;
273                         return -1;
274                 }
275         }
276
277         offset = req;
278         mbuf_dynfield_list = RTE_TAILQ_CAST(
279                 mbuf_dynfield_tailq.head, mbuf_dynfield_list);
280
281         te = rte_zmalloc("MBUF_DYNFIELD_TAILQ_ENTRY", sizeof(*te), 0);
282         if (te == NULL)
283                 return -1;
284
285         mbuf_dynfield = rte_zmalloc("mbuf_dynfield", sizeof(*mbuf_dynfield), 0);
286         if (mbuf_dynfield == NULL) {
287                 rte_free(te);
288                 return -1;
289         }
290
291         ret = strlcpy(mbuf_dynfield->params.name, params->name,
292                 sizeof(mbuf_dynfield->params.name));
293         if (ret < 0 || ret >= (int)sizeof(mbuf_dynfield->params.name)) {
294                 rte_errno = ENAMETOOLONG;
295                 rte_free(mbuf_dynfield);
296                 rte_free(te);
297                 return -1;
298         }
299         memcpy(&mbuf_dynfield->params, params, sizeof(mbuf_dynfield->params));
300         mbuf_dynfield->offset = offset;
301         te->data = mbuf_dynfield;
302
303         TAILQ_INSERT_TAIL(mbuf_dynfield_list, te, next);
304
305         for (i = offset; i < offset + params->size; i++)
306                 shm->free_space[i] = 0;
307         process_score();
308
309         RTE_LOG(DEBUG, MBUF, "Registered dynamic field %s (sz=%zu, al=%zu, fl=0x%x) -> %zd\n",
310                 params->name, params->size, params->align, params->flags,
311                 offset);
312
313         return offset;
314 }
315
316 int
317 rte_mbuf_dynfield_register_offset(const struct rte_mbuf_dynfield *params,
318                                 size_t req)
319 {
320         int ret;
321
322         if (params->size >= sizeof(struct rte_mbuf)) {
323                 rte_errno = EINVAL;
324                 return -1;
325         }
326         if (!rte_is_power_of_2(params->align)) {
327                 rte_errno = EINVAL;
328                 return -1;
329         }
330         if (params->flags != 0) {
331                 rte_errno = EINVAL;
332                 return -1;
333         }
334
335         rte_mcfg_tailq_write_lock();
336         ret = __rte_mbuf_dynfield_register_offset(params, req);
337         rte_mcfg_tailq_write_unlock();
338
339         return ret;
340 }
341
/* Register a dynamic field without requesting a specific offset. */
int
rte_mbuf_dynfield_register(const struct rte_mbuf_dynfield *params)
{
	/* SIZE_MAX means "no requested offset" */
	const size_t any_offset = SIZE_MAX;

	return rte_mbuf_dynfield_register_offset(params, any_offset);
}
347
348 /* assume tailq is locked */
349 static struct mbuf_dynflag_elt *
350 __mbuf_dynflag_lookup(const char *name)
351 {
352         struct mbuf_dynflag_list *mbuf_dynflag_list;
353         struct mbuf_dynflag_elt *mbuf_dynflag;
354         struct rte_tailq_entry *te;
355
356         mbuf_dynflag_list = RTE_TAILQ_CAST(
357                 mbuf_dynflag_tailq.head, mbuf_dynflag_list);
358
359         TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
360                 mbuf_dynflag = (struct mbuf_dynflag_elt *)te->data;
361                 if (strncmp(name, mbuf_dynflag->params.name,
362                                 RTE_MBUF_DYN_NAMESIZE) == 0)
363                         break;
364         }
365
366         if (te == NULL) {
367                 rte_errno = ENOENT;
368                 return NULL;
369         }
370
371         return mbuf_dynflag;
372 }
373
374 int
375 rte_mbuf_dynflag_lookup(const char *name,
376                         struct rte_mbuf_dynflag *params)
377 {
378         struct mbuf_dynflag_elt *mbuf_dynflag;
379
380         if (shm == NULL) {
381                 rte_errno = ENOENT;
382                 return -1;
383         }
384
385         rte_mcfg_tailq_read_lock();
386         mbuf_dynflag = __mbuf_dynflag_lookup(name);
387         rte_mcfg_tailq_read_unlock();
388
389         if (mbuf_dynflag == NULL) {
390                 rte_errno = ENOENT;
391                 return -1;
392         }
393
394         if (params != NULL)
395                 memcpy(params, &mbuf_dynflag->params, sizeof(*params));
396
397         return mbuf_dynflag->bitnum;
398 }
399
400 static int mbuf_dynflag_cmp(const struct rte_mbuf_dynflag *params1,
401                 const struct rte_mbuf_dynflag *params2)
402 {
403         if (strcmp(params1->name, params2->name))
404                 return -1;
405         if (params1->flags != params2->flags)
406                 return -1;
407         return 0;
408 }
409
410 /* assume tailq is locked */
411 static int
412 __rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
413                                 unsigned int req)
414 {
415         struct mbuf_dynflag_list *mbuf_dynflag_list;
416         struct mbuf_dynflag_elt *mbuf_dynflag = NULL;
417         struct rte_tailq_entry *te = NULL;
418         unsigned int bitnum;
419         int ret;
420
421         if (shm == NULL && init_shared_mem() < 0)
422                 return -1;
423
424         mbuf_dynflag = __mbuf_dynflag_lookup(params->name);
425         if (mbuf_dynflag != NULL) {
426                 if (req != UINT_MAX && req != mbuf_dynflag->bitnum) {
427                         rte_errno = EEXIST;
428                         return -1;
429                 }
430                 if (mbuf_dynflag_cmp(params, &mbuf_dynflag->params) < 0) {
431                         rte_errno = EEXIST;
432                         return -1;
433                 }
434                 return mbuf_dynflag->bitnum;
435         }
436
437         if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
438                 rte_errno = EPERM;
439                 return -1;
440         }
441
442         if (req == UINT_MAX) {
443                 if (shm->free_flags == 0) {
444                         rte_errno = ENOENT;
445                         return -1;
446                 }
447                 bitnum = rte_bsf64(shm->free_flags);
448         } else {
449                 if ((shm->free_flags & (1ULL << req)) == 0) {
450                         rte_errno = EBUSY;
451                         return -1;
452                 }
453                 bitnum = req;
454         }
455
456         mbuf_dynflag_list = RTE_TAILQ_CAST(
457                 mbuf_dynflag_tailq.head, mbuf_dynflag_list);
458
459         te = rte_zmalloc("MBUF_DYNFLAG_TAILQ_ENTRY", sizeof(*te), 0);
460         if (te == NULL)
461                 return -1;
462
463         mbuf_dynflag = rte_zmalloc("mbuf_dynflag", sizeof(*mbuf_dynflag), 0);
464         if (mbuf_dynflag == NULL) {
465                 rte_free(te);
466                 return -1;
467         }
468
469         ret = strlcpy(mbuf_dynflag->params.name, params->name,
470                 sizeof(mbuf_dynflag->params.name));
471         if (ret < 0 || ret >= (int)sizeof(mbuf_dynflag->params.name)) {
472                 rte_free(mbuf_dynflag);
473                 rte_free(te);
474                 rte_errno = ENAMETOOLONG;
475                 return -1;
476         }
477         mbuf_dynflag->bitnum = bitnum;
478         te->data = mbuf_dynflag;
479
480         TAILQ_INSERT_TAIL(mbuf_dynflag_list, te, next);
481
482         shm->free_flags &= ~(1ULL << bitnum);
483
484         RTE_LOG(DEBUG, MBUF, "Registered dynamic flag %s (fl=0x%x) -> %u\n",
485                 params->name, params->flags, bitnum);
486
487         return bitnum;
488 }
489
490 int
491 rte_mbuf_dynflag_register_bitnum(const struct rte_mbuf_dynflag *params,
492                                 unsigned int req)
493 {
494         int ret;
495
496         if (req >= RTE_SIZEOF_FIELD(struct rte_mbuf, ol_flags) * CHAR_BIT &&
497                         req != UINT_MAX) {
498                 rte_errno = EINVAL;
499                 return -1;
500         }
501
502         rte_mcfg_tailq_write_lock();
503         ret = __rte_mbuf_dynflag_register_bitnum(params, req);
504         rte_mcfg_tailq_write_unlock();
505
506         return ret;
507 }
508
/* Register a dynamic flag without requesting a specific bit number. */
int
rte_mbuf_dynflag_register(const struct rte_mbuf_dynflag *params)
{
	/* UINT_MAX means "no requested bit number" */
	const unsigned int any_bitnum = UINT_MAX;

	return rte_mbuf_dynflag_register_bitnum(params, any_bitnum);
}
514
515 void rte_mbuf_dyn_dump(FILE *out)
516 {
517         struct mbuf_dynfield_list *mbuf_dynfield_list;
518         struct mbuf_dynfield_elt *dynfield;
519         struct mbuf_dynflag_list *mbuf_dynflag_list;
520         struct mbuf_dynflag_elt *dynflag;
521         struct rte_tailq_entry *te;
522         size_t i;
523
524         rte_mcfg_tailq_write_lock();
525         init_shared_mem();
526         fprintf(out, "Reserved fields:\n");
527         mbuf_dynfield_list = RTE_TAILQ_CAST(
528                 mbuf_dynfield_tailq.head, mbuf_dynfield_list);
529         TAILQ_FOREACH(te, mbuf_dynfield_list, next) {
530                 dynfield = (struct mbuf_dynfield_elt *)te->data;
531                 fprintf(out, "  name=%s offset=%zd size=%zd align=%zd flags=%x\n",
532                         dynfield->params.name, dynfield->offset,
533                         dynfield->params.size, dynfield->params.align,
534                         dynfield->params.flags);
535         }
536         fprintf(out, "Reserved flags:\n");
537         mbuf_dynflag_list = RTE_TAILQ_CAST(
538                 mbuf_dynflag_tailq.head, mbuf_dynflag_list);
539         TAILQ_FOREACH(te, mbuf_dynflag_list, next) {
540                 dynflag = (struct mbuf_dynflag_elt *)te->data;
541                 fprintf(out, "  name=%s bitnum=%u flags=%x\n",
542                         dynflag->params.name, dynflag->bitnum,
543                         dynflag->params.flags);
544         }
545         fprintf(out, "Free space in mbuf (0 = free, value = zone alignment):\n");
546         for (i = 0; i < sizeof(struct rte_mbuf); i++) {
547                 if ((i % 8) == 0)
548                         fprintf(out, "  %4.4zx: ", i);
549                 fprintf(out, "%2.2x%s", shm->free_space[i],
550                         (i % 8 != 7) ? " " : "\n");
551         }
552         rte_mcfg_tailq_write_unlock();
553 }