drivers/dma/skeleton/skeleton_dmadev.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2021 HiSilicon Limited
 */

#include <inttypes.h>

#include <rte_bus_vdev.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_kvargs.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>

#include <rte_dmadev_pmd.h>

#include "skeleton_dmadev.h"

RTE_LOG_REGISTER_DEFAULT(skeldma_logtype, INFO);
#define SKELDMA_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, skeldma_logtype, "%s(): " fmt "\n", \
                __func__, ##args)

static int
skeldma_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info,
                 uint32_t info_sz)
{
#define SKELDMA_MAX_DESC        8192
#define SKELDMA_MIN_DESC        32

        RTE_SET_USED(dev);
        RTE_SET_USED(info_sz);

        dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
                             RTE_DMA_CAPA_SVA |
                             RTE_DMA_CAPA_OPS_COPY;
        dev_info->max_vchans = 1;
        dev_info->max_desc = SKELDMA_MAX_DESC;
        dev_info->min_desc = SKELDMA_MIN_DESC;

        return 0;
}

static int
skeldma_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf,
                  uint32_t conf_sz)
{
        RTE_SET_USED(dev);
        RTE_SET_USED(conf);
        RTE_SET_USED(conf_sz);
        return 0;
}

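/*
 * The copy worker, run as an EAL control thread. It dequeues descriptors
 * from the running ring, performs the copy with rte_memcpy(), publishes the
 * completion with a release-ordered increment of completed_count, and hands
 * the descriptor to the completed ring. On an empty running ring it counts
 * idle polls and backs off with a short sleep once SLEEP_THRESHOLD is
 * reached; the "== 0" check re-arms the threshold if the counter wraps.
 */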
static void *
cpucopy_thread(void *param)
{
#define SLEEP_THRESHOLD         10000
#define SLEEP_US_VAL            10

        struct rte_dma_dev *dev = param;
        struct skeldma_hw *hw = dev->data->dev_private;
        struct skeldma_desc *desc = NULL;
        int ret;

        while (!hw->exit_flag) {
                ret = rte_ring_dequeue(hw->desc_running, (void **)&desc);
                if (ret) {
                        hw->zero_req_count++;
                        if (hw->zero_req_count == 0)
                                hw->zero_req_count = SLEEP_THRESHOLD;
                        if (hw->zero_req_count >= SLEEP_THRESHOLD)
                                rte_delay_us_sleep(SLEEP_US_VAL);
                        continue;
                }

                hw->zero_req_count = 0;
                rte_memcpy(desc->dst, desc->src, desc->len);
                __atomic_add_fetch(&hw->completed_count, 1, __ATOMIC_RELEASE);
                (void)rte_ring_enqueue(hw->desc_completed, (void *)desc);
        }

        return NULL;
}

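/* Drain whatever descriptors remain on @ring back onto the empty ring. */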
static void
fflush_ring(struct skeldma_hw *hw, struct rte_ring *ring)
{
        struct skeldma_desc *desc = NULL;
        while (rte_ring_count(ring) > 0) {
                (void)rte_ring_dequeue(ring, (void **)&desc);
                (void)rte_ring_enqueue(hw->desc_empty, (void *)desc);
        }
}

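/*
 * Device start: reset the software state, then spawn the copy worker as an
 * EAL control thread. If an lcore was given via devargs, the worker is
 * pinned to that lcore's CPU set; an affinity failure is only a warning.
 */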
static int
skeldma_start(struct rte_dma_dev *dev)
{
        struct skeldma_hw *hw = dev->data->dev_private;
        char name[RTE_MAX_THREAD_NAME_LEN];
        rte_cpuset_t cpuset;
        int ret;

        if (hw->desc_mem == NULL) {
                SKELDMA_LOG(ERR, "Vchan was not setup, start fail!");
                return -EINVAL;
        }

        /* Reset the dmadev to a known state, including:
         * 1) flush the pending/running/completed rings back to the empty ring.
         * 2) initialize the ring index to zero.
         * 3) reset the running statistics.
         * 4) clear the cpucopy task exit_flag.
         */
        fflush_ring(hw, hw->desc_pending);
        fflush_ring(hw, hw->desc_running);
        fflush_ring(hw, hw->desc_completed);
        hw->ridx = 0;
        hw->last_ridx = hw->ridx - 1;
        hw->submitted_count = 0;
        hw->zero_req_count = 0;
        hw->completed_count = 0;
        hw->exit_flag = false;

        rte_mb();

        snprintf(name, sizeof(name), "dma_skel_%d", dev->data->dev_id);
        ret = rte_ctrl_thread_create(&hw->thread, name, NULL,
                                     cpucopy_thread, dev);
        if (ret) {
                SKELDMA_LOG(ERR, "Start cpucopy thread fail!");
                return -EINVAL;
        }

        if (hw->lcore_id != -1) {
                cpuset = rte_lcore_cpuset(hw->lcore_id);
                ret = pthread_setaffinity_np(hw->thread, sizeof(cpuset),
                                             &cpuset);
                if (ret)
                        SKELDMA_LOG(WARNING,
                                "Set thread affinity lcore = %d fail!",
                                hw->lcore_id);
        }

        return 0;
}

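/*
 * Device stop: raise the exit flag and give the worker a moment to notice,
 * then cancel it (in case it is blocked in a sleep) and join.
 */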
static int
skeldma_stop(struct rte_dma_dev *dev)
{
        struct skeldma_hw *hw = dev->data->dev_private;

        hw->exit_flag = true;
        rte_delay_ms(1);

        (void)pthread_cancel(hw->thread);
        pthread_join(hw->thread, NULL);

        return 0;
}

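/*
 * Allocate the descriptor array and the four single-producer/single-consumer
 * rings that carry each descriptor through its lifecycle:
 *   empty -> pending (copy queued, not yet submitted)
 *         -> running (submitted, owned by the copy worker)
 *         -> completed (copy done, awaiting harvest)
 *         -> back to empty (recycled by the completed callbacks).
 */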
static int
vchan_setup(struct skeldma_hw *hw, int16_t dev_id, uint16_t nb_desc)
{
        char name[RTE_RING_NAMESIZE];
        struct skeldma_desc *desc;
        struct rte_ring *empty;
        struct rte_ring *pending;
        struct rte_ring *running;
        struct rte_ring *completed;
        uint16_t i;

        desc = rte_zmalloc_socket(NULL, nb_desc * sizeof(struct skeldma_desc),
                                  RTE_CACHE_LINE_SIZE, hw->socket_id);
        if (desc == NULL) {
                SKELDMA_LOG(ERR, "Malloc dma skeleton desc fail!");
                return -ENOMEM;
        }

        snprintf(name, RTE_RING_NAMESIZE, "dma_skel_desc_empty_%d", dev_id);
        empty = rte_ring_create(name, nb_desc, hw->socket_id,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
        snprintf(name, RTE_RING_NAMESIZE, "dma_skel_desc_pend_%d", dev_id);
        pending = rte_ring_create(name, nb_desc, hw->socket_id,
                                  RING_F_SP_ENQ | RING_F_SC_DEQ);
        snprintf(name, RTE_RING_NAMESIZE, "dma_skel_desc_run_%d", dev_id);
        running = rte_ring_create(name, nb_desc, hw->socket_id,
                                  RING_F_SP_ENQ | RING_F_SC_DEQ);
        snprintf(name, RTE_RING_NAMESIZE, "dma_skel_desc_comp_%d", dev_id);
        completed = rte_ring_create(name, nb_desc, hw->socket_id,
                                    RING_F_SP_ENQ | RING_F_SC_DEQ);
        if (empty == NULL || pending == NULL || running == NULL ||
            completed == NULL) {
                SKELDMA_LOG(ERR, "Create dma skeleton desc ring fail!");
                rte_ring_free(empty);
                rte_ring_free(pending);
                rte_ring_free(running);
                rte_ring_free(completed);
                rte_free(desc);
                return -ENOMEM;
        }

        /* The real usable ring size is *count-1* instead of *count* to
         * differentiate a full ring from an empty ring.
         * @see rte_ring_create
         */
        for (i = 0; i < nb_desc - 1; i++)
                (void)rte_ring_enqueue(empty, (void *)(desc + i));

        hw->desc_mem = desc;
        hw->desc_empty = empty;
        hw->desc_pending = pending;
        hw->desc_running = running;
        hw->desc_completed = completed;

        return 0;
}

static void
vchan_release(struct skeldma_hw *hw)
{
        if (hw->desc_mem == NULL)
                return;

        rte_free(hw->desc_mem);
        hw->desc_mem = NULL;
        rte_ring_free(hw->desc_empty);
        hw->desc_empty = NULL;
        rte_ring_free(hw->desc_pending);
        hw->desc_pending = NULL;
        rte_ring_free(hw->desc_running);
        hw->desc_running = NULL;
        rte_ring_free(hw->desc_completed);
        hw->desc_completed = NULL;
}

static int
skeldma_close(struct rte_dma_dev *dev)
{
        /* The device is already stopped at this point. */
        vchan_release(dev->data->dev_private);
        return 0;
}

static int
skeldma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
                    const struct rte_dma_vchan_conf *conf,
                    uint32_t conf_sz)
{
        struct skeldma_hw *hw = dev->data->dev_private;

        RTE_SET_USED(vchan);
        RTE_SET_USED(conf_sz);

        if (!rte_is_power_of_2(conf->nb_desc)) {
                SKELDMA_LOG(ERR, "Number of desc must be power of 2!");
                return -EINVAL;
        }

        vchan_release(hw);
        return vchan_setup(hw, dev->data->dev_id, conf->nb_desc);
}

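/*
 * The vchan is reported ACTIVE while some submitted copies are still
 * outstanding, or while the worker dequeued work on its most recent poll
 * (zero_req_count == 0); otherwise it is IDLE. The acquire load pairs with
 * the release increment in cpucopy_thread().
 */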
static int
skeldma_vchan_status(const struct rte_dma_dev *dev,
                uint16_t vchan, enum rte_dma_vchan_status *status)
{
        struct skeldma_hw *hw = dev->data->dev_private;

        RTE_SET_USED(vchan);

        *status = RTE_DMA_VCHAN_IDLE;
        if (hw->submitted_count != __atomic_load_n(&hw->completed_count, __ATOMIC_ACQUIRE)
                        || hw->zero_req_count == 0)
                *status = RTE_DMA_VCHAN_ACTIVE;
        return 0;
}

static int
skeldma_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
                  struct rte_dma_stats *stats, uint32_t stats_sz)
{
        struct skeldma_hw *hw = dev->data->dev_private;

        RTE_SET_USED(vchan);
        RTE_SET_USED(stats_sz);

        stats->submitted = hw->submitted_count;
        stats->completed = hw->completed_count;
        stats->errors = 0;

        return 0;
}

static int
skeldma_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
{
        struct skeldma_hw *hw = dev->data->dev_private;

        RTE_SET_USED(vchan);

        hw->submitted_count = 0;
        hw->completed_count = 0;

        return 0;
}

static int
skeldma_dump(const struct rte_dma_dev *dev, FILE *f)
{
#define GET_RING_COUNT(ring)    ((ring) ? (rte_ring_count(ring)) : 0)

        struct skeldma_hw *hw = dev->data->dev_private;

        (void)fprintf(f,
                "    lcore_id: %d\n"
                "    socket_id: %d\n"
                "    desc_empty_ring_count: %u\n"
                "    desc_pending_ring_count: %u\n"
                "    desc_running_ring_count: %u\n"
                "    desc_completed_ring_count: %u\n",
                hw->lcore_id, hw->socket_id,
                GET_RING_COUNT(hw->desc_empty),
                GET_RING_COUNT(hw->desc_pending),
                GET_RING_COUNT(hw->desc_running),
                GET_RING_COUNT(hw->desc_completed));
        (void)fprintf(f,
                "    next_ring_idx: %u\n"
                "    last_ring_idx: %u\n"
                "    submitted_count: %" PRIu64 "\n"
                "    completed_count: %" PRIu64 "\n",
                hw->ridx, hw->last_ridx,
                hw->submitted_count, hw->completed_count);

        return 0;
}

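/* Move all descriptors waiting on the pending ring to the running ring,
 * followed by @desc itself when one is passed, making them visible to the
 * copy worker.
 */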
static inline void
submit(struct skeldma_hw *hw, struct skeldma_desc *desc)
{
        uint16_t count = rte_ring_count(hw->desc_pending);
        struct skeldma_desc *pend_desc = NULL;

        while (count > 0) {
                (void)rte_ring_dequeue(hw->desc_pending, (void **)&pend_desc);
                (void)rte_ring_enqueue(hw->desc_running, (void *)pend_desc);
                count--;
        }

        if (desc)
                (void)rte_ring_enqueue(hw->desc_running, (void *)desc);
}

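/*
 * Fast-path copy enqueue: take a free descriptor from the empty ring and
 * fill it in. With RTE_DMA_OP_FLAG_SUBMIT it goes straight to the running
 * ring; otherwise it sits on the pending ring until skeldma_submit(). The
 * return value is the monotonically increasing ring index of this job.
 */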
static int
skeldma_copy(void *dev_private, uint16_t vchan,
             rte_iova_t src, rte_iova_t dst,
             uint32_t length, uint64_t flags)
{
        struct skeldma_hw *hw = dev_private;
        struct skeldma_desc *desc;
        int ret;

        RTE_SET_USED(vchan);
        RTE_SET_USED(flags);

        ret = rte_ring_dequeue(hw->desc_empty, (void **)&desc);
        if (ret)
                return -ENOSPC;
        desc->src = (void *)(uintptr_t)src;
        desc->dst = (void *)(uintptr_t)dst;
        desc->len = length;
        desc->ridx = hw->ridx;
        if (flags & RTE_DMA_OP_FLAG_SUBMIT)
                submit(hw, desc);
        else
                (void)rte_ring_enqueue(hw->desc_pending, (void *)desc);
        hw->submitted_count++;

        return hw->ridx++;
}

static int
skeldma_submit(void *dev_private, uint16_t vchan)
{
        struct skeldma_hw *hw = dev_private;
        RTE_SET_USED(vchan);
        submit(hw, NULL);
        return 0;
}

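/*
 * Harvest up to @nb_cpls finished copies: recycle each descriptor to the
 * empty ring and report the ring index of the newest one through @last_idx.
 * When nothing has completed, the index reported on the previous call is
 * repeated.
 */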
static uint16_t
skeldma_completed(void *dev_private,
                  uint16_t vchan, const uint16_t nb_cpls,
                  uint16_t *last_idx, bool *has_error)
{
        struct skeldma_hw *hw = dev_private;
        struct skeldma_desc *desc = NULL;
        uint16_t index = 0;
        uint16_t count;

        RTE_SET_USED(vchan);
        RTE_SET_USED(has_error);

        count = RTE_MIN(nb_cpls, rte_ring_count(hw->desc_completed));
        while (index < count) {
                (void)rte_ring_dequeue(hw->desc_completed, (void **)&desc);
                if (index == count - 1) {
                        hw->last_ridx = desc->ridx;
                        *last_idx = desc->ridx;
                }
                index++;
                (void)rte_ring_enqueue(hw->desc_empty, (void *)desc);
        }
        if (unlikely(count == 0))
                *last_idx = hw->last_ridx;

        return count;
}

static uint16_t
skeldma_completed_status(void *dev_private,
                         uint16_t vchan, const uint16_t nb_cpls,
                         uint16_t *last_idx, enum rte_dma_status_code *status)
{
        struct skeldma_hw *hw = dev_private;
        struct skeldma_desc *desc = NULL;
        uint16_t index = 0;
        uint16_t count;

        RTE_SET_USED(vchan);

        count = RTE_MIN(nb_cpls, rte_ring_count(hw->desc_completed));
        while (index < count) {
                (void)rte_ring_dequeue(hw->desc_completed, (void **)&desc);
                if (index == count - 1) {
                        hw->last_ridx = desc->ridx;
                        *last_idx = desc->ridx;
                }
                status[index++] = RTE_DMA_STATUS_SUCCESSFUL;
                (void)rte_ring_enqueue(hw->desc_empty, (void *)desc);
        }
        if (unlikely(count == 0))
                *last_idx = hw->last_ridx;

        return count;
}

static uint16_t
skeldma_burst_capacity(const void *dev_private, uint16_t vchan)
{
        const struct skeldma_hw *hw = dev_private;

        RTE_SET_USED(vchan);
        return rte_ring_count(hw->desc_empty);
}

static const struct rte_dma_dev_ops skeldma_ops = {
        .dev_info_get     = skeldma_info_get,
        .dev_configure    = skeldma_configure,
        .dev_start        = skeldma_start,
        .dev_stop         = skeldma_stop,
        .dev_close        = skeldma_close,

        .vchan_setup      = skeldma_vchan_setup,
        .vchan_status     = skeldma_vchan_status,

        .stats_get        = skeldma_stats_get,
        .stats_reset      = skeldma_stats_reset,

        .dev_dump         = skeldma_dump,
};

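/*
 * Allocate a dmadev on the socket of the requested lcore (or of the caller
 * when no lcore was given), then wire up the control-plane ops table and
 * the fast-path entry points.
 */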
static int
skeldma_create(const char *name, struct rte_vdev_device *vdev, int lcore_id)
{
        struct rte_dma_dev *dev;
        struct skeldma_hw *hw;
        int socket_id;

        socket_id = (lcore_id < 0) ? rte_socket_id() :
                                     rte_lcore_to_socket_id(lcore_id);
        dev = rte_dma_pmd_allocate(name, socket_id, sizeof(struct skeldma_hw));
        if (dev == NULL) {
                SKELDMA_LOG(ERR, "Unable to allocate dmadev: %s", name);
                return -EINVAL;
        }

        dev->device = &vdev->device;
        dev->dev_ops = &skeldma_ops;
        dev->fp_obj->dev_private = dev->data->dev_private;
        dev->fp_obj->copy = skeldma_copy;
        dev->fp_obj->submit = skeldma_submit;
        dev->fp_obj->completed = skeldma_completed;
        dev->fp_obj->completed_status = skeldma_completed_status;
        dev->fp_obj->burst_capacity = skeldma_burst_capacity;

        hw = dev->data->dev_private;
        hw->lcore_id = lcore_id;
        hw->socket_id = socket_id;

        dev->state = RTE_DMA_DEV_READY;

        return dev->data->dev_id;
}

static int
skeldma_destroy(const char *name)
{
        return rte_dma_pmd_release(name);
}

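/* Parse the lcore devarg; out-of-range values are silently ignored, keeping
 * the default of -1 (no affinity).
 */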
static int
skeldma_parse_lcore(const char *key __rte_unused,
                    const char *value,
                    void *opaque)
{
        int lcore_id = atoi(value);
        if (lcore_id >= 0 && lcore_id < RTE_MAX_LCORE)
                *(int *)opaque = lcore_id;
        return 0;
}

static void
skeldma_parse_vdev_args(struct rte_vdev_device *vdev, int *lcore_id)
{
        static const char *const args[] = {
                SKELDMA_ARG_LCORE,
                NULL
        };

        struct rte_kvargs *kvlist;
        const char *params;

        params = rte_vdev_device_args(vdev);
        if (params == NULL || params[0] == '\0')
                return;

        kvlist = rte_kvargs_parse(params, args);
        if (!kvlist)
                return;

        (void)rte_kvargs_process(kvlist, SKELDMA_ARG_LCORE,
                                 skeldma_parse_lcore, lcore_id);
        SKELDMA_LOG(INFO, "Parse lcore_id = %d", *lcore_id);

        rte_kvargs_free(kvlist);
}

static int
skeldma_probe(struct rte_vdev_device *vdev)
{
        const char *name;
        int lcore_id = -1;
        int ret;

        name = rte_vdev_device_name(vdev);
        if (name == NULL)
                return -EINVAL;

        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                SKELDMA_LOG(ERR, "Multiple process not supported for %s", name);
                return -EINVAL;
        }

        skeldma_parse_vdev_args(vdev, &lcore_id);

        ret = skeldma_create(name, vdev, lcore_id);
        if (ret >= 0)
                SKELDMA_LOG(INFO, "Create %s dmadev with lcore-id %d",
                        name, lcore_id);

        return ret < 0 ? ret : 0;
}

static int
skeldma_remove(struct rte_vdev_device *vdev)
{
        const char *name;
        int ret;

        name = rte_vdev_device_name(vdev);
        if (name == NULL)
                return -1;

        ret = skeldma_destroy(name);
        if (!ret)
                SKELDMA_LOG(INFO, "Remove %s dmadev", name);

        return ret;
}

static struct rte_vdev_driver skeldma_pmd_drv = {
        .probe = skeldma_probe,
        .remove = skeldma_remove,
        .drv_flags = RTE_VDEV_DRV_NEED_IOVA_AS_VA,
};

RTE_PMD_REGISTER_VDEV(dma_skeleton, skeldma_pmd_drv);
RTE_PMD_REGISTER_PARAM_STRING(dma_skeleton,
                SKELDMA_ARG_LCORE "=<uint16> ");
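
/*
 * Usage sketch (not part of the upstream file): the driver is instantiated
 * through an EAL --vdev argument, e.g.
 *
 *   <dpdk-app> --vdev=dma_skeleton,lcore=4
 *
 * assuming SKELDMA_ARG_LCORE in skeleton_dmadev.h expands to "lcore"; the
 * devarg pins the cpucopy worker thread to that lcore's CPU set.
 */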