kni: allow configuring thread granularity
[dpdk.git] / kernel / linux / kni / kni_misc.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright(c) 2010-2014 Intel Corporation.
4  */
5
6 #include <linux/version.h>
7 #include <linux/module.h>
8 #include <linux/miscdevice.h>
9 #include <linux/netdevice.h>
10 #include <linux/etherdevice.h>
11 #include <linux/pci.h>
12 #include <linux/kthread.h>
13 #include <linux/rwsem.h>
14 #include <linux/mutex.h>
15 #include <linux/nsproxy.h>
16 #include <net/net_namespace.h>
17 #include <net/netns/generic.h>
18
19 #include <rte_kni_common.h>
20
21 #include "compat.h"
22 #include "kni_dev.h"
23
24 MODULE_VERSION(KNI_VERSION);
25 MODULE_LICENSE("Dual BSD/GPL");
26 MODULE_AUTHOR("Intel Corporation");
27 MODULE_DESCRIPTION("Kernel Module for managing kni devices");
28
29 #define KNI_RX_LOOP_NUM 1000
30
31 #define KNI_MAX_DEVICES 32
32
33 /* loopback mode */
34 static char *lo_mode;
35
36 /* Kernel thread mode */
37 static char *kthread_mode;
38 static uint32_t multiple_kthread_on;
39
40 /* Default carrier state for created KNI network interfaces */
41 static char *carrier;
42 uint32_t kni_dflt_carrier;
43
44 /* Request processing support for bifurcated drivers. */
45 static char *enable_bifurcated;
46 uint32_t bifurcated_support;
47
48 /* KNI thread scheduling interval */
49 static long min_scheduling_interval = 100; /* us */
50 static long max_scheduling_interval = 200; /* us */
51
52 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
53
54 static int kni_net_id;
55
56 struct kni_net {
57         unsigned long device_in_use; /* device in use flag */
58         struct mutex kni_kthread_lock;
59         struct task_struct *kni_kthread;
60         struct rw_semaphore kni_list_lock;
61         struct list_head kni_list_head;
62 };
63
64 static int __net_init
65 kni_init_net(struct net *net)
66 {
67 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
68         struct kni_net *knet = net_generic(net, kni_net_id);
69
70         memset(knet, 0, sizeof(*knet));
71 #else
72         struct kni_net *knet;
73         int ret;
74
75         knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
76         if (!knet) {
77                 ret = -ENOMEM;
78                 return ret;
79         }
80 #endif
81
82         /* Clear the bit of device in use */
83         clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
84
85         mutex_init(&knet->kni_kthread_lock);
86
87         init_rwsem(&knet->kni_list_lock);
88         INIT_LIST_HEAD(&knet->kni_list_head);
89
90 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
91         return 0;
92 #else
93         ret = net_assign_generic(net, kni_net_id, knet);
94         if (ret < 0)
95                 kfree(knet);
96
97         return ret;
98 #endif
99 }
100
101 static void __net_exit
102 kni_exit_net(struct net *net)
103 {
104         struct kni_net *knet __maybe_unused;
105
106         knet = net_generic(net, kni_net_id);
107         mutex_destroy(&knet->kni_kthread_lock);
108
109 #ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
110         kfree(knet);
111 #endif
112 }
113
114 static struct pernet_operations kni_net_ops = {
115         .init = kni_init_net,
116         .exit = kni_exit_net,
117 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
118         .id   = &kni_net_id,
119         .size = sizeof(struct kni_net),
120 #endif
121 };
122
123 static int
124 kni_thread_single(void *data)
125 {
126         struct kni_net *knet = data;
127         int j;
128         struct kni_dev *dev;
129
130         while (!kthread_should_stop()) {
131                 down_read(&knet->kni_list_lock);
132                 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
133                         list_for_each_entry(dev, &knet->kni_list_head, list) {
134                                 kni_net_rx(dev);
135                                 kni_net_poll_resp(dev);
136                         }
137                 }
138                 up_read(&knet->kni_list_lock);
139                 /* reschedule out for a while */
140                 usleep_range(min_scheduling_interval, max_scheduling_interval);
141         }
142
143         return 0;
144 }
145
146 static int
147 kni_thread_multiple(void *param)
148 {
149         int j;
150         struct kni_dev *dev = param;
151
152         while (!kthread_should_stop()) {
153                 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
154                         kni_net_rx(dev);
155                         kni_net_poll_resp(dev);
156                 }
157                 usleep_range(min_scheduling_interval, max_scheduling_interval);
158         }
159
160         return 0;
161 }
162
163 static int
164 kni_open(struct inode *inode, struct file *file)
165 {
166         struct net *net = current->nsproxy->net_ns;
167         struct kni_net *knet = net_generic(net, kni_net_id);
168
169         /* kni device can be opened by one user only per netns */
170         if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
171                 return -EBUSY;
172
173         file->private_data = get_net(net);
174         pr_debug("/dev/kni opened\n");
175
176         return 0;
177 }
178
179 static int
180 kni_dev_remove(struct kni_dev *dev)
181 {
182         if (!dev)
183                 return -ENODEV;
184
185         if (dev->net_dev) {
186                 unregister_netdev(dev->net_dev);
187                 free_netdev(dev->net_dev);
188         }
189
190         kni_net_release_fifo_phy(dev);
191
192         return 0;
193 }
194
195 static int
196 kni_release(struct inode *inode, struct file *file)
197 {
198         struct net *net = file->private_data;
199         struct kni_net *knet = net_generic(net, kni_net_id);
200         struct kni_dev *dev, *n;
201
202         /* Stop kernel thread for single mode */
203         if (multiple_kthread_on == 0) {
204                 mutex_lock(&knet->kni_kthread_lock);
205                 /* Stop kernel thread */
206                 if (knet->kni_kthread != NULL) {
207                         kthread_stop(knet->kni_kthread);
208                         knet->kni_kthread = NULL;
209                 }
210                 mutex_unlock(&knet->kni_kthread_lock);
211         }
212
213         down_write(&knet->kni_list_lock);
214         list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
215                 /* Stop kernel thread for multiple mode */
216                 if (multiple_kthread_on && dev->pthread != NULL) {
217                         kthread_stop(dev->pthread);
218                         dev->pthread = NULL;
219                 }
220
221                 kni_dev_remove(dev);
222                 list_del(&dev->list);
223         }
224         up_write(&knet->kni_list_lock);
225
226         /* Clear the bit of device in use */
227         clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
228
229         put_net(net);
230         pr_debug("/dev/kni closed\n");
231
232         return 0;
233 }
234
235 static int
236 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
237 {
238         if (!kni || !dev)
239                 return -1;
240
241         /* Check if network name has been used */
242         if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
243                 pr_err("KNI name %s duplicated\n", dev->name);
244                 return -1;
245         }
246
247         return 0;
248 }
249
250 static int
251 kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
252 {
253         /**
254          * Create a new kernel thread for multiple mode, set its core affinity,
255          * and finally wake it up.
256          */
257         if (multiple_kthread_on) {
258                 kni->pthread = kthread_create(kni_thread_multiple,
259                         (void *)kni, "kni_%s", kni->name);
260                 if (IS_ERR(kni->pthread)) {
261                         kni_dev_remove(kni);
262                         return -ECANCELED;
263                 }
264
265                 if (force_bind)
266                         kthread_bind(kni->pthread, kni->core_id);
267                 wake_up_process(kni->pthread);
268         } else {
269                 mutex_lock(&knet->kni_kthread_lock);
270
271                 if (knet->kni_kthread == NULL) {
272                         knet->kni_kthread = kthread_create(kni_thread_single,
273                                 (void *)knet, "kni_single");
274                         if (IS_ERR(knet->kni_kthread)) {
275                                 mutex_unlock(&knet->kni_kthread_lock);
276                                 kni_dev_remove(kni);
277                                 return -ECANCELED;
278                         }
279
280                         if (force_bind)
281                                 kthread_bind(knet->kni_kthread, kni->core_id);
282                         wake_up_process(knet->kni_kthread);
283                 }
284
285                 mutex_unlock(&knet->kni_kthread_lock);
286         }
287
288         return 0;
289 }
290
291 static int
292 kni_ioctl_create(struct net *net, uint32_t ioctl_num,
293                 unsigned long ioctl_param)
294 {
295         struct kni_net *knet = net_generic(net, kni_net_id);
296         int ret;
297         struct rte_kni_device_info dev_info;
298         struct net_device *net_dev = NULL;
299         struct kni_dev *kni, *dev, *n;
300
301         pr_info("Creating kni...\n");
302         /* Check the buffer size, to avoid warning */
303         if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
304                 return -EINVAL;
305
306         /* Copy kni info from user space */
307         if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
308                 return -EFAULT;
309
310         /* Check if name is zero-ended */
311         if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
312                 pr_err("kni.name not zero-terminated");
313                 return -EINVAL;
314         }
315
316         /**
317          * Check if the cpu core id is valid for binding.
318          */
319         if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
320                 pr_err("cpu %u is not online\n", dev_info.core_id);
321                 return -EINVAL;
322         }
323
324         /* Check if it has been created */
325         down_read(&knet->kni_list_lock);
326         list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
327                 if (kni_check_param(dev, &dev_info) < 0) {
328                         up_read(&knet->kni_list_lock);
329                         return -EINVAL;
330                 }
331         }
332         up_read(&knet->kni_list_lock);
333
334         net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
335 #ifdef NET_NAME_USER
336                                                         NET_NAME_USER,
337 #endif
338                                                         kni_net_init);
339         if (net_dev == NULL) {
340                 pr_err("error allocating device \"%s\"\n", dev_info.name);
341                 return -EBUSY;
342         }
343
344         dev_net_set(net_dev, net);
345
346         kni = netdev_priv(net_dev);
347
348         kni->net_dev = net_dev;
349         kni->core_id = dev_info.core_id;
350         strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
351
352         /* Translate user space info into kernel space info */
353         if (dev_info.iova_mode) {
354 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
355                 kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
356                 kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
357                 kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
358                 kni->free_q = iova_to_kva(current, dev_info.free_phys);
359
360                 kni->req_q = iova_to_kva(current, dev_info.req_phys);
361                 kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
362                 kni->sync_va = dev_info.sync_va;
363                 kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
364                 kni->usr_tsk = current;
365                 kni->iova_mode = 1;
366 #else
367                 pr_err("KNI module does not support IOVA to VA translation\n");
368                 return -EINVAL;
369 #endif
370         } else {
371
372                 kni->tx_q = phys_to_virt(dev_info.tx_phys);
373                 kni->rx_q = phys_to_virt(dev_info.rx_phys);
374                 kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
375                 kni->free_q = phys_to_virt(dev_info.free_phys);
376
377                 kni->req_q = phys_to_virt(dev_info.req_phys);
378                 kni->resp_q = phys_to_virt(dev_info.resp_phys);
379                 kni->sync_va = dev_info.sync_va;
380                 kni->sync_kva = phys_to_virt(dev_info.sync_phys);
381                 kni->iova_mode = 0;
382         }
383
384         kni->mbuf_size = dev_info.mbuf_size;
385
386         pr_debug("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
387                 (unsigned long long) dev_info.tx_phys, kni->tx_q);
388         pr_debug("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
389                 (unsigned long long) dev_info.rx_phys, kni->rx_q);
390         pr_debug("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
391                 (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
392         pr_debug("free_phys:    0x%016llx, free_q addr:    0x%p\n",
393                 (unsigned long long) dev_info.free_phys, kni->free_q);
394         pr_debug("req_phys:     0x%016llx, req_q addr:     0x%p\n",
395                 (unsigned long long) dev_info.req_phys, kni->req_q);
396         pr_debug("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
397                 (unsigned long long) dev_info.resp_phys, kni->resp_q);
398         pr_debug("mbuf_size:    %u\n", kni->mbuf_size);
399
400         /* if user has provided a valid mac address */
401         if (is_valid_ether_addr(dev_info.mac_addr))
402                 memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
403         else
404                 /* Generate random MAC address. */
405                 eth_random_addr(net_dev->dev_addr);
406
407         if (dev_info.mtu)
408                 net_dev->mtu = dev_info.mtu;
409 #ifdef HAVE_MAX_MTU_PARAM
410         net_dev->max_mtu = net_dev->mtu;
411
412         if (dev_info.min_mtu)
413                 net_dev->min_mtu = dev_info.min_mtu;
414
415         if (dev_info.max_mtu)
416                 net_dev->max_mtu = dev_info.max_mtu;
417 #endif
418
419         ret = register_netdev(net_dev);
420         if (ret) {
421                 pr_err("error %i registering device \"%s\"\n",
422                                         ret, dev_info.name);
423                 kni->net_dev = NULL;
424                 kni_dev_remove(kni);
425                 free_netdev(net_dev);
426                 return -ENODEV;
427         }
428
429         netif_carrier_off(net_dev);
430
431         ret = kni_run_thread(knet, kni, dev_info.force_bind);
432         if (ret != 0)
433                 return ret;
434
435         down_write(&knet->kni_list_lock);
436         list_add(&kni->list, &knet->kni_list_head);
437         up_write(&knet->kni_list_lock);
438
439         return 0;
440 }
441
442 static int
443 kni_ioctl_release(struct net *net, uint32_t ioctl_num,
444                 unsigned long ioctl_param)
445 {
446         struct kni_net *knet = net_generic(net, kni_net_id);
447         int ret = -EINVAL;
448         struct kni_dev *dev, *n;
449         struct rte_kni_device_info dev_info;
450
451         if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
452                 return -EINVAL;
453
454         if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
455                 return -EFAULT;
456
457         /* Release the network device according to its name */
458         if (strlen(dev_info.name) == 0)
459                 return -EINVAL;
460
461         down_write(&knet->kni_list_lock);
462         list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
463                 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
464                         continue;
465
466                 if (multiple_kthread_on && dev->pthread != NULL) {
467                         kthread_stop(dev->pthread);
468                         dev->pthread = NULL;
469                 }
470
471                 kni_dev_remove(dev);
472                 list_del(&dev->list);
473                 ret = 0;
474                 break;
475         }
476         up_write(&knet->kni_list_lock);
477         pr_info("%s release kni named %s\n",
478                 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
479
480         return ret;
481 }
482
483 static int
484 kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
485 {
486         int ret = -EINVAL;
487         struct net *net = current->nsproxy->net_ns;
488
489         pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
490
491         /*
492          * Switch according to the ioctl called
493          */
494         switch (_IOC_NR(ioctl_num)) {
495         case _IOC_NR(RTE_KNI_IOCTL_TEST):
496                 /* For test only, not used */
497                 break;
498         case _IOC_NR(RTE_KNI_IOCTL_CREATE):
499                 ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
500                 break;
501         case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
502                 ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
503                 break;
504         default:
505                 pr_debug("IOCTL default\n");
506                 break;
507         }
508
509         return ret;
510 }
511
512 static int
513 kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
514                 unsigned long ioctl_param)
515 {
516         /* 32 bits app on 64 bits OS to be supported later */
517         pr_debug("Not implemented.\n");
518
519         return -EINVAL;
520 }
521
522 static const struct file_operations kni_fops = {
523         .owner = THIS_MODULE,
524         .open = kni_open,
525         .release = kni_release,
526         .unlocked_ioctl = (void *)kni_ioctl,
527         .compat_ioctl = (void *)kni_compat_ioctl,
528 };
529
530 static struct miscdevice kni_misc = {
531         .minor = MISC_DYNAMIC_MINOR,
532         .name = KNI_DEVICE,
533         .fops = &kni_fops,
534 };
535
536 static int __init
537 kni_parse_kthread_mode(void)
538 {
539         if (!kthread_mode)
540                 return 0;
541
542         if (strcmp(kthread_mode, "single") == 0)
543                 return 0;
544         else if (strcmp(kthread_mode, "multiple") == 0)
545                 multiple_kthread_on = 1;
546         else
547                 return -1;
548
549         return 0;
550 }
551
552 static int __init
553 kni_parse_carrier_state(void)
554 {
555         if (!carrier) {
556                 kni_dflt_carrier = 0;
557                 return 0;
558         }
559
560         if (strcmp(carrier, "off") == 0)
561                 kni_dflt_carrier = 0;
562         else if (strcmp(carrier, "on") == 0)
563                 kni_dflt_carrier = 1;
564         else
565                 return -1;
566
567         return 0;
568 }
569
570 static int __init
571 kni_parse_bifurcated_support(void)
572 {
573         if (!enable_bifurcated) {
574                 bifurcated_support = 0;
575                 return 0;
576         }
577
578         if (strcmp(enable_bifurcated, "on") == 0)
579                 bifurcated_support = 1;
580         else
581                 return -1;
582
583         return 0;
584 }
585
586 static int __init
587 kni_init(void)
588 {
589         int rc;
590
591         if (kni_parse_kthread_mode() < 0) {
592                 pr_err("Invalid parameter for kthread_mode\n");
593                 return -EINVAL;
594         }
595
596         if (multiple_kthread_on == 0)
597                 pr_debug("Single kernel thread for all KNI devices\n");
598         else
599                 pr_debug("Multiple kernel thread mode enabled\n");
600
601         if (kni_parse_carrier_state() < 0) {
602                 pr_err("Invalid parameter for carrier\n");
603                 return -EINVAL;
604         }
605
606         if (kni_dflt_carrier == 0)
607                 pr_debug("Default carrier state set to off.\n");
608         else
609                 pr_debug("Default carrier state set to on.\n");
610
611         if (kni_parse_bifurcated_support() < 0) {
612                 pr_err("Invalid parameter for bifurcated support\n");
613                 return -EINVAL;
614         }
615         if (bifurcated_support == 1)
616                 pr_debug("bifurcated support is enabled.\n");
617
618         if (min_scheduling_interval < 0 || max_scheduling_interval < 0 ||
619                 min_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL ||
620                 max_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL ||
621                 min_scheduling_interval >= max_scheduling_interval) {
622                 pr_err("Invalid parameters for scheduling interval\n");
623                 return -EINVAL;
624         }
625
626 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
627         rc = register_pernet_subsys(&kni_net_ops);
628 #else
629         rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
630 #endif
631         if (rc)
632                 return -EPERM;
633
634         rc = misc_register(&kni_misc);
635         if (rc != 0) {
636                 pr_err("Misc registration failed\n");
637                 goto out;
638         }
639
640         /* Configure the lo mode according to the input parameter */
641         kni_net_config_lo_mode(lo_mode);
642
643         return 0;
644
645 out:
646 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
647         unregister_pernet_subsys(&kni_net_ops);
648 #else
649         unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
650 #endif
651         return rc;
652 }
653
654 static void __exit
655 kni_exit(void)
656 {
657         misc_deregister(&kni_misc);
658 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
659         unregister_pernet_subsys(&kni_net_ops);
660 #else
661         unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
662 #endif
663 }
664
665 module_init(kni_init);
666 module_exit(kni_exit);
667
668 module_param(lo_mode, charp, 0644);
669 MODULE_PARM_DESC(lo_mode,
670 "KNI loopback mode (default=lo_mode_none):\n"
671 "\t\tlo_mode_none        Kernel loopback disabled\n"
672 "\t\tlo_mode_fifo        Enable kernel loopback with fifo\n"
673 "\t\tlo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
674 "\t\t"
675 );
676
677 module_param(kthread_mode, charp, 0644);
678 MODULE_PARM_DESC(kthread_mode,
679 "Kernel thread mode (default=single):\n"
680 "\t\tsingle    Single kernel thread mode enabled.\n"
681 "\t\tmultiple  Multiple kernel thread mode enabled.\n"
682 "\t\t"
683 );
684
685 module_param(carrier, charp, 0644);
686 MODULE_PARM_DESC(carrier,
687 "Default carrier state for KNI interface (default=off):\n"
688 "\t\toff   Interfaces will be created with carrier state set to off.\n"
689 "\t\ton    Interfaces will be created with carrier state set to on.\n"
690 "\t\t"
691 );
692
693 module_param(enable_bifurcated, charp, 0644);
694 MODULE_PARM_DESC(enable_bifurcated,
695 "Enable request processing support for bifurcated drivers, "
696 "which means releasing rtnl_lock before calling userspace callback and "
697 "supporting async requests (default=off):\n"
698 "\t\ton    Enable request processing support for bifurcated drivers.\n"
699 "\t\t"
700 );
701
702 module_param(min_scheduling_interval, long, 0644);
703 MODULE_PARM_DESC(min_scheduling_interval,
704 "KNI thread min scheduling interval (default=100 microseconds)"
705 );
706
707 module_param(max_scheduling_interval, long, 0644);
708 MODULE_PARM_DESC(max_scheduling_interval,
709 "KNI thread max scheduling interval (default=200 microseconds)"
710 );