// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2010-2014 Intel Corporation.
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include <rte_kni_common.h>

#include "compat.h"
#include "kni_dev.h"

MODULE_VERSION(KNI_VERSION);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kernel Module for managing kni devices");

#define KNI_RX_LOOP_NUM 1000

#define KNI_MAX_DEVICES 32

/* Loopback mode */
static char *lo_mode;

/* Kernel thread mode */
static char *kthread_mode;
static uint32_t multiple_kthread_on;

/* Default carrier state for created KNI network interfaces */
static char *carrier;
uint32_t kni_dflt_carrier;

/* Request processing support for bifurcated drivers. */
static char *enable_bifurcated;
uint32_t bifurcated_support;

#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */

static int kni_net_id;

/* Per network namespace KNI state */
struct kni_net {
	unsigned long device_in_use; /* device in use flag */
	struct mutex kni_kthread_lock;
	struct task_struct *kni_kthread;
	struct rw_semaphore kni_list_lock;
	struct list_head kni_list_head;
};

static int __net_init
kni_init_net(struct net *net)
{
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	struct kni_net *knet = net_generic(net, kni_net_id);

	memset(knet, 0, sizeof(*knet));
#else
	struct kni_net *knet;
	int ret;

	knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
	if (!knet)
		return -ENOMEM;
#endif

	/* Clear the bit of device in use */
	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

	mutex_init(&knet->kni_kthread_lock);
	init_rwsem(&knet->kni_list_lock);
	INIT_LIST_HEAD(&knet->kni_list_head);

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	return 0;
#else
	ret = net_assign_generic(net, kni_net_id, knet);
	if (ret < 0)
		kfree(knet);
	return ret;
#endif
}

static void __net_exit
kni_exit_net(struct net *net)
{
	struct kni_net *knet __maybe_unused;

	knet = net_generic(net, kni_net_id);
	mutex_destroy(&knet->kni_kthread_lock);

#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	kfree(knet);
#endif
}

static struct pernet_operations kni_net_ops = {
	.init = kni_init_net,
	.exit = kni_exit_net,
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	.id   = &kni_net_id,
	.size = sizeof(struct kni_net),
#endif
};

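/*
 * RX kernel thread for single-kthread mode: one thread serves every
 * KNI device in the network namespace.
 */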
static int
kni_thread_single(void *data)
{
	struct kni_net *knet = data;
	int j;
	struct kni_dev *dev;

	while (!kthread_should_stop()) {
		down_read(&knet->kni_list_lock);
		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
			list_for_each_entry(dev, &knet->kni_list_head, list) {
				kni_net_poll_resp(dev);
			}
		}
		up_read(&knet->kni_list_lock);
#ifdef RTE_KNI_PREEMPT_DEFAULT
		/* reschedule out for a while */
		schedule_timeout_interruptible(
			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
	}

	return 0;
}

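/*
 * RX kernel thread for multiple-kthread mode: each KNI device has its
 * own thread polling only that device.
 */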
static int
kni_thread_multiple(void *param)
{
	int j;
	struct kni_dev *dev = param;

	while (!kthread_should_stop()) {
		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
			kni_net_poll_resp(dev);
		}
#ifdef RTE_KNI_PREEMPT_DEFAULT
		schedule_timeout_interruptible(
			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
	}

	return 0;
}

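/* open() handler for /dev/kni: marks the control device busy and pins the caller's netns. */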
static int
kni_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct kni_net *knet = net_generic(net, kni_net_id);

	/* kni device can be opened by one user only per netns */
	if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
		return -EBUSY;

	file->private_data = get_net(net);
	pr_debug("/dev/kni opened\n");

	return 0;
}

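/* Unregister the backing net_device and release the KNI FIFO memory. */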
static int
kni_dev_remove(struct kni_dev *dev)
{
	if (!dev)
		return -ENODEV;
	if (dev->net_dev) {
		unregister_netdev(dev->net_dev);
		free_netdev(dev->net_dev);
	}
	kni_net_release_fifo_phy(dev);
	return 0;
}

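/*
 * release() handler for /dev/kni: stop the RX thread(s), tear down every
 * KNI device in the namespace and clear the in-use flag.
 */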
static int
kni_release(struct inode *inode, struct file *file)
{
	struct net *net = file->private_data;
	struct kni_net *knet = net_generic(net, kni_net_id);
	struct kni_dev *dev, *n;

	/* Stop kernel thread for single mode */
	if (multiple_kthread_on == 0) {
		mutex_lock(&knet->kni_kthread_lock);
		/* Stop kernel thread */
		if (knet->kni_kthread != NULL) {
			kthread_stop(knet->kni_kthread);
			knet->kni_kthread = NULL;
		}
		mutex_unlock(&knet->kni_kthread_lock);
	}

	down_write(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		/* Stop kernel thread for multiple mode */
		if (multiple_kthread_on && dev->pthread != NULL) {
			kthread_stop(dev->pthread);
			dev->pthread = NULL;
		}

		kni_dev_remove(dev);
		list_del(&dev->list);
	}
	up_write(&knet->kni_list_lock);

	/* Clear the bit of device in use */
	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

	put_net(net);
	pr_debug("/dev/kni closed\n");

	return 0;
}

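/* Reject a create request that reuses the name of an existing KNI device. */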
static int
kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
{
	if (!kni || !dev)
		return -1;

	/* Check if network name has been used */
	if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
		pr_err("KNI name %s duplicated\n", dev->name);
		return -1;
	}

	return 0;
}

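/*
 * Start the RX kernel thread(s): one thread per device in multiple mode,
 * or a single shared "kni_single" thread otherwise, optionally bound to
 * the requested CPU core.
 */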
static int
kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
{
	/*
	 * Create a new kernel thread for multiple mode, set its core affinity,
	 * and finally wake it up.
	 */
	if (multiple_kthread_on) {
		kni->pthread = kthread_create(kni_thread_multiple,
			(void *)kni, "kni_%s", kni->name);
		if (IS_ERR(kni->pthread)) {
			kni_dev_remove(kni);
			return -ECANCELED;
		}

		if (force_bind)
			kthread_bind(kni->pthread, kni->core_id);
		wake_up_process(kni->pthread);
	} else {
		mutex_lock(&knet->kni_kthread_lock);

		if (knet->kni_kthread == NULL) {
			knet->kni_kthread = kthread_create(kni_thread_single,
				(void *)knet, "kni_single");
			if (IS_ERR(knet->kni_kthread)) {
				mutex_unlock(&knet->kni_kthread_lock);
				kni_dev_remove(kni);
				return -ECANCELED;
			}

			if (force_bind)
				kthread_bind(knet->kni_kthread, kni->core_id);
			wake_up_process(knet->kni_kthread);
		}

		mutex_unlock(&knet->kni_kthread_lock);
	}

	return 0;
}

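/*
 * RTE_KNI_IOCTL_CREATE handler: validate the request from user space,
 * allocate and register the net_device, and map the shared FIFOs into
 * kernel address space.
 */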
static int
kni_ioctl_create(struct net *net, uint32_t ioctl_num,
		unsigned long ioctl_param)
{
	struct kni_net *knet = net_generic(net, kni_net_id);
	int ret;
	struct rte_kni_device_info dev_info;
	struct net_device *net_dev = NULL;
	struct kni_dev *kni, *dev, *n;

	pr_info("Creating kni...\n");
	/* Check the buffer size, to avoid warning */
	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
		return -EINVAL;

	/* Copy kni info from user space */
	if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
		return -EFAULT;

	/* Check if name is zero-terminated */
	if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
		pr_err("kni.name not zero-terminated");
		return -EINVAL;
	}

	/* Check if the CPU core id is valid for binding */
	if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
		pr_err("cpu %u is not online\n", dev_info.core_id);
		return -EINVAL;
	}

	/* Check if it has been created */
	down_read(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		if (kni_check_param(dev, &dev_info) < 0) {
			up_read(&knet->kni_list_lock);
			return -EINVAL;
		}
	}
	up_read(&knet->kni_list_lock);

	net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_USER
							NET_NAME_USER,
#endif
							kni_net_init);
	if (net_dev == NULL) {
		pr_err("error allocating device \"%s\"\n", dev_info.name);
		return -EBUSY;
	}

	dev_net_set(net_dev, net);

	kni = netdev_priv(net_dev);

	kni->net_dev = net_dev;
	kni->core_id = dev_info.core_id;
	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);

	/* Translate user space info into kernel space info */
	if (dev_info.iova_mode) {
#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
		kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
		kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
		kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
		kni->free_q = iova_to_kva(current, dev_info.free_phys);

		kni->req_q = iova_to_kva(current, dev_info.req_phys);
		kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
		kni->sync_va = dev_info.sync_va;
		kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
		kni->usr_tsk = current;
		kni->iova_mode = 1;
#else
		pr_err("KNI module does not support IOVA to VA translation\n");
		return -EINVAL;
#endif
	} else {
		kni->tx_q = phys_to_virt(dev_info.tx_phys);
		kni->rx_q = phys_to_virt(dev_info.rx_phys);
		kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
		kni->free_q = phys_to_virt(dev_info.free_phys);

		kni->req_q = phys_to_virt(dev_info.req_phys);
		kni->resp_q = phys_to_virt(dev_info.resp_phys);
		kni->sync_va = dev_info.sync_va;
		kni->sync_kva = phys_to_virt(dev_info.sync_phys);
		kni->iova_mode = 0;
	}

	kni->mbuf_size = dev_info.mbuf_size;

	pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
		(unsigned long long) dev_info.tx_phys, kni->tx_q);
	pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
		(unsigned long long) dev_info.rx_phys, kni->rx_q);
	pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
		(unsigned long long) dev_info.alloc_phys, kni->alloc_q);
	pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
		(unsigned long long) dev_info.free_phys, kni->free_q);
	pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
		(unsigned long long) dev_info.req_phys, kni->req_q);
	pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
		(unsigned long long) dev_info.resp_phys, kni->resp_q);
	pr_debug("mbuf_size: %u\n", kni->mbuf_size);

	/* if user has provided a valid mac address */
	if (is_valid_ether_addr(dev_info.mac_addr))
		memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
	else
		/* Generate random MAC address. */
		eth_random_addr(net_dev->dev_addr);

	net_dev->mtu = dev_info.mtu;
#ifdef HAVE_MAX_MTU_PARAM
	net_dev->max_mtu = net_dev->mtu;

	if (dev_info.min_mtu)
		net_dev->min_mtu = dev_info.min_mtu;

	if (dev_info.max_mtu)
		net_dev->max_mtu = dev_info.max_mtu;
#endif

	ret = register_netdev(net_dev);
	if (ret) {
		pr_err("error %i registering device \"%s\"\n",
					ret, dev_info.name);
		kni->net_dev = NULL;
		kni_dev_remove(kni);
		free_netdev(net_dev);
		return -ENODEV;
	}

	netif_carrier_off(net_dev);

	ret = kni_run_thread(knet, kni, dev_info.force_bind);
	if (ret != 0)
		return ret;

	down_write(&knet->kni_list_lock);
	list_add(&kni->list, &knet->kni_list_head);
	up_write(&knet->kni_list_lock);

	return 0;
}

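/*
 * RTE_KNI_IOCTL_RELEASE handler: look the device up by name, stop its
 * RX thread if needed and remove it.
 */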
static int
kni_ioctl_release(struct net *net, uint32_t ioctl_num,
		unsigned long ioctl_param)
{
	struct kni_net *knet = net_generic(net, kni_net_id);
	int ret = -EINVAL;
	struct kni_dev *dev, *n;
	struct rte_kni_device_info dev_info;

	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
		return -EINVAL;

	if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
		return -EFAULT;

	/* Release the network device according to its name */
	if (strlen(dev_info.name) == 0)
		return -EINVAL;

	down_write(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
			continue;

		if (multiple_kthread_on && dev->pthread != NULL) {
			kthread_stop(dev->pthread);
			dev->pthread = NULL;
		}

		kni_dev_remove(dev);
		list_del(&dev->list);
		ret = 0;
		break;
	}
	up_write(&knet->kni_list_lock);
	pr_info("%s release kni named %s\n",
		(ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);

	return ret;
}

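/* Dispatch /dev/kni ioctl requests to the create/release handlers. */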
static int
kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
{
	int ret = -EINVAL;
	struct net *net = current->nsproxy->net_ns;

	pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);

	/*
	 * Switch according to the ioctl called
	 */
	switch (_IOC_NR(ioctl_num)) {
	case _IOC_NR(RTE_KNI_IOCTL_TEST):
		/* For test only, not used */
		break;
	case _IOC_NR(RTE_KNI_IOCTL_CREATE):
		ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
		break;
	case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
		ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
		break;
	default:
		pr_debug("IOCTL default\n");
		break;
	}

	return ret;
}

static int
kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
		unsigned long ioctl_param)
{
	/* 32 bits app on 64 bits OS to be supported later */
	pr_debug("Not implemented.\n");

	return -EINVAL;
}

static const struct file_operations kni_fops = {
	.owner = THIS_MODULE,
	.open = kni_open,
	.release = kni_release,
	.unlocked_ioctl = (void *)kni_ioctl,
	.compat_ioctl = (void *)kni_compat_ioctl,
};

static struct miscdevice kni_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = KNI_DEVICE,
	.fops = &kni_fops,
};

static int __init
kni_parse_kthread_mode(void)
{
	if (!kthread_mode)
		return 0;

	if (strcmp(kthread_mode, "single") == 0)
		return 0;
	else if (strcmp(kthread_mode, "multiple") == 0)
		multiple_kthread_on = 1;
	else
		return -1;

	return 0;
}

static int __init
kni_parse_carrier_state(void)
{
	if (!carrier) {
		kni_dflt_carrier = 0;
		return 0;
	}

	if (strcmp(carrier, "off") == 0)
		kni_dflt_carrier = 0;
	else if (strcmp(carrier, "on") == 0)
		kni_dflt_carrier = 1;
	else
		return -1;

	return 0;
}

static int __init
kni_parse_bifurcated_support(void)
{
	if (!enable_bifurcated) {
		bifurcated_support = 0;
		return 0;
	}

	if (strcmp(enable_bifurcated, "on") == 0)
		bifurcated_support = 1;
	else
		return -1;

	return 0;
}

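/*
 * Module entry point: parse the module parameters, register the per-netns
 * state and the /dev/kni misc device.
 */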
static int __init
kni_init(void)
{
	int rc;

	if (kni_parse_kthread_mode() < 0) {
		pr_err("Invalid parameter for kthread_mode\n");
		return -EINVAL;
	}

	if (multiple_kthread_on == 0)
		pr_debug("Single kernel thread for all KNI devices\n");
	else
		pr_debug("Multiple kernel thread mode enabled\n");

	if (kni_parse_carrier_state() < 0) {
		pr_err("Invalid parameter for carrier\n");
		return -EINVAL;
	}

	if (kni_dflt_carrier == 0)
		pr_debug("Default carrier state set to off.\n");
	else
		pr_debug("Default carrier state set to on.\n");

	if (kni_parse_bifurcated_support() < 0) {
		pr_err("Invalid parameter for bifurcated support\n");
		return -EINVAL;
	}
	if (bifurcated_support == 1)
		pr_debug("bifurcated support is enabled.\n");

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	rc = register_pernet_subsys(&kni_net_ops);
#else
	rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
	if (rc)
		return -EPERM;

	rc = misc_register(&kni_misc);
	if (rc != 0) {
		pr_err("Misc registration failed\n");
		goto out;
	}

	/* Configure the lo mode according to the input parameter */
	kni_net_config_lo_mode(lo_mode);

	return 0;

out:
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	unregister_pernet_subsys(&kni_net_ops);
#else
	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
#endif
	return rc;
}

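/* Module exit: deregister /dev/kni and the per-netns operations. */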
static void __exit
kni_exit(void)
{
	misc_deregister(&kni_misc);
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	unregister_pernet_subsys(&kni_net_ops);
#else
	unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
#endif
}

module_init(kni_init);
module_exit(kni_exit);

module_param(lo_mode, charp, 0644);
MODULE_PARM_DESC(lo_mode,
"KNI loopback mode (default=lo_mode_none):\n"
"\t\tlo_mode_none Kernel loopback disabled\n"
"\t\tlo_mode_fifo Enable kernel loopback with fifo\n"
"\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
);

module_param(kthread_mode, charp, 0644);
MODULE_PARM_DESC(kthread_mode,
"Kernel thread mode (default=single):\n"
"\t\tsingle Single kernel thread mode enabled.\n"
"\t\tmultiple Multiple kernel thread mode enabled.\n"
);

module_param(carrier, charp, 0644);
MODULE_PARM_DESC(carrier,
"Default carrier state for KNI interface (default=off):\n"
"\t\toff Interfaces will be created with carrier state set to off.\n"
"\t\ton Interfaces will be created with carrier state set to on.\n"
);

module_param(enable_bifurcated, charp, 0644);
MODULE_PARM_DESC(enable_bifurcated,
"Enable request processing support for bifurcated drivers, "
"which means releasing rtnl_lock before calling userspace callback and "
"supporting async requests (default=off):\n"
"\t\ton Enable request processing support for bifurcated drivers.\n"
);