4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 * The full GNU General Public License is included in this distribution
19 * in the file called LICENSE.GPL.
21 * Contact Information:
25 #include <linux/version.h>
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/netdevice.h>
29 #include <linux/etherdevice.h>
30 #include <linux/pci.h>
31 #include <linux/kthread.h>
32 #include <linux/rwsem.h>
33 #include <linux/mutex.h>
34 #include <linux/nsproxy.h>
35 #include <net/net_namespace.h>
36 #include <net/netns/generic.h>
38 #include <exec-env/rte_kni_common.h>
/* Module metadata: license, author and description strings. */
43 MODULE_LICENSE("Dual BSD/GPL");
44 MODULE_AUTHOR("Intel Corporation");
45 MODULE_DESCRIPTION("Kernel Module for managing kni devices");
/* Number of polling passes a kni thread makes per iteration of its outer loop. */
47 #define KNI_RX_LOOP_NUM 1000
/* NOTE(review): KNI_MAX_DEVICES is not referenced in the visible lines of this file. */
49 #define KNI_MAX_DEVICES 32
/*
 * PCI ID tables defined outside this file. They are passed to pci_match_id()
 * below to choose between the ixgbe and igb probe/remove helpers.
 */
51 extern const struct pci_device_id ixgbe_pci_tbl[];
52 extern const struct pci_device_id igb_pci_tbl[];
57 /* Kernel thread mode */
/*
 * kthread_mode holds the raw module parameter string. It has no initializer,
 * so it is NULL unless the parameter is supplied at load time.
 */
58 static char *kthread_mode;
/* Set to 1 by kni_parse_kthread_mode() when kthread_mode is the string multiple. */
59 static unsigned int multiple_kthread_on;
61 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
/* Id used with net_generic() to look up the per-namespace struct kni_net. */
63 static int kni_net_id;
/*
 * Fields of the per-network-namespace state (looked up as struct kni_net via
 * net_generic() throughout this file).
 */
/* Bit KNI_DEV_IN_USE_BIT_NUM is set in kni_open() and cleared in kni_release(). */
66 unsigned long device_in_use; /* device in use flag */
/* Held while kni_kthread is created (kni_run_thread) or stopped (kni_release). */
67 struct mutex kni_kthread_lock;
/* The shared polling thread used when multiple_kthread_on is 0; NULL when not running. */
68 struct task_struct *kni_kthread;
/* Reader/writer semaphore guarding kni_list_head. */
69 struct rw_semaphore kni_list_lock;
/* List of struct kni_dev entries created in this namespace (linked through dev->list). */
70 struct list_head kni_list_head;
/*
 * Per-namespace init hook (installed as .init in kni_net_ops): prepares the
 * struct kni_net state for namespace net.
 */
74 kni_init_net(struct net *net)
/*
 * With HAVE_SIMPLIFIED_PERNET_OPERATIONS the state is obtained through
 * net_generic() and zeroed here. The other visible path allocates it with
 * kzalloc().
 */
76 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
77 struct kni_net *knet = net_generic(net, kni_net_id);
79 memset(knet, 0, sizeof(*knet));
/* NOTE(review): the NULL check for this allocation is not in the visible lines; confirm it exists. */
84 knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
91 /* Clear the bit of device in use */
92 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
/* Initialise the thread mutex, the list semaphore and the empty device list. */
94 mutex_init(&knet->kni_kthread_lock);
96 init_rwsem(&knet->kni_list_lock);
97 INIT_LIST_HEAD(&knet->kni_list_head);
/*
 * net_assign_generic() stores knet under kni_net_id for this namespace.
 * NOTE(review): the branch layout around this second ifdef and the handling
 * of ret are not visible here; confirm knet is freed if the assignment fails.
 */
99 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
102 ret = net_assign_generic(net, kni_net_id, knet);
/*
 * Per-namespace teardown hook (installed as .exit in kni_net_ops): looks up
 * the namespace state and destroys its kthread mutex.
 */
110 static void __net_exit
111 kni_exit_net(struct net *net)
113 struct kni_net *knet = net_generic(net, kni_net_id);
115 mutex_destroy(&knet->kni_kthread_lock);
/*
 * NOTE(review): the body of this ifndef branch is not visible; presumably it
 * frees the knet allocated with kzalloc() in kni_init_net. Confirm.
 */
116 #ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
/*
 * Per-namespace operations passed to the pernet register/unregister calls in
 * the module init and exit code.
 */
121 static struct pernet_operations kni_net_ops = {
122 .init = kni_init_net,
123 .exit = kni_exit_net,
/* In the simplified variant the size of struct kni_net is declared through .size. */
124 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
126 .size = sizeof(struct kni_net),
/*
 * Thread body for single-thread mode. data is the struct kni_net of the
 * namespace; the thread services every device on its list until
 * kthread_should_stop() reports true.
 */
131 kni_thread_single(void *data)
133 struct kni_net *knet = data;
137 while (!kthread_should_stop()) {
/* The read lock is held across all KNI_RX_LOOP_NUM passes over the list. */
138 down_read(&knet->kni_list_lock);
139 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
140 list_for_each_entry(dev, &knet->kni_list_head, list) {
/*
 * Per-device work. These helpers are defined outside this file, and the
 * conditional compilation around them is not visible in this listing.
 */
142 kni_chk_vhost_rx(dev);
146 kni_net_poll_resp(dev);
149 up_read(&knet->kni_list_lock);
150 #ifdef RTE_KNI_PREEMPT_DEFAULT
151 /* reschedule out for a while */
152 schedule_timeout_interruptible(
153 usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
/*
 * Thread body for multiple-thread mode. param is the single struct kni_dev
 * this thread services; it loops until kthread_should_stop() reports true.
 */
161 kni_thread_multiple(void *param)
164 struct kni_dev *dev = (struct kni_dev *)param;
166 while (!kthread_should_stop()) {
/* No list lock is taken here: only the one device passed in is touched. */
167 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
169 kni_chk_vhost_rx(dev);
173 kni_net_poll_resp(dev);
/* When RTE_KNI_PREEMPT_DEFAULT is defined, sleep between polling bursts. */
175 #ifdef RTE_KNI_PREEMPT_DEFAULT
176 schedule_timeout_interruptible(
177 usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
/*
 * Open handler for /dev/kni. Marks the device as in use for the network
 * namespace of the calling task and remembers that namespace in
 * file->private_data.
 */
185 kni_open(struct inode *inode, struct file *file)
187 struct net *net = current->nsproxy->net_ns;
188 struct kni_net *knet = net_generic(net, kni_net_id);
190 /* kni device can be opened by one user only per netns */
/* NOTE(review): the statement taken when the bit was already set is not visible here. */
191 if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
/*
 * kni_release() reads the namespace back from file->private_data.
 * NOTE(review): get_net() takes a reference on the namespace; the matching
 * put_net() is not in the visible lines of kni_release. Confirm it exists.
 */
194 file->private_data = get_net(net);
195 pr_debug("/dev/kni opened\n");
/*
 * Tears down one kni device: calls the driver-specific remove helper that
 * matches the PCI device, then unregisters and frees the net_device.
 */
201 kni_dev_remove(struct kni_dev *dev)
/* NOTE(review): any NULL guards on dev->pci_dev and dev->net_dev are not visible in this listing. */
207 if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
208 ixgbe_kni_remove(dev->pci_dev);
209 else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
210 igb_kni_remove(dev->pci_dev);
/*
 * NOTE(review): kni_ioctl_create obtains the kni_dev with netdev_priv(), so
 * free_netdev() presumably releases the memory behind dev as well. Callers
 * should not touch dev after this function returns. Confirm.
 */
214 unregister_netdev(dev->net_dev);
215 free_netdev(dev->net_dev);
/*
 * Release handler for /dev/kni. Stops the polling thread or threads, empties
 * the device list of the namespace saved at open time, and clears the in-use
 * bit so the device can be opened again.
 */
222 kni_release(struct inode *inode, struct file *file)
/* Namespace stored by kni_open(), not the namespace of the current task. */
224 struct net *net = file->private_data;
225 struct kni_net *knet = net_generic(net, kni_net_id);
226 struct kni_dev *dev, *n;
228 /* Stop kernel thread for single mode */
229 if (multiple_kthread_on == 0) {
230 mutex_lock(&knet->kni_kthread_lock);
231 /* Stop kernel thread */
232 if (knet->kni_kthread != NULL) {
233 kthread_stop(knet->kni_kthread);
234 knet->kni_kthread = NULL;
236 mutex_unlock(&knet->kni_kthread_lock);
/*
 * The _safe iterator is used because entries are unlinked with list_del()
 * while walking. The write lock excludes the single-mode reader thread.
 */
239 down_write(&knet->kni_list_lock);
240 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
241 /* Stop kernel thread for multiple mode */
242 if (multiple_kthread_on && dev->pthread != NULL) {
243 kthread_stop(dev->pthread);
248 kni_vhost_backend_release(dev);
/*
 * NOTE(review): statements between the vhost release and list_del() are not
 * visible. If the device is freed before this point, list_del() would touch
 * freed memory. Confirm the ordering.
 */
251 list_del(&dev->list);
253 up_write(&knet->kni_list_lock);
255 /* Clear the bit of device in use */
256 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
259 pr_debug("/dev/kni closed\n");
/*
 * Compares an existing device kni against the requested configuration dev and
 * logs an error when the names collide. The caller in kni_ioctl_create treats
 * a negative return as a conflict; the return statements are not visible here.
 */
265 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
270 /* Check if network name has been used */
/* The comparison is bounded to RTE_KNI_NAMESIZE bytes. */
271 if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
272 pr_err("KNI name %s duplicated\n", dev->name);
/*
 * Starts the polling thread for a new device. In multiple mode a dedicated
 * thread is created for kni. In single mode the shared per-namespace thread
 * is created only if it does not exist yet.
 * NOTE(review): the tests on force_bind that presumably guard the two
 * kthread_bind() calls are not visible in this listing. Confirm.
 */
280 kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
283 * Create a new kernel thread for multiple mode, set its core affinity,
284 * and finally wake it up.
286 if (multiple_kthread_on) {
287 kni->pthread = kthread_create(kni_thread_multiple,
288 (void *)kni, "kni_%s", kni->name);
/* kthread_create() reports failure through an ERR_PTR value, hence IS_ERR(). */
289 if (IS_ERR(kni->pthread)) {
295 kthread_bind(kni->pthread, kni->core_id);
296 wake_up_process(kni->pthread);
/* Single mode: creation of the shared thread is serialised by kni_kthread_lock. */
298 mutex_lock(&knet->kni_kthread_lock);
300 if (knet->kni_kthread == NULL) {
301 knet->kni_kthread = kthread_create(kni_thread_single,
302 (void *)knet, "kni_single");
303 if (IS_ERR(knet->kni_kthread)) {
/* The mutex is dropped on the failure path too. */
304 mutex_unlock(&knet->kni_kthread_lock);
/* The shared thread is bound using the core_id of the device that triggered its creation. */
310 kthread_bind(knet->kni_kthread, kni->core_id);
311 wake_up_process(knet->kni_kthread);
314 mutex_unlock(&knet->kni_kthread_lock);
/*
 * Handles the create ioctl. Copies a struct rte_kni_device_info from user
 * space, rejects duplicate names, allocates and fills a net_device whose
 * private area is the struct kni_dev, optionally attaches ethtool support
 * for a matching ixgbe or igb PCI device, registers the net_device, starts
 * the polling thread and finally adds the device to the namespace list.
 */
321 kni_ioctl_create(struct net *net,
322 unsigned int ioctl_num, unsigned long ioctl_param)
324 struct kni_net *knet = net_generic(net, kni_net_id);
326 struct rte_kni_device_info dev_info;
327 struct pci_dev *pci = NULL;
328 struct pci_dev *found_pci = NULL;
329 struct net_device *net_dev = NULL;
330 struct net_device *lad_dev = NULL;
331 struct kni_dev *kni, *dev, *n;
333 pr_info("Creating kni...\n");
334 /* Check the buffer size, to avoid warning */
/*
 * NOTE(review): only sizes larger than dev_info are rejected, yet the copy
 * below always reads sizeof(dev_info) bytes. A smaller encoded size is not
 * rejected by the visible check. Confirm this is intended.
 */
335 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
338 /* Copy kni info from user space */
339 ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
341 pr_err("copy_from_user in kni_ioctl_create");
346 * Check if the cpu core id is valid for binding.
348 if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
349 pr_err("cpu %u is not online\n", dev_info.core_id);
353 /* Check if it has been created */
/*
 * The list is only read here. The read lock is released before the early
 * exit taken when kni_check_param() reports a conflict.
 */
354 down_read(&knet->kni_list_lock);
355 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
356 if (kni_check_param(dev, &dev_info) < 0) {
357 up_read(&knet->kni_list_lock);
361 up_read(&knet->kni_list_lock);
/*
 * The private area of the net_device is sized for a struct kni_dev.
 * NOTE(review): the duplicate-name check above and the list_add() at the end
 * run under separate lock acquisitions, so two concurrent creates with the
 * same name are not excluded by the visible code. Confirm whether that matters.
 */
363 net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
364 #ifdef NET_NAME_UNKNOWN
368 if (net_dev == NULL) {
369 pr_err("error allocating device \"%s\"\n", dev_info.name);
/* Place the new net_device in the namespace passed by the caller. */
373 dev_net_set(net_dev, net);
/* kni points into the private area of net_dev rather than a separate allocation. */
375 kni = netdev_priv(net_dev);
377 kni->net_dev = net_dev;
378 kni->group_id = dev_info.group_id;
379 kni->core_id = dev_info.core_id;
/*
 * NOTE(review): strncpy() leaves kni->name without a terminating NUL when
 * dev_info.name fills all RTE_KNI_NAMESIZE bytes, and dev_info.name comes
 * from user space. Confirm termination is guaranteed elsewhere.
 */
380 strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
382 /* Translate user space info into kernel space info */
/*
 * NOTE(review): these physical addresses are taken from the user-supplied
 * dev_info and no validation is visible before phys_to_virt(). Confirm that
 * access to this ioctl is restricted to trusted callers.
 */
383 kni->tx_q = phys_to_virt(dev_info.tx_phys);
384 kni->rx_q = phys_to_virt(dev_info.rx_phys);
385 kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
386 kni->free_q = phys_to_virt(dev_info.free_phys);
388 kni->req_q = phys_to_virt(dev_info.req_phys);
389 kni->resp_q = phys_to_virt(dev_info.resp_phys);
/* sync_va is stored exactly as supplied; sync_kva is derived from sync_phys. */
390 kni->sync_va = dev_info.sync_va;
391 kni->sync_kva = phys_to_virt(dev_info.sync_phys);
/* The vhost backend starts detached, with status BE_STOP. */
394 kni->vhost_queue = NULL;
395 kni->vq_status = BE_STOP;
397 kni->mbuf_size = dev_info.mbuf_size;
/* Debug dump of each physical address next to its translated kernel address. */
399 pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
400 (unsigned long long) dev_info.tx_phys, kni->tx_q);
401 pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
402 (unsigned long long) dev_info.rx_phys, kni->rx_q);
403 pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
404 (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
405 pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
406 (unsigned long long) dev_info.free_phys, kni->free_q);
407 pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
408 (unsigned long long) dev_info.req_phys, kni->req_q);
409 pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
410 (unsigned long long) dev_info.resp_phys, kni->resp_q);
411 pr_debug("mbuf_size: %u\n", kni->mbuf_size);
/*
 * Search the PCI devices matching vendor_id and device_id for the one whose
 * bus, slot and function equal the requested values.
 * NOTE(review): pci_get_device() returns a device with its reference count
 * raised. No pci_dev_put() for found_pci or for a pci left over after the
 * search is visible in this listing. Confirm.
 */
413 pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
420 pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
422 /* Support Ethtool */
424 pr_debug("pci_bus: %02x:%02x:%02x\n",
426 PCI_SLOT(pci->devfn),
427 PCI_FUNC(pci->devfn));
429 if ((pci->bus->number == dev_info.bus) &&
430 (PCI_SLOT(pci->devfn) == dev_info.devid) &&
431 (PCI_FUNC(pci->devfn) == dev_info.function)) {
/* Probe with the driver helper whose ID table matches the device found. */
434 if (pci_match_id(ixgbe_pci_tbl, found_pci))
435 ret = ixgbe_kni_probe(found_pci, &lad_dev);
436 else if (pci_match_id(igb_pci_tbl, found_pci))
437 ret = igb_kni_probe(found_pci, &lad_dev);
441 pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
444 kni->lad_dev = lad_dev;
445 kni_set_ethtool_ops(kni->net_dev);
447 pr_err("Device not supported by ethtool");
451 kni->pci_dev = found_pci;
452 kni->device_id = dev_info.device_id;
/* Continue the search from the current device. */
455 pci = pci_get_device(dev_info.vendor_id,
456 dev_info.device_id, pci);
/*
 * MAC address selection: copy the address of lad_dev, or generate a random
 * one. The condition choosing between the two is not visible in this listing.
 */
462 ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
465 * Generate random mac address. eth_random_addr() is the newer
466 * version of generating mac address in linux kernel.
468 random_ether_addr(net_dev->dev_addr);
/* Registration failure is logged and the net_device is freed. */
470 ret = register_netdev(net_dev);
472 pr_err("error %i registering device \"%s\"\n",
476 free_netdev(net_dev);
/*
 * NOTE(review): the handling of a kni_run_thread() failure is not visible.
 * The net_device is already registered at this point, so confirm it is
 * unregistered and freed on that path.
 */
484 ret = kni_run_thread(knet, kni, dev_info.force_bind);
/* Publish the device on the namespace list under the write lock. */
488 down_write(&knet->kni_list_lock);
489 list_add(&kni->list, &knet->kni_list_head);
490 up_write(&knet->kni_list_lock);
/*
 * Handles the release ioctl. Copies a struct rte_kni_device_info from user
 * space and removes the device on the namespace list whose name matches
 * dev_info.name, stopping its thread in multiple mode.
 */
496 kni_ioctl_release(struct net *net,
497 unsigned int ioctl_num, unsigned long ioctl_param)
499 struct kni_net *knet = net_generic(net, kni_net_id);
501 struct kni_dev *dev, *n;
502 struct rte_kni_device_info dev_info;
/* Same size check and fixed-length copy as in kni_ioctl_create. */
504 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
507 ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
509 pr_err("copy_from_user in kni_ioctl_release");
513 /* Release the network device according to its name */
/*
 * NOTE(review): dev_info.name was just copied from user space. strlen()
 * assumes it is NUL-terminated within the array, and no termination step is
 * visible before this call. Confirm.
 */
514 if (strlen(dev_info.name) == 0)
/* The write lock is needed because a matching entry is unlinked below. */
517 down_write(&knet->kni_list_lock);
518 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
/* Entries whose name differs are skipped. */
519 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
522 if (multiple_kthread_on && dev->pthread != NULL) {
523 kthread_stop(dev->pthread);
528 kni_vhost_backend_release(dev);
/*
 * NOTE(review): statements between the vhost release and list_del() are not
 * visible. Confirm the device is not freed before it is unlinked.
 */
531 list_del(&dev->list);
535 up_write(&knet->kni_list_lock);
/* ret equal to 0 is reported as success in the log line below. */
536 pr_info("%s release kni named %s\n",
537 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
/*
 * ioctl dispatcher for /dev/kni. Selects the handler from the command number
 * in ioctl_num and passes on the network namespace of the calling task.
 * NOTE(review): the first parameter is declared as struct inode pointer, but
 * this function is installed as .unlocked_ioctl through a void pointer cast
 * in kni_fops. The kernel passes a struct file pointer in that position, so
 * inode must not be dereferenced here.
 */
543 kni_ioctl(struct inode *inode,
544 unsigned int ioctl_num,
545 unsigned long ioctl_param)
/*
 * NOTE(review): the namespace is read from the current task rather than from
 * the one saved in file->private_data by kni_open(). The two can differ if
 * the caller changed namespace after opening. Confirm this is intended.
 */
548 struct net *net = current->nsproxy->net_ns;
550 pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
553 * Switch according to the ioctl called
/* Only the command number is compared, not the full encoded ioctl value. */
555 switch (_IOC_NR(ioctl_num)) {
556 case _IOC_NR(RTE_KNI_IOCTL_TEST):
557 /* For test only, not used */
559 case _IOC_NR(RTE_KNI_IOCTL_CREATE):
560 ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
562 case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
563 ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
/* Unrecognised command numbers are only logged at debug level. */
566 pr_debug("IOCTL default\n");
/*
 * Compat ioctl entry point, installed as .compat_ioctl in kni_fops. It is a
 * placeholder that only logs a debug message; the return statement is not
 * visible in this listing.
 */
574 kni_compat_ioctl(struct inode *inode,
575 unsigned int ioctl_num,
576 unsigned long ioctl_param)
578 /* 32 bits app on 64 bits OS to be supported later */
579 pr_debug("Not implemented.\n");
/* File operations for the kni misc device. */
584 static const struct file_operations kni_fops = {
585 .owner = THIS_MODULE,
587 .release = kni_release,
/*
 * NOTE(review): the void pointer casts silence a prototype mismatch. Both
 * handlers declare struct inode pointer as their first parameter, while
 * these file_operations hooks are called with a struct file pointer.
 * Consider matching the prototypes and dropping the casts.
 */
588 .unlocked_ioctl = (void *)kni_ioctl,
589 .compat_ioctl = (void *)kni_compat_ioctl,
/*
 * Misc device descriptor registered in the module init code and deregistered
 * in the exit code. The minor number is assigned dynamically.
 */
592 static struct miscdevice kni_misc = {
593 .minor = MISC_DYNAMIC_MINOR,
/*
 * Translates the kthread_mode module parameter string into the
 * multiple_kthread_on flag. The module init code treats a negative return as
 * an invalid parameter.
 * NOTE(review): kthread_mode is NULL when the parameter is not given, and no
 * NULL guard before strcmp() is visible in this listing. Confirm one exists.
 */
599 kni_parse_kthread_mode(void)
604 if (strcmp(kthread_mode, "single") == 0)
606 else if (strcmp(kthread_mode, "multiple") == 0)
607 multiple_kthread_on = 1;
/*
 * Body of the module init routine (the function header is not visible in
 * this listing). Parses kthread_mode, registers the per-namespace
 * operations, registers the misc device and applies the loopback mode.
 */
619 pr_debug("######## DPDK kni module loading ########\n");
621 if (kni_parse_kthread_mode() < 0) {
622 pr_err("Invalid parameter for kthread_mode\n");
626 if (multiple_kthread_on == 0)
627 pr_debug("Single kernel thread for all KNI devices\n");
629 pr_debug("Multiple kernel thread mode enabled\n");
/* The non-simplified variant also receives the address of kni_net_id. */
631 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
632 rc = register_pernet_subsys(&kni_net_ops);
634 rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
639 rc = misc_register(&kni_misc);
641 pr_err("Misc registration failed\n");
645 /* Configure the lo mode according to the input parameter */
646 kni_net_config_lo_mode(lo_mode);
648 pr_debug("######## DPDK kni module loaded ########\n");
/*
 * Unregisters the per-namespace operations again. Presumably this is the
 * error path for a failed misc_register(); the label and control flow are
 * not visible here.
 */
653 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
654 unregister_pernet_subsys(&kni_net_ops);
656 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
/*
 * Body of the module exit routine (the function header is not visible in
 * this listing). Removes the misc device first, then the per-namespace
 * operations, which is the reverse of the registration order.
 */
664 misc_deregister(&kni_misc);
665 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
666 unregister_pernet_subsys(&kni_net_ops);
668 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
670 pr_debug("####### DPDK kni module unloaded #######\n");
/* Module entry and exit points. */
673 module_init(kni_init);
674 module_exit(kni_exit);
/*
 * lo_mode: string parameter with permissions S_IRUGO | S_IWUSR. The variable
 * is declared outside the visible lines and is passed to
 * kni_net_config_lo_mode() at load time.
 */
676 module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
677 MODULE_PARM_DESC(lo_mode,
678 "KNI loopback mode (default=lo_mode_none):\n"
679 " lo_mode_none Kernel loopback disabled\n"
680 " lo_mode_fifo Enable kernel loopback with fifo\n"
681 " lo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
/*
 * kthread_mode: string parameter with permission S_IRUGO only, parsed once
 * by kni_parse_kthread_mode() during module init.
 */
685 module_param(kthread_mode, charp, S_IRUGO);
686 MODULE_PARM_DESC(kthread_mode,
687 "Kernel thread mode (default=single):\n"
688 " single Single kernel thread mode enabled.\n"
689 " multiple Multiple kernel thread mode enabled.\n"