lib/librte_eal/linuxapp/kni/kni_misc.c
/*-
 * GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *   General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *   The full GNU General Public License is included in this distribution
 *   in the file called LICENSE.GPL.
 *
 *   Contact Information:
 *   Intel Corporation
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include <exec-env/rte_kni_common.h>

#include "compat.h"
#include "kni_dev.h"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kernel Module for managing kni devices");

#define KNI_RX_LOOP_NUM 1000

#define KNI_MAX_DEVICES 32

extern const struct pci_device_id ixgbe_pci_tbl[];
extern const struct pci_device_id igb_pci_tbl[];

/* loopback mode */
static char *lo_mode;

/* Kernel thread mode */
static char *kthread_mode;
static uint32_t multiple_kthread_on;

#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */

static int kni_net_id;

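/*
 * Per network namespace KNI state. One instance is allocated for each
 * net namespace through the pernet operations registered below, and is
 * looked up with net_generic(net, kni_net_id).
 */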
struct kni_net {
        unsigned long device_in_use; /* device in use flag */
        struct mutex kni_kthread_lock;
        struct task_struct *kni_kthread;
        struct rw_semaphore kni_list_lock;
        struct list_head kni_list_head;
};

static int __net_init
kni_init_net(struct net *net)
{
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        struct kni_net *knet = net_generic(net, kni_net_id);

        memset(knet, 0, sizeof(*knet));
#else
        struct kni_net *knet;
        int ret;

        knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
        if (!knet)
                return -ENOMEM;
#endif

        /* Clear the bit of device in use */
        clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

        mutex_init(&knet->kni_kthread_lock);

        init_rwsem(&knet->kni_list_lock);
        INIT_LIST_HEAD(&knet->kni_list_head);

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        return 0;
#else
        ret = net_assign_generic(net, kni_net_id, knet);
        if (ret < 0)
                kfree(knet);

        return ret;
#endif
}

static void __net_exit
kni_exit_net(struct net *net)
{
        struct kni_net *knet __maybe_unused;

        knet = net_generic(net, kni_net_id);
        mutex_destroy(&knet->kni_kthread_lock);

#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        kfree(knet);
#endif
}

static struct pernet_operations kni_net_ops = {
        .init = kni_init_net,
        .exit = kni_exit_net,
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        .id   = &kni_net_id,
        .size = sizeof(struct kni_net),
#endif
};

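/*
 * RX kernel threads. In "single" mode one thread (kni_thread_single)
 * services every KNI device in the namespace; in "multiple" mode each
 * device gets its own thread (kni_thread_multiple), created in
 * kni_run_thread(). Both loop KNI_RX_LOOP_NUM times between optional
 * reschedule points.
 */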
static int
kni_thread_single(void *data)
{
        struct kni_net *knet = data;
        int j;
        struct kni_dev *dev;

        while (!kthread_should_stop()) {
                down_read(&knet->kni_list_lock);
                for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
                        list_for_each_entry(dev, &knet->kni_list_head, list) {
#ifdef RTE_KNI_VHOST
                                kni_chk_vhost_rx(dev);
#else
                                kni_net_rx(dev);
#endif
                                kni_net_poll_resp(dev);
                        }
                }
                up_read(&knet->kni_list_lock);
#ifdef RTE_KNI_PREEMPT_DEFAULT
                /* reschedule out for a while */
                schedule_timeout_interruptible(
                        usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
        }

        return 0;
}

static int
kni_thread_multiple(void *param)
{
        int j;
        struct kni_dev *dev = (struct kni_dev *)param;

        while (!kthread_should_stop()) {
                for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
#ifdef RTE_KNI_VHOST
                        kni_chk_vhost_rx(dev);
#else
                        kni_net_rx(dev);
#endif
                        kni_net_poll_resp(dev);
                }
#ifdef RTE_KNI_PREEMPT_DEFAULT
                schedule_timeout_interruptible(
                        usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
        }

        return 0;
}

static int
kni_open(struct inode *inode, struct file *file)
{
        struct net *net = current->nsproxy->net_ns;
        struct kni_net *knet = net_generic(net, kni_net_id);

        /* kni device can be opened by one user only per netns */
        if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
                return -EBUSY;

        file->private_data = get_net(net);
        pr_debug("/dev/kni opened\n");

        return 0;
}

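/* Detach the ethtool-backed PCI device (if any) and free the netdev. */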
static int
kni_dev_remove(struct kni_dev *dev)
{
        if (!dev)
                return -ENODEV;

#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL
        if (dev->pci_dev) {
                if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
                        ixgbe_kni_remove(dev->pci_dev);
                else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
                        igb_kni_remove(dev->pci_dev);
        }
#endif

        if (dev->net_dev) {
                unregister_netdev(dev->net_dev);
                free_netdev(dev->net_dev);
        }

        return 0;
}

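/*
 * Called on the final close of /dev/kni: stop the RX thread(s) first so
 * nothing is still polling the FIFOs, then tear down every device on
 * the per-namespace list.
 */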
static int
kni_release(struct inode *inode, struct file *file)
{
        struct net *net = file->private_data;
        struct kni_net *knet = net_generic(net, kni_net_id);
        struct kni_dev *dev, *n;

        /* Stop kernel thread for single mode */
        if (multiple_kthread_on == 0) {
                mutex_lock(&knet->kni_kthread_lock);
                /* Stop kernel thread */
                if (knet->kni_kthread != NULL) {
                        kthread_stop(knet->kni_kthread);
                        knet->kni_kthread = NULL;
                }
                mutex_unlock(&knet->kni_kthread_lock);
        }

        down_write(&knet->kni_list_lock);
        list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
                /* Stop kernel thread for multiple mode */
                if (multiple_kthread_on && dev->pthread != NULL) {
                        kthread_stop(dev->pthread);
                        dev->pthread = NULL;
                }

#ifdef RTE_KNI_VHOST
                kni_vhost_backend_release(dev);
#endif
                kni_dev_remove(dev);
                list_del(&dev->list);
        }
        up_write(&knet->kni_list_lock);

        /* Clear the bit of device in use */
        clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

        put_net(net);
        pr_debug("/dev/kni closed\n");

        return 0;
}

static int
kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
{
        if (!kni || !dev)
                return -1;

        /* Check if network name has been used */
        if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
                pr_err("KNI name %s duplicated\n", dev->name);
                return -1;
        }

        return 0;
}

static int
kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
{
        /**
         * Create a new kernel thread for multiple mode, set its core affinity,
         * and finally wake it up.
         */
        if (multiple_kthread_on) {
                kni->pthread = kthread_create(kni_thread_multiple,
                        (void *)kni, "kni_%s", kni->name);
                if (IS_ERR(kni->pthread)) {
                        kni_dev_remove(kni);
                        return -ECANCELED;
                }

                if (force_bind)
                        kthread_bind(kni->pthread, kni->core_id);
                wake_up_process(kni->pthread);
        } else {
                mutex_lock(&knet->kni_kthread_lock);

                if (knet->kni_kthread == NULL) {
                        knet->kni_kthread = kthread_create(kni_thread_single,
                                (void *)knet, "kni_single");
                        if (IS_ERR(knet->kni_kthread)) {
                                mutex_unlock(&knet->kni_kthread_lock);
                                kni_dev_remove(kni);
                                return -ECANCELED;
                        }

                        if (force_bind)
                                kthread_bind(knet->kni_kthread, kni->core_id);
                        wake_up_process(knet->kni_kthread);
                }

                mutex_unlock(&knet->kni_kthread_lock);
        }

        return 0;
}

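/*
 * Userspace side (illustrative sketch, not part of this module): an
 * application opens /dev/kni once per network namespace and issues
 * RTE_KNI_IOCTL_CREATE with a filled-in struct rte_kni_device_info.
 * In DPDK this is wrapped by rte_kni_alloc(); the raw sequence looks
 * roughly like:
 *
 *     int fd = open("/dev/kni", O_RDWR);
 *     struct rte_kni_device_info info = { ... };  // name, FIFO phys addrs
 *     ioctl(fd, RTE_KNI_IOCTL_CREATE, &info);
 *
 * All fields are validated below before any kernel resources are
 * allocated.
 */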
static int
kni_ioctl_create(struct net *net, uint32_t ioctl_num,
                unsigned long ioctl_param)
{
        struct kni_net *knet = net_generic(net, kni_net_id);
        int ret;
        struct rte_kni_device_info dev_info;
        struct net_device *net_dev = NULL;
        struct kni_dev *kni, *dev, *n;

        pr_info("Creating kni...\n");
        /* Check the buffer size, to avoid overflowing dev_info */
        if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
                return -EINVAL;

        /* Copy kni info from user space */
        ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
        if (ret) {
                pr_err("copy_from_user in kni_ioctl_create\n");
                return -EIO;
        }

        /* Check if name is null-terminated */
        if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
                pr_err("kni.name not null-terminated\n");
                return -EINVAL;
        }

        /**
         * Check if the cpu core id is valid for binding.
         */
        if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
                pr_err("cpu %u is not online\n", dev_info.core_id);
                return -EINVAL;
        }

        /* Check if it has been created */
        down_read(&knet->kni_list_lock);
        list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
                if (kni_check_param(dev, &dev_info) < 0) {
                        up_read(&knet->kni_list_lock);
                        return -EINVAL;
                }
        }
        up_read(&knet->kni_list_lock);

        net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_USER
                                                        NET_NAME_USER,
#endif
                                                        kni_net_init);
        if (net_dev == NULL) {
                pr_err("error allocating device \"%s\"\n", dev_info.name);
                return -EBUSY;
        }

        dev_net_set(net_dev, net);

        kni = netdev_priv(net_dev);

        kni->net_dev = net_dev;
        kni->group_id = dev_info.group_id;
        kni->core_id = dev_info.core_id;
        strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);

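        /*
         * The queue pointers below arrive from user space as physical
         * addresses: DPDK allocates the FIFOs in physically contiguous
         * memory, so phys_to_virt() on the kernel's direct mapping is
         * sufficient here and no extra mapping needs to be set up.
         */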
        /* Translate user space info into kernel space info */
        kni->tx_q = phys_to_virt(dev_info.tx_phys);
        kni->rx_q = phys_to_virt(dev_info.rx_phys);
        kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
        kni->free_q = phys_to_virt(dev_info.free_phys);

        kni->req_q = phys_to_virt(dev_info.req_phys);
        kni->resp_q = phys_to_virt(dev_info.resp_phys);
        kni->sync_va = dev_info.sync_va;
        kni->sync_kva = phys_to_virt(dev_info.sync_phys);

#ifdef RTE_KNI_VHOST
        kni->vhost_queue = NULL;
        kni->vq_status = BE_STOP;
#endif
        kni->mbuf_size = dev_info.mbuf_size;

        pr_debug("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
                (unsigned long long) dev_info.tx_phys, kni->tx_q);
        pr_debug("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
                (unsigned long long) dev_info.rx_phys, kni->rx_q);
        pr_debug("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
                (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
        pr_debug("free_phys:    0x%016llx, free_q addr:    0x%p\n",
                (unsigned long long) dev_info.free_phys, kni->free_q);
        pr_debug("req_phys:     0x%016llx, req_q addr:     0x%p\n",
                (unsigned long long) dev_info.req_phys, kni->req_q);
        pr_debug("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
                (unsigned long long) dev_info.resp_phys, kni->resp_q);
        pr_debug("mbuf_size:    %u\n", kni->mbuf_size);

        pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
                                        dev_info.bus,
                                        dev_info.devid,
                                        dev_info.function,
                                        dev_info.vendor_id,
                                        dev_info.device_id);

#ifdef CONFIG_RTE_KNI_KMOD_ETHTOOL
        struct pci_dev *found_pci = NULL;
        struct net_device *lad_dev = NULL;
        struct pci_dev *pci = NULL;

        pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);

        /* Support Ethtool */
        while (pci) {
                pr_debug("pci_bus: %02x:%02x:%02x\n",
                                        pci->bus->number,
                                        PCI_SLOT(pci->devfn),
                                        PCI_FUNC(pci->devfn));

                if ((pci->bus->number == dev_info.bus) &&
                        (PCI_SLOT(pci->devfn) == dev_info.devid) &&
                        (PCI_FUNC(pci->devfn) == dev_info.function)) {
                        found_pci = pci;

                        if (pci_match_id(ixgbe_pci_tbl, found_pci))
                                ret = ixgbe_kni_probe(found_pci, &lad_dev);
                        else if (pci_match_id(igb_pci_tbl, found_pci))
                                ret = igb_kni_probe(found_pci, &lad_dev);
                        else
                                ret = -1;

                        pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
                                                        pci, lad_dev);
                        if (ret == 0) {
                                kni->lad_dev = lad_dev;
                                kni_set_ethtool_ops(kni->net_dev);
                        } else {
                                pr_err("Device not supported by ethtool\n");
                                kni->lad_dev = NULL;
                        }

                        kni->pci_dev = found_pci;
                        kni->device_id = dev_info.device_id;
                        break;
                }
                pci = pci_get_device(dev_info.vendor_id,
                                dev_info.device_id, pci);
        }
        if (pci)
                pci_dev_put(pci);
#endif

        if (kni->lad_dev)
                ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
        else
                /*
                 * Generate a random MAC address. random_ether_addr() is kept
                 * here for compatibility with older kernels; eth_random_addr()
                 * is its newer equivalent.
                 */
                random_ether_addr(net_dev->dev_addr);

        ret = register_netdev(net_dev);
        if (ret) {
                pr_err("error %i registering device \"%s\"\n",
                                        ret, dev_info.name);
                kni->net_dev = NULL;
                kni_dev_remove(kni);
                free_netdev(net_dev);
                return -ENODEV;
        }

#ifdef RTE_KNI_VHOST
        kni_vhost_init(kni);
#endif

        ret = kni_run_thread(knet, kni, dev_info.force_bind);
        if (ret != 0)
                return ret;

        down_write(&knet->kni_list_lock);
        list_add(&kni->list, &knet->kni_list_head);
        up_write(&knet->kni_list_lock);

        return 0;
}

static int
kni_ioctl_release(struct net *net, uint32_t ioctl_num,
                unsigned long ioctl_param)
{
        struct kni_net *knet = net_generic(net, kni_net_id);
        int ret = -EINVAL;
        struct kni_dev *dev, *n;
        struct rte_kni_device_info dev_info;

        if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
                return -EINVAL;

        ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
        if (ret) {
                pr_err("copy_from_user in kni_ioctl_release\n");
                return -EIO;
        }

        /* Release the network device according to its name */
        if (strlen(dev_info.name) == 0)
                return -EINVAL;

        /* copy_from_user() left ret at 0; report -ENODEV unless found */
        ret = -ENODEV;
        down_write(&knet->kni_list_lock);
        list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
                if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
                        continue;

                if (multiple_kthread_on && dev->pthread != NULL) {
                        kthread_stop(dev->pthread);
                        dev->pthread = NULL;
                }

#ifdef RTE_KNI_VHOST
                kni_vhost_backend_release(dev);
#endif
                kni_dev_remove(dev);
                list_del(&dev->list);
                ret = 0;
                break;
        }
        up_write(&knet->kni_list_lock);
        pr_info("%s release kni named %s\n",
                (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);

        return ret;
}

static int
kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
{
        int ret = -EINVAL;
        struct net *net = current->nsproxy->net_ns;

        pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);

        /*
         * Switch according to the ioctl called
         */
        switch (_IOC_NR(ioctl_num)) {
        case _IOC_NR(RTE_KNI_IOCTL_TEST):
                /* For test only, not used */
                break;
        case _IOC_NR(RTE_KNI_IOCTL_CREATE):
                ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
                break;
        case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
                ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
                break;
        default:
                pr_debug("IOCTL default\n");
                break;
        }

        return ret;
}

static int
kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
                unsigned long ioctl_param)
{
        /* 32-bit apps on a 64-bit OS to be supported later */
        pr_debug("Not implemented.\n");

        return -EINVAL;
}

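/*
 * kni_ioctl()/kni_compat_ioctl() keep the legacy ioctl prototype (a
 * struct inode * as the first argument), so they are cast when
 * installed as unlocked_ioctl/compat_ioctl below; neither handler
 * actually dereferences its first argument.
 */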
static const struct file_operations kni_fops = {
        .owner = THIS_MODULE,
        .open = kni_open,
        .release = kni_release,
        .unlocked_ioctl = (void *)kni_ioctl,
        .compat_ioctl = (void *)kni_compat_ioctl,
};

static struct miscdevice kni_misc = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = KNI_DEVICE,
        .fops = &kni_fops,
};

static int __init
kni_parse_kthread_mode(void)
{
        if (!kthread_mode)
                return 0;

        if (strcmp(kthread_mode, "single") == 0)
                return 0;
        else if (strcmp(kthread_mode, "multiple") == 0)
                multiple_kthread_on = 1;
        else
                return -1;

        return 0;
}

static int __init
kni_init(void)
{
        int rc;

        if (kni_parse_kthread_mode() < 0) {
                pr_err("Invalid parameter for kthread_mode\n");
                return -EINVAL;
        }

        if (multiple_kthread_on == 0)
                pr_debug("Single kernel thread for all KNI devices\n");
        else
                pr_debug("Multiple kernel thread mode enabled\n");

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        rc = register_pernet_subsys(&kni_net_ops);
#else
        rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
#endif
        if (rc)
                return -EPERM;

        rc = misc_register(&kni_misc);
        if (rc != 0) {
                pr_err("Misc registration failed\n");
                goto out;
        }

        /* Configure the lo mode according to the input parameter */
        kni_net_config_lo_mode(lo_mode);

        return 0;

out:
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        unregister_pernet_subsys(&kni_net_ops);
#else
        unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
#endif
        return rc;
}

static void __exit
kni_exit(void)
{
        misc_deregister(&kni_misc);
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        unregister_pernet_subsys(&kni_net_ops);
#else
        unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
#endif
}

module_init(kni_init);
module_exit(kni_exit);

module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(lo_mode,
"KNI loopback mode (default=lo_mode_none):\n"
"    lo_mode_none        Kernel loopback disabled\n"
"    lo_mode_fifo        Enable kernel loopback with fifo\n"
"    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
"\n"
);

module_param(kthread_mode, charp, S_IRUGO);
MODULE_PARM_DESC(kthread_mode,
"Kernel thread mode (default=single):\n"
"    single    Single kernel thread mode enabled.\n"
"    multiple  Multiple kernel thread mode enabled.\n"
"\n"
);
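
/*
 * Example (illustrative): loading the module with both parameters set,
 * assuming the DPDK build produced rte_kni.ko:
 *
 *     insmod rte_kni.ko kthread_mode=multiple lo_mode=lo_mode_fifo
 */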