kni: allow per-net instances
[dpdk.git] / lib / librte_eal / linuxapp / kni / kni_misc.c
1 /*-
2  * GPL LICENSE SUMMARY
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *
6  *   This program is free software; you can redistribute it and/or modify
7  *   it under the terms of version 2 of the GNU General Public License as
8  *   published by the Free Software Foundation.
9  *
10  *   This program is distributed in the hope that it will be useful, but
11  *   WITHOUT ANY WARRANTY; without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *   General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program; if not, write to the Free Software
17  *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18  *   The full GNU General Public License is included in this distribution
19  *   in the file called LICENSE.GPL.
20  *
21  *   Contact Information:
22  *   Intel Corporation
23  */
24
25 #include <linux/module.h>
26 #include <linux/miscdevice.h>
27 #include <linux/netdevice.h>
28 #include <linux/pci.h>
29 #include <linux/kthread.h>
30 #include <linux/rwsem.h>
31 #include <linux/nsproxy.h>
32 #include <net/net_namespace.h>
33 #include <net/netns/generic.h>
34
35 #include <exec-env/rte_kni_common.h>
36 #include "kni_dev.h"
37 #include <rte_config.h>
38
/* Module metadata exported through the standard MODULE_* macros. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kernel Module for managing kni devices");

/* Number of RX/response polling passes a KNI kthread performs per outer loop */
#define KNI_RX_LOOP_NUM 1000

/* NOTE(review): not referenced by any code visible in this file - confirm it is still needed */
#define KNI_MAX_DEVICES 32
46
/*
 * Helpers implemented outside this file (presumably the KNI net device
 * code - verify against kni_net.c / kni_ethtool.c).
 */
extern void kni_net_rx(struct kni_dev *kni);
extern void kni_net_init(struct net_device *dev);
extern void kni_net_config_lo_mode(char *lo_str);
extern void kni_net_poll_resp(struct kni_dev *kni);
extern void kni_set_ethtool_ops(struct net_device *netdev);

/*
 * Probe/remove hooks of the bundled igb/ixgbe drivers; a successful probe
 * hands back the driver's net_device through lad_dev.
 */
extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
extern void ixgbe_kni_remove(struct pci_dev *pdev);
extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
extern void igb_kni_remove(struct pci_dev *pdev);
57
/* File operation handlers for the KNI misc device (wired up in kni_fops) */
static int kni_open(struct inode *inode, struct file *file);
static int kni_release(struct inode *inode, struct file *file);
static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
                                        unsigned long ioctl_param);
static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
                                                unsigned long ioctl_param);
/* Tear down one KNI device: driver remove hook plus netdev unregister/free */
static int kni_dev_remove(struct kni_dev *dev);

/* Validate the kthread_mode module parameter; called once from kni_init() */
static int __init kni_parse_kthread_mode(void);

/* KNI processing for single kernel thread mode */
static int kni_thread_single(void *unused);
/* KNI processing for multiple kernel thread mode */
static int kni_thread_multiple(void *param);
72
/*
 * File operations of the KNI misc device.
 *
 * NOTE(review): kni_ioctl/kni_compat_ioctl are declared as
 * int (*)(struct inode *, unsigned int, unsigned long) and forced into the
 * ioctl slots with (void *) casts; the kernel's prototype for these slots is
 * long (*)(struct file *, unsigned int, unsigned long). The first argument
 * the handlers receive is therefore not really an inode - confirm and align
 * the prototypes.
 */
static struct file_operations kni_fops = {
        .owner = THIS_MODULE,
        .open = kni_open,
        .release = kni_release,
        .unlocked_ioctl = (void *)kni_ioctl,
        .compat_ioctl = (void *)kni_compat_ioctl,
};
80
/*
 * Misc character device through which user space drives KNI; the minor
 * number is allocated dynamically and the node is named KNI_DEVICE.
 */
static struct miscdevice kni_misc = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = KNI_DEVICE,
        .fops = &kni_fops,
};
86
/* loopback mode (module parameter, passed to kni_net_config_lo_mode() at load) */
static char *lo_mode = NULL;

/* Kernel thread mode (module parameter: "single" or "multiple") */
static char *kthread_mode = NULL;
/* Non-zero when kthread_mode selected one kernel thread per KNI device */
static unsigned multiple_kthread_on = 0;

#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */

/* Id assigned by the pernet subsystem; used with net_generic() to find struct kni_net */
static int kni_net_id;
97
/* Per network namespace KNI state, obtained via net_generic(net, kni_net_id) */
struct kni_net {
        unsigned long device_in_use; /* device in use flag */
        struct task_struct *kni_kthread; /* polling thread of single kthread mode */
        struct rw_semaphore kni_list_lock; /* protects kni_list_head */
        struct list_head kni_list_head; /* KNI devices created in this namespace */
};
104
105 static __net_init int kni_init_net(struct net *net)
106 {
107         struct kni_net *knet = net_generic(net, kni_net_id);
108
109         /* Clear the bit of device in use */
110         clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
111
112         init_rwsem(&knet->kni_list_lock);
113         INIT_LIST_HEAD(&knet->kni_list_head);
114
115         return 0;
116 }
117
/*
 * Pernet registration: sizeof(struct kni_net) bytes of per-namespace
 * storage are requested, kni_init_net() initializes them, and the id used
 * for lookups is written to kni_net_id. No exit hook is installed.
 */
static struct pernet_operations kni_net_ops = {
        .init = kni_init_net,
        .exit = NULL,
        .id   = &kni_net_id,
        .size = sizeof(struct kni_net),
};
124
125 static int __init
126 kni_init(void)
127 {
128         int rc;
129
130         KNI_PRINT("######## DPDK kni module loading ########\n");
131
132         if (kni_parse_kthread_mode() < 0) {
133                 KNI_ERR("Invalid parameter for kthread_mode\n");
134                 return -EINVAL;
135         }
136
137         rc = register_pernet_subsys(&kni_net_ops);
138         if (rc)
139                 return -EPERM;
140
141         rc = misc_register(&kni_misc);
142         if (rc != 0) {
143                 KNI_ERR("Misc registration failed\n");
144                 goto out;
145         }
146
147         /* Configure the lo mode according to the input parameter */
148         kni_net_config_lo_mode(lo_mode);
149
150         KNI_PRINT("######## DPDK kni module loaded  ########\n");
151
152         return 0;
153
154 out:
155         unregister_pernet_subsys(&kni_net_ops);
156         return rc;
157 }
158
159 static void __exit
160 kni_exit(void)
161 {
162         misc_deregister(&kni_misc);
163         unregister_pernet_subsys(&kni_net_ops);
164         KNI_PRINT("####### DPDK kni module unloaded  #######\n");
165 }
166
167 static int __init
168 kni_parse_kthread_mode(void)
169 {
170         if (!kthread_mode)
171                 return 0;
172
173         if (strcmp(kthread_mode, "single") == 0)
174                 return 0;
175         else if (strcmp(kthread_mode, "multiple") == 0)
176                 multiple_kthread_on = 1;
177         else
178                 return -1;
179
180         return 0;
181 }
182
183 static int
184 kni_open(struct inode *inode, struct file *file)
185 {
186         struct net *net = current->nsproxy->net_ns;
187         struct kni_net *knet = net_generic(net, kni_net_id);
188
189         /* kni device can be opened by one user only per netns */
190         if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
191                 return -EBUSY;
192
193         /* Create kernel thread for single mode */
194         if (multiple_kthread_on == 0) {
195                 KNI_PRINT("Single kernel thread for all KNI devices\n");
196                 /* Create kernel thread for RX */
197                 knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
198                                                 "kni_single");
199                 if (IS_ERR(knet->kni_kthread)) {
200                         KNI_ERR("Unable to create kernel threaed\n");
201                         return PTR_ERR(knet->kni_kthread);
202                 }
203         } else
204                 KNI_PRINT("Multiple kernel thread mode enabled\n");
205
206         file->private_data = get_net(net);
207         KNI_PRINT("/dev/kni opened\n");
208
209         return 0;
210 }
211
212 static int
213 kni_release(struct inode *inode, struct file *file)
214 {
215         struct net *net = file->private_data;
216         struct kni_net *knet = net_generic(net, kni_net_id);
217         struct kni_dev *dev, *n;
218
219         /* Stop kernel thread for single mode */
220         if (multiple_kthread_on == 0) {
221                 /* Stop kernel thread */
222                 kthread_stop(knet->kni_kthread);
223                 knet->kni_kthread = NULL;
224         }
225
226         down_write(&knet->kni_list_lock);
227         list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
228                 /* Stop kernel thread for multiple mode */
229                 if (multiple_kthread_on && dev->pthread != NULL) {
230                         kthread_stop(dev->pthread);
231                         dev->pthread = NULL;
232                 }
233
234 #ifdef RTE_KNI_VHOST
235                 kni_vhost_backend_release(dev);
236 #endif
237                 kni_dev_remove(dev);
238                 list_del(&dev->list);
239         }
240         up_write(&knet->kni_list_lock);
241
242         /* Clear the bit of device in use */
243         clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
244
245         put_net(net);
246         KNI_PRINT("/dev/kni closed\n");
247
248         return 0;
249 }
250
251 static int
252 kni_thread_single(void *data)
253 {
254         struct kni_net *knet = data;
255         int j;
256         struct kni_dev *dev;
257
258         while (!kthread_should_stop()) {
259                 down_read(&knet->kni_list_lock);
260                 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
261                         list_for_each_entry(dev, &knet->kni_list_head, list) {
262 #ifdef RTE_KNI_VHOST
263                                 kni_chk_vhost_rx(dev);
264 #else
265                                 kni_net_rx(dev);
266 #endif
267                                 kni_net_poll_resp(dev);
268                         }
269                 }
270                 up_read(&knet->kni_list_lock);
271 #ifdef RTE_KNI_PREEMPT_DEFAULT
272                 /* reschedule out for a while */
273                 schedule_timeout_interruptible(usecs_to_jiffies( \
274                                 KNI_KTHREAD_RESCHEDULE_INTERVAL));
275 #endif
276         }
277
278         return 0;
279 }
280
281 static int
282 kni_thread_multiple(void *param)
283 {
284         int j;
285         struct kni_dev *dev = (struct kni_dev *)param;
286
287         while (!kthread_should_stop()) {
288                 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
289 #ifdef RTE_KNI_VHOST
290                         kni_chk_vhost_rx(dev);
291 #else
292                         kni_net_rx(dev);
293 #endif
294                         kni_net_poll_resp(dev);
295                 }
296 #ifdef RTE_KNI_PREEMPT_DEFAULT
297                 schedule_timeout_interruptible(usecs_to_jiffies( \
298                                 KNI_KTHREAD_RESCHEDULE_INTERVAL));
299 #endif
300         }
301
302         return 0;
303 }
304
/*
 * Tear down one KNI device.
 *
 * Calls the igb/ixgbe remove hook when dev->device_id matches one of the
 * ids listed in rte_pci_dev_ids.h, then unregisters and frees the
 * net_device if dev->net_dev is set.
 *
 * dev is allocated as the private area of dev->net_dev (see netdev_priv()
 * in kni_ioctl_create()), so callers must not touch dev after this
 * function returns when net_dev was set.
 *
 * Returns 0 on success, -ENODEV when dev is NULL.
 */
static int
kni_dev_remove(struct kni_dev *dev)
{
        if (!dev)
                return -ENODEV;

        switch (dev->device_id) {
        /*
         * Each include expands the id table into "case <id>:" labels for one
         * driver family. The macro is defined again before the second
         * include, so the header presumably #undefs it - TODO confirm.
         */
        #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
        #include <rte_pci_dev_ids.h>
                igb_kni_remove(dev->pci_dev);
                break;
        #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
        #include <rte_pci_dev_ids.h>
                ixgbe_kni_remove(dev->pci_dev);
                break;
        default:
                /* Not an igb/ixgbe device: no driver hook to call */
                break;
        }

        if (dev->net_dev) {
                unregister_netdev(dev->net_dev);
                free_netdev(dev->net_dev);
        }

        return 0;
}
331
332 static int
333 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
334 {
335         if (!kni || !dev)
336                 return -1;
337
338         /* Check if network name has been used */
339         if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
340                 KNI_ERR("KNI name %s duplicated\n", dev->name);
341                 return -1;
342         }
343
344         return 0;
345 }
346
347 static int
348 kni_ioctl_create(struct net *net,
349                 unsigned int ioctl_num, unsigned long ioctl_param)
350 {
351         struct kni_net *knet = net_generic(net, kni_net_id);
352         int ret;
353         struct rte_kni_device_info dev_info;
354         struct pci_dev *pci = NULL;
355         struct pci_dev *found_pci = NULL;
356         struct net_device *net_dev = NULL;
357         struct net_device *lad_dev = NULL;
358         struct kni_dev *kni, *dev, *n;
359
360         printk(KERN_INFO "KNI: Creating kni...\n");
361         /* Check the buffer size, to avoid warning */
362         if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
363                 return -EINVAL;
364
365         /* Copy kni info from user space */
366         ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
367         if (ret) {
368                 KNI_ERR("copy_from_user in kni_ioctl_create");
369                 return -EIO;
370         }
371
372         /**
373          * Check if the cpu core id is valid for binding,
374          * for multiple kernel thread mode.
375          */
376         if (multiple_kthread_on && dev_info.force_bind &&
377                                 !cpu_online(dev_info.core_id)) {
378                 KNI_ERR("cpu %u is not online\n", dev_info.core_id);
379                 return -EINVAL;
380         }
381
382         /* Check if it has been created */
383         down_read(&knet->kni_list_lock);
384         list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
385                 if (kni_check_param(dev, &dev_info) < 0) {
386                         up_read(&knet->kni_list_lock);
387                         return -EINVAL;
388                 }
389         }
390         up_read(&knet->kni_list_lock);
391
392         net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
393 #ifdef NET_NAME_UNKNOWN
394                                                         NET_NAME_UNKNOWN,
395 #endif
396                                                         kni_net_init);
397         if (net_dev == NULL) {
398                 KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
399                 return -EBUSY;
400         }
401
402         dev_net_set(net_dev, net);
403
404         kni = netdev_priv(net_dev);
405
406         kni->net_dev = net_dev;
407         kni->group_id = dev_info.group_id;
408         kni->core_id = dev_info.core_id;
409         strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
410
411         /* Translate user space info into kernel space info */
412         kni->tx_q = phys_to_virt(dev_info.tx_phys);
413         kni->rx_q = phys_to_virt(dev_info.rx_phys);
414         kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
415         kni->free_q = phys_to_virt(dev_info.free_phys);
416
417         kni->req_q = phys_to_virt(dev_info.req_phys);
418         kni->resp_q = phys_to_virt(dev_info.resp_phys);
419         kni->sync_va = dev_info.sync_va;
420         kni->sync_kva = phys_to_virt(dev_info.sync_phys);
421
422         kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
423         kni->mbuf_va = dev_info.mbuf_va;
424
425 #ifdef RTE_KNI_VHOST
426         kni->vhost_queue = NULL;
427         kni->vq_status = BE_STOP;
428 #endif
429         kni->mbuf_size = dev_info.mbuf_size;
430
431         KNI_PRINT("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
432                 (unsigned long long) dev_info.tx_phys, kni->tx_q);
433         KNI_PRINT("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
434                 (unsigned long long) dev_info.rx_phys, kni->rx_q);
435         KNI_PRINT("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
436                 (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
437         KNI_PRINT("free_phys:    0x%016llx, free_q addr:    0x%p\n",
438                 (unsigned long long) dev_info.free_phys, kni->free_q);
439         KNI_PRINT("req_phys:     0x%016llx, req_q addr:     0x%p\n",
440                 (unsigned long long) dev_info.req_phys, kni->req_q);
441         KNI_PRINT("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
442                 (unsigned long long) dev_info.resp_phys, kni->resp_q);
443         KNI_PRINT("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p\n",
444                 (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
445         KNI_PRINT("mbuf_va:      0x%p\n", dev_info.mbuf_va);
446         KNI_PRINT("mbuf_size:    %u\n", kni->mbuf_size);
447
448         KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
449                                         dev_info.bus,
450                                         dev_info.devid,
451                                         dev_info.function,
452                                         dev_info.vendor_id,
453                                         dev_info.device_id);
454
455         pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
456
457         /* Support Ethtool */
458         while (pci) {
459                 KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
460                                         pci->bus->number,
461                                         PCI_SLOT(pci->devfn),
462                                         PCI_FUNC(pci->devfn));
463
464                 if ((pci->bus->number == dev_info.bus) &&
465                         (PCI_SLOT(pci->devfn) == dev_info.devid) &&
466                         (PCI_FUNC(pci->devfn) == dev_info.function)) {
467                         found_pci = pci;
468                         switch (dev_info.device_id) {
469                         #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
470                         #include <rte_pci_dev_ids.h>
471                                 ret = igb_kni_probe(found_pci, &lad_dev);
472                                 break;
473                         #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
474                                                         case (dev):
475                         #include <rte_pci_dev_ids.h>
476                                 ret = ixgbe_kni_probe(found_pci, &lad_dev);
477                                 break;
478                         default:
479                                 ret = -1;
480                                 break;
481                         }
482
483                         KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
484                                                         pci, lad_dev);
485                         if (ret == 0) {
486                                 kni->lad_dev = lad_dev;
487                                 kni_set_ethtool_ops(kni->net_dev);
488                         } else {
489                                 KNI_ERR("Device not supported by ethtool");
490                                 kni->lad_dev = NULL;
491                         }
492
493                         kni->pci_dev = found_pci;
494                         kni->device_id = dev_info.device_id;
495                         break;
496                 }
497                 pci = pci_get_device(dev_info.vendor_id,
498                                 dev_info.device_id, pci);
499         }
500         if (pci)
501                 pci_dev_put(pci);
502
503         ret = register_netdev(net_dev);
504         if (ret) {
505                 KNI_ERR("error %i registering device \"%s\"\n",
506                                         ret, dev_info.name);
507                 kni_dev_remove(kni);
508                 return -ENODEV;
509         }
510
511 #ifdef RTE_KNI_VHOST
512         kni_vhost_init(kni);
513 #endif
514
515         /**
516          * Create a new kernel thread for multiple mode, set its core affinity,
517          * and finally wake it up.
518          */
519         if (multiple_kthread_on) {
520                 kni->pthread = kthread_create(kni_thread_multiple,
521                                               (void *)kni,
522                                               "kni_%s", kni->name);
523                 if (IS_ERR(kni->pthread)) {
524                         kni_dev_remove(kni);
525                         return -ECANCELED;
526                 }
527                 if (dev_info.force_bind)
528                         kthread_bind(kni->pthread, kni->core_id);
529                 wake_up_process(kni->pthread);
530         }
531
532         down_write(&knet->kni_list_lock);
533         list_add(&kni->list, &knet->kni_list_head);
534         up_write(&knet->kni_list_lock);
535
536         return 0;
537 }
538
539 static int
540 kni_ioctl_release(struct net *net,
541                 unsigned int ioctl_num, unsigned long ioctl_param)
542 {
543         struct kni_net *knet = net_generic(net, kni_net_id);
544         int ret = -EINVAL;
545         struct kni_dev *dev, *n;
546         struct rte_kni_device_info dev_info;
547
548         if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
549                         return -EINVAL;
550
551         ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
552         if (ret) {
553                 KNI_ERR("copy_from_user in kni_ioctl_release");
554                 return -EIO;
555         }
556
557         /* Release the network device according to its name */
558         if (strlen(dev_info.name) == 0)
559                 return ret;
560
561         down_write(&knet->kni_list_lock);
562         list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
563                 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
564                         continue;
565
566                 if (multiple_kthread_on && dev->pthread != NULL) {
567                         kthread_stop(dev->pthread);
568                         dev->pthread = NULL;
569                 }
570
571 #ifdef RTE_KNI_VHOST
572                 kni_vhost_backend_release(dev);
573 #endif
574                 kni_dev_remove(dev);
575                 list_del(&dev->list);
576                 ret = 0;
577                 break;
578         }
579         up_write(&knet->kni_list_lock);
580         printk(KERN_INFO "KNI: %s release kni named %s\n",
581                 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
582
583         return ret;
584 }
585
586 static int
587 kni_ioctl(struct inode *inode,
588         unsigned int ioctl_num,
589         unsigned long ioctl_param)
590 {
591         int ret = -EINVAL;
592         struct net *net = current->nsproxy->net_ns;
593
594         KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
595
596         /*
597          * Switch according to the ioctl called
598          */
599         switch (_IOC_NR(ioctl_num)) {
600         case _IOC_NR(RTE_KNI_IOCTL_TEST):
601                 /* For test only, not used */
602                 break;
603         case _IOC_NR(RTE_KNI_IOCTL_CREATE):
604                 ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
605                 break;
606         case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
607                 ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
608                 break;
609         default:
610                 KNI_DBG("IOCTL default\n");
611                 break;
612         }
613
614         return ret;
615 }
616
617 static int
618 kni_compat_ioctl(struct inode *inode,
619                 unsigned int ioctl_num,
620                 unsigned long ioctl_param)
621 {
622         /* 32 bits app on 64 bits OS to be supported later */
623         KNI_PRINT("Not implemented.\n");
624
625         return -EINVAL;
626 }
627
/* Module load/unload entry points */
module_init(kni_init);
module_exit(kni_exit);

/*
 * Loopback mode selector.
 * NOTE(review): the parameter is writable by its owner (S_IWUSR), but in
 * this file it is only consumed once, in kni_init() - confirm whether
 * later writes are meant to take effect.
 */
module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(lo_mode,
"KNI loopback mode (default=lo_mode_none):\n"
"    lo_mode_none        Kernel loopback disabled\n"
"    lo_mode_fifo        Enable kernel loopback with fifo\n"
"    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
"\n"
);

/* Kernel thread mode selector; read-only, parsed by kni_parse_kthread_mode() at load */
module_param(kthread_mode, charp, S_IRUGO);
MODULE_PARM_DESC(kthread_mode,
"Kernel thread mode (default=single):\n"
"    single    Single kernel thread mode enabled.\n"
"    multiple  Multiple kernel thread mode enabled.\n"
"\n"
);