kni: add build option to disable preempting
[dpdk.git] / lib / librte_eal / linuxapp / kni / kni_misc.c
1 /*-
2  * GPL LICENSE SUMMARY
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *
6  *   This program is free software; you can redistribute it and/or modify
7  *   it under the terms of version 2 of the GNU General Public License as
8  *   published by the Free Software Foundation.
9  *
10  *   This program is distributed in the hope that it will be useful, but
11  *   WITHOUT ANY WARRANTY; without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *   General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program; if not, write to the Free Software
17  *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18  *   The full GNU General Public License is included in this distribution
19  *   in the file called LICENSE.GPL.
20  *
21  *   Contact Information:
22  *   Intel Corporation
23  */
24
25 #include <linux/module.h>
26 #include <linux/miscdevice.h>
27 #include <linux/netdevice.h>
28 #include <linux/pci.h>
29 #include <linux/kthread.h>
30 #include <linux/rwsem.h>
31
32 #include <exec-env/rte_kni_common.h>
33 #include "kni_dev.h"
34 #include <rte_config.h>
35
36 MODULE_LICENSE("Dual BSD/GPL");
37 MODULE_AUTHOR("Intel Corporation");
38 MODULE_DESCRIPTION("Kernel Module for managing kni devices");
39
40 #define KNI_RX_LOOP_NUM 1000
41
42 #define KNI_MAX_DEVICES 32
43
44 extern void kni_net_rx(struct kni_dev *kni);
45 extern void kni_net_init(struct net_device *dev);
46 extern void kni_net_config_lo_mode(char *lo_str);
47 extern void kni_net_poll_resp(struct kni_dev *kni);
48 extern void kni_set_ethtool_ops(struct net_device *netdev);
49
50 extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
51 extern void ixgbe_kni_remove(struct pci_dev *pdev);
52 extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev);
53 extern void igb_kni_remove(struct pci_dev *pdev);
54
55 static int kni_open(struct inode *inode, struct file *file);
56 static int kni_release(struct inode *inode, struct file *file);
57 static int kni_ioctl(struct inode *inode, unsigned int ioctl_num,
58                                         unsigned long ioctl_param);
59 static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num,
60                                                 unsigned long ioctl_param);
61 static int kni_dev_remove(struct kni_dev *dev);
62
63 static int __init kni_parse_kthread_mode(void);
64
65 /* KNI processing for single kernel thread mode */
66 static int kni_thread_single(void *unused);
67 /* KNI processing for multiple kernel thread mode */
68 static int kni_thread_multiple(void *param);
69
70 static struct file_operations kni_fops = {
71         .owner = THIS_MODULE,
72         .open = kni_open,
73         .release = kni_release,
74         .unlocked_ioctl = (void *)kni_ioctl,
75         .compat_ioctl = (void *)kni_compat_ioctl,
76 };
77
78 static struct miscdevice kni_misc = {
79         .minor = MISC_DYNAMIC_MINOR,
80         .name = KNI_DEVICE,
81         .fops = &kni_fops,
82 };
83
/* Module parameter: loopback mode string (NULL when not supplied) */
static char *lo_mode;

/* Module parameter: kernel thread mode string (NULL when not supplied) */
static char *kthread_mode;
/* Non-zero once kthread_mode has been parsed as "multiple" */
static unsigned int multiple_kthread_on;

/* Bit of device_in_use that marks the misc device as opened */
#define KNI_DEV_IN_USE_BIT_NUM 0

/* Device-in-use flag word, manipulated with the atomic bit operations */
static volatile unsigned long device_in_use;
/* Polling thread used in single kernel thread mode */
static struct task_struct *kni_kthread;

/* Read/write semaphore protecting the KNI device list */
static DECLARE_RWSEM(kni_list_lock);

/* List of all created KNI devices */
static LIST_HEAD(kni_list_head);
101
102 static int __init
103 kni_init(void)
104 {
105         KNI_PRINT("######## DPDK kni module loading ########\n");
106
107         if (kni_parse_kthread_mode() < 0) {
108                 KNI_ERR("Invalid parameter for kthread_mode\n");
109                 return -EINVAL;
110         }
111
112         if (misc_register(&kni_misc) != 0) {
113                 KNI_ERR("Misc registration failed\n");
114                 return -EPERM;
115         }
116
117         /* Clear the bit of device in use */
118         clear_bit(KNI_DEV_IN_USE_BIT_NUM, &device_in_use);
119
120         /* Configure the lo mode according to the input parameter */
121         kni_net_config_lo_mode(lo_mode);
122
123         KNI_PRINT("######## DPDK kni module loaded  ########\n");
124
125         return 0;
126 }
127
128 static void __exit
129 kni_exit(void)
130 {
131         misc_deregister(&kni_misc);
132         KNI_PRINT("####### DPDK kni module unloaded  #######\n");
133 }
134
135 static int __init
136 kni_parse_kthread_mode(void)
137 {
138         if (!kthread_mode)
139                 return 0;
140
141         if (strcmp(kthread_mode, "single") == 0)
142                 return 0;
143         else if (strcmp(kthread_mode, "multiple") == 0)
144                 multiple_kthread_on = 1;
145         else
146                 return -1;
147
148         return 0;
149 }
150
151 static int
152 kni_open(struct inode *inode, struct file *file)
153 {
154         /* kni device can be opened by one user only, test and set bit */
155         if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &device_in_use))
156                 return -EBUSY;
157
158         /* Create kernel thread for single mode */
159         if (multiple_kthread_on == 0) {
160                 KNI_PRINT("Single kernel thread for all KNI devices\n");
161                 /* Create kernel thread for RX */
162                 kni_kthread = kthread_run(kni_thread_single, NULL,
163                                                 "kni_single");
164                 if (IS_ERR(kni_kthread)) {
165                         KNI_ERR("Unable to create kernel threaed\n");
166                         return PTR_ERR(kni_kthread);
167                 }
168         } else
169                 KNI_PRINT("Multiple kernel thread mode enabled\n");
170
171         KNI_PRINT("/dev/kni opened\n");
172
173         return 0;
174 }
175
176 static int
177 kni_release(struct inode *inode, struct file *file)
178 {
179         struct kni_dev *dev, *n;
180
181         /* Stop kernel thread for single mode */
182         if (multiple_kthread_on == 0) {
183                 /* Stop kernel thread */
184                 kthread_stop(kni_kthread);
185                 kni_kthread = NULL;
186         }
187
188         down_write(&kni_list_lock);
189         list_for_each_entry_safe(dev, n, &kni_list_head, list) {
190                 /* Stop kernel thread for multiple mode */
191                 if (multiple_kthread_on && dev->pthread != NULL) {
192                         kthread_stop(dev->pthread);
193                         dev->pthread = NULL;
194                 }
195
196 #ifdef RTE_KNI_VHOST
197                 kni_vhost_backend_release(dev);
198 #endif
199                 kni_dev_remove(dev);
200                 list_del(&dev->list);
201         }
202         up_write(&kni_list_lock);
203
204         /* Clear the bit of device in use */
205         clear_bit(KNI_DEV_IN_USE_BIT_NUM, &device_in_use);
206
207         KNI_PRINT("/dev/kni closed\n");
208
209         return 0;
210 }
211
212 static int
213 kni_thread_single(void *unused)
214 {
215         int j;
216         struct kni_dev *dev, *n;
217
218         while (!kthread_should_stop()) {
219                 down_read(&kni_list_lock);
220                 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
221                         list_for_each_entry_safe(dev, n,
222                                         &kni_list_head, list) {
223 #ifdef RTE_KNI_VHOST
224                                 kni_chk_vhost_rx(dev);
225 #else
226                                 kni_net_rx(dev);
227 #endif
228                                 kni_net_poll_resp(dev);
229                         }
230                 }
231                 up_read(&kni_list_lock);
232 #ifdef RTE_KNI_PREEMPT_DEFAULT
233                 /* reschedule out for a while */
234                 schedule_timeout_interruptible(usecs_to_jiffies( \
235                                 KNI_KTHREAD_RESCHEDULE_INTERVAL));
236 #endif
237         }
238
239         return 0;
240 }
241
242 static int
243 kni_thread_multiple(void *param)
244 {
245         int j;
246         struct kni_dev *dev = (struct kni_dev *)param;
247
248         while (!kthread_should_stop()) {
249                 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
250 #ifdef RTE_KNI_VHOST
251                         kni_chk_vhost_rx(dev);
252 #else
253                         kni_net_rx(dev);
254 #endif
255                         kni_net_poll_resp(dev);
256                 }
257 #ifdef RTE_KNI_PREEMPT_DEFAULT
258                 schedule_timeout_interruptible(usecs_to_jiffies( \
259                                 KNI_KTHREAD_RESCHEDULE_INTERVAL));
260 #endif
261         }
262
263         return 0;
264 }
265
266 static int
267 kni_dev_remove(struct kni_dev *dev)
268 {
269         if (!dev)
270                 return -ENODEV;
271
272         switch (dev->device_id) {
273         #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
274         #include <rte_pci_dev_ids.h>
275                 igb_kni_remove(dev->pci_dev);
276                 break;
277         #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev):
278         #include <rte_pci_dev_ids.h>
279                 ixgbe_kni_remove(dev->pci_dev);
280                 break;
281         default:
282                 break;
283         }
284
285         if (dev->net_dev) {
286                 unregister_netdev(dev->net_dev);
287                 free_netdev(dev->net_dev);
288         }
289
290         return 0;
291 }
292
293 static int
294 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
295 {
296         if (!kni || !dev)
297                 return -1;
298
299         /* Check if network name has been used */
300         if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
301                 KNI_ERR("KNI name %s duplicated\n", dev->name);
302                 return -1;
303         }
304
305         return 0;
306 }
307
308 static int
309 kni_ioctl_create(unsigned int ioctl_num, unsigned long ioctl_param)
310 {
311         int ret;
312         struct rte_kni_device_info dev_info;
313         struct pci_dev *pci = NULL;
314         struct pci_dev *found_pci = NULL;
315         struct net_device *net_dev = NULL;
316         struct net_device *lad_dev = NULL;
317         struct kni_dev *kni, *dev, *n;
318         struct net *net;
319
320         printk(KERN_INFO "KNI: Creating kni...\n");
321         /* Check the buffer size, to avoid warning */
322         if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
323                 return -EINVAL;
324
325         /* Copy kni info from user space */
326         ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
327         if (ret) {
328                 KNI_ERR("copy_from_user in kni_ioctl_create");
329                 return -EIO;
330         }
331
332         /**
333          * Check if the cpu core id is valid for binding,
334          * for multiple kernel thread mode.
335          */
336         if (multiple_kthread_on && dev_info.force_bind &&
337                                 !cpu_online(dev_info.core_id)) {
338                 KNI_ERR("cpu %u is not online\n", dev_info.core_id);
339                 return -EINVAL;
340         }
341
342         /* Check if it has been created */
343         down_read(&kni_list_lock);
344         list_for_each_entry_safe(dev, n, &kni_list_head, list) {
345                 if (kni_check_param(dev, &dev_info) < 0) {
346                         up_read(&kni_list_lock);
347                         return -EINVAL;
348                 }
349         }
350         up_read(&kni_list_lock);
351
352         net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
353 #ifdef NET_NAME_UNKNOWN
354                                                         NET_NAME_UNKNOWN,
355 #endif
356                                                         kni_net_init);
357         if (net_dev == NULL) {
358                 KNI_ERR("error allocating device \"%s\"\n", dev_info.name);
359                 return -EBUSY;
360         }
361
362         net = get_net_ns_by_pid(current->pid);
363         if (IS_ERR(net)) {
364                 free_netdev(net_dev);
365                 return PTR_ERR(net);
366         }
367         dev_net_set(net_dev, net);
368         put_net(net);
369
370         kni = netdev_priv(net_dev);
371
372         kni->net_dev = net_dev;
373         kni->group_id = dev_info.group_id;
374         kni->core_id = dev_info.core_id;
375         strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
376
377         /* Translate user space info into kernel space info */
378         kni->tx_q = phys_to_virt(dev_info.tx_phys);
379         kni->rx_q = phys_to_virt(dev_info.rx_phys);
380         kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
381         kni->free_q = phys_to_virt(dev_info.free_phys);
382
383         kni->req_q = phys_to_virt(dev_info.req_phys);
384         kni->resp_q = phys_to_virt(dev_info.resp_phys);
385         kni->sync_va = dev_info.sync_va;
386         kni->sync_kva = phys_to_virt(dev_info.sync_phys);
387
388         kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys);
389         kni->mbuf_va = dev_info.mbuf_va;
390
391 #ifdef RTE_KNI_VHOST
392         kni->vhost_queue = NULL;
393         kni->vq_status = BE_STOP;
394 #endif
395         kni->mbuf_size = dev_info.mbuf_size;
396
397         KNI_PRINT("tx_phys:      0x%016llx, tx_q addr:      0x%p\n",
398                 (unsigned long long) dev_info.tx_phys, kni->tx_q);
399         KNI_PRINT("rx_phys:      0x%016llx, rx_q addr:      0x%p\n",
400                 (unsigned long long) dev_info.rx_phys, kni->rx_q);
401         KNI_PRINT("alloc_phys:   0x%016llx, alloc_q addr:   0x%p\n",
402                 (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
403         KNI_PRINT("free_phys:    0x%016llx, free_q addr:    0x%p\n",
404                 (unsigned long long) dev_info.free_phys, kni->free_q);
405         KNI_PRINT("req_phys:     0x%016llx, req_q addr:     0x%p\n",
406                 (unsigned long long) dev_info.req_phys, kni->req_q);
407         KNI_PRINT("resp_phys:    0x%016llx, resp_q addr:    0x%p\n",
408                 (unsigned long long) dev_info.resp_phys, kni->resp_q);
409         KNI_PRINT("mbuf_phys:    0x%016llx, mbuf_kva:       0x%p\n",
410                 (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva);
411         KNI_PRINT("mbuf_va:      0x%p\n", dev_info.mbuf_va);
412         KNI_PRINT("mbuf_size:    %u\n", kni->mbuf_size);
413
414         KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n",
415                                         dev_info.bus,
416                                         dev_info.devid,
417                                         dev_info.function,
418                                         dev_info.vendor_id,
419                                         dev_info.device_id);
420
421         pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
422
423         /* Support Ethtool */
424         while (pci) {
425                 KNI_PRINT("pci_bus: %02x:%02x:%02x \n",
426                                         pci->bus->number,
427                                         PCI_SLOT(pci->devfn),
428                                         PCI_FUNC(pci->devfn));
429
430                 if ((pci->bus->number == dev_info.bus) &&
431                         (PCI_SLOT(pci->devfn) == dev_info.devid) &&
432                         (PCI_FUNC(pci->devfn) == dev_info.function)) {
433                         found_pci = pci;
434                         switch (dev_info.device_id) {
435                         #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev):
436                         #include <rte_pci_dev_ids.h>
437                                 ret = igb_kni_probe(found_pci, &lad_dev);
438                                 break;
439                         #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \
440                                                         case (dev):
441                         #include <rte_pci_dev_ids.h>
442                                 ret = ixgbe_kni_probe(found_pci, &lad_dev);
443                                 break;
444                         default:
445                                 ret = -1;
446                                 break;
447                         }
448
449                         KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n",
450                                                         pci, lad_dev);
451                         if (ret == 0) {
452                                 kni->lad_dev = lad_dev;
453                                 kni_set_ethtool_ops(kni->net_dev);
454                         } else {
455                                 KNI_ERR("Device not supported by ethtool");
456                                 kni->lad_dev = NULL;
457                         }
458
459                         kni->pci_dev = found_pci;
460                         kni->device_id = dev_info.device_id;
461                         break;
462                 }
463                 pci = pci_get_device(dev_info.vendor_id,
464                                 dev_info.device_id, pci);
465         }
466         if (pci)
467                 pci_dev_put(pci);
468
469         ret = register_netdev(net_dev);
470         if (ret) {
471                 KNI_ERR("error %i registering device \"%s\"\n",
472                                         ret, dev_info.name);
473                 kni_dev_remove(kni);
474                 return -ENODEV;
475         }
476
477 #ifdef RTE_KNI_VHOST
478         kni_vhost_init(kni);
479 #endif
480
481         /**
482          * Create a new kernel thread for multiple mode, set its core affinity,
483          * and finally wake it up.
484          */
485         if (multiple_kthread_on) {
486                 kni->pthread = kthread_create(kni_thread_multiple,
487                                               (void *)kni,
488                                               "kni_%s", kni->name);
489                 if (IS_ERR(kni->pthread)) {
490                         kni_dev_remove(kni);
491                         return -ECANCELED;
492                 }
493                 if (dev_info.force_bind)
494                         kthread_bind(kni->pthread, kni->core_id);
495                 wake_up_process(kni->pthread);
496         }
497
498         down_write(&kni_list_lock);
499         list_add(&kni->list, &kni_list_head);
500         up_write(&kni_list_lock);
501
502         return 0;
503 }
504
505 static int
506 kni_ioctl_release(unsigned int ioctl_num, unsigned long ioctl_param)
507 {
508         int ret = -EINVAL;
509         struct kni_dev *dev, *n;
510         struct rte_kni_device_info dev_info;
511
512         if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
513                         return -EINVAL;
514
515         ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
516         if (ret) {
517                 KNI_ERR("copy_from_user in kni_ioctl_release");
518                 return -EIO;
519         }
520
521         /* Release the network device according to its name */
522         if (strlen(dev_info.name) == 0)
523                 return ret;
524
525         down_write(&kni_list_lock);
526         list_for_each_entry_safe(dev, n, &kni_list_head, list) {
527                 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
528                         continue;
529
530                 if (multiple_kthread_on && dev->pthread != NULL) {
531                         kthread_stop(dev->pthread);
532                         dev->pthread = NULL;
533                 }
534
535 #ifdef RTE_KNI_VHOST
536                 kni_vhost_backend_release(dev);
537 #endif
538                 kni_dev_remove(dev);
539                 list_del(&dev->list);
540                 ret = 0;
541                 break;
542         }
543         up_write(&kni_list_lock);
544         printk(KERN_INFO "KNI: %s release kni named %s\n",
545                 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
546
547         return ret;
548 }
549
550 static int
551 kni_ioctl(struct inode *inode,
552         unsigned int ioctl_num,
553         unsigned long ioctl_param)
554 {
555         int ret = -EINVAL;
556
557         KNI_DBG("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param);
558
559         /*
560          * Switch according to the ioctl called
561          */
562         switch (_IOC_NR(ioctl_num)) {
563         case _IOC_NR(RTE_KNI_IOCTL_TEST):
564                 /* For test only, not used */
565                 break;
566         case _IOC_NR(RTE_KNI_IOCTL_CREATE):
567                 ret = kni_ioctl_create(ioctl_num, ioctl_param);
568                 break;
569         case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
570                 ret = kni_ioctl_release(ioctl_num, ioctl_param);
571                 break;
572         default:
573                 KNI_DBG("IOCTL default \n");
574                 break;
575         }
576
577         return ret;
578 }
579
580 static int
581 kni_compat_ioctl(struct inode *inode,
582                 unsigned int ioctl_num,
583                 unsigned long ioctl_param)
584 {
585         /* 32 bits app on 64 bits OS to be supported later */
586         KNI_PRINT("Not implemented.\n");
587
588         return -EINVAL;
589 }
590
591 module_init(kni_init);
592 module_exit(kni_exit);
593
594 module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
595 MODULE_PARM_DESC(lo_mode,
596 "KNI loopback mode (default=lo_mode_none):\n"
597 "    lo_mode_none        Kernel loopback disabled\n"
598 "    lo_mode_fifo        Enable kernel loopback with fifo\n"
599 "    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
600 "\n"
601 );
602
603 module_param(kthread_mode, charp, S_IRUGO);
604 MODULE_PARM_DESC(kthread_mode,
605 "Kernel thread mode (default=single):\n"
606 "    single    Single kernel thread mode enabled.\n"
607 "    multiple  Multiple kernel thread mode enabled.\n"
608 "\n"
609 );
610