From 5569dd7d90b8bfb08facd2125ff55fefd8e61626 Mon Sep 17 00:00:00 2001 From: Tudor Cornea Date: Thu, 20 Jan 2022 14:41:34 +0200 Subject: [PATCH] kni: allow configuring thread granularity The KNI kthreads seem to be re-scheduled at a granularity of roughly 1 millisecond right now, which seems to be insufficient for performing tests involving a lot of control plane traffic. Even if KNI_KTHREAD_RESCHEDULE_INTERVAL is set to 5 microseconds, it seems that the existing code cannot reschedule at the desired granularity, due to precision constraints of schedule_timeout_interruptible(). In our use case, we leverage the Linux Kernel for control plane, and it is not uncommon to have 60K - 100K pps for some signaling protocols. Since we are not in atomic context, the usleep_range() function seems to be more appropriate for being able to introduce smaller controlled delays, in the range of 5-10 microseconds. Upon reading the existing code, it would seem that this was the original intent. Adding sub-millisecond delays seems unfeasible with a call to schedule_timeout_interruptible(). KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ schedule_timeout_interruptible( usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); Below, we attempted a brief comparison between the existing implementation, which uses schedule_timeout_interruptible(), and one based on usleep_range(). We attempt to measure the CPU usage, and RTT between two KNI interfaces, which are created on top of vmxnet3 adapters, connected by a vSwitch. insmod rte_kni.ko kthread_mode=single carrier=on schedule_timeout_interruptible(usecs_to_jiffies(5)) kni_single CPU Usage: 2-4 % [root@localhost ~]# ping 1.1.1.2 -I eth1 PING 1.1.1.2 (1.1.1.2) from 1.1.1.1 eth1: 56(84) bytes of data.
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=2.70 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=1.00 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=1.99 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.985 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=1.00 ms usleep_range(5, 10) kni_single CPU usage: 50% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.338 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.150 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.123 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.139 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.159 ms usleep_range(20, 50) kni_single CPU usage: 24% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.202 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.170 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.171 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.248 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.185 ms usleep_range(50, 100) kni_single CPU usage: 13% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.537 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.257 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.231 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.143 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.200 ms usleep_range(100, 200) kni_single CPU usage: 7% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.716 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.167 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.459 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.455 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.252 ms usleep_range(1000, 1100) kni_single CPU usage: 2% 64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=2.22 ms 64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=1.17 ms 64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=1.17 ms 64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=1.17 ms 64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=1.15 ms Upon testing, usleep_range(1000, 1100) seems roughly equivalent in latency and cpu usage to the variant with 
schedule_timeout_interruptible(), while usleep_range(100, 200) seems to give a decent tradeoff between latency and cpu usage, while allowing users to tweak the limits for improved precision if they have such use cases. Disabling RTE_KNI_PREEMPT_DEFAULT, interestingly seems to lead to a softlockup on my kernel. Kernel panic - not syncing: softlockup: hung tasks CPU: 0 PID: 1226 Comm: kni_single Tainted: G W O 3.10 #1 [] dump_stack+0x19/0x1b [] panic+0xcd/0x1e0 [] watchdog_timer_fn+0x160/0x160 [] __run_hrtimer.isra.4+0x42/0xd0 [] hrtimer_interrupt+0xe7/0x1f0 [] smp_apic_timer_interrupt+0x67/0xa0 [] apic_timer_interrupt+0x6d/0x80 This patch also attempts to remove this option. References: [1] https://www.kernel.org/doc/Documentation/timers/timers-howto.txt Signed-off-by: Tudor Cornea Acked-by: Padraig Connolly Reviewed-by: Ferruh Yigit --- config/rte_config.h | 3 -- .../prog_guide/kernel_nic_interface.rst | 33 +++++++++++++++++++ kernel/linux/kni/kni_dev.h | 2 +- kernel/linux/kni/kni_misc.c | 32 +++++++++++++----- 4 files changed, 58 insertions(+), 12 deletions(-) diff --git a/config/rte_config.h b/config/rte_config.h index cab4390a97..91d96eeecb 100644 --- a/config/rte_config.h +++ b/config/rte_config.h @@ -95,9 +95,6 @@ #define RTE_SCHED_PORT_N_GRINDERS 8 #undef RTE_SCHED_VECTOR -/* KNI defines */ -#define RTE_KNI_PREEMPT_DEFAULT 1 - /* rte_graph defines */ #define RTE_GRAPH_BURST_SIZE 256 #define RTE_LIBRTE_GRAPH_STATS 1 diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst b/doc/guides/prog_guide/kernel_nic_interface.rst index 771c7d7fda..5248f9622b 100644 --- a/doc/guides/prog_guide/kernel_nic_interface.rst +++ b/doc/guides/prog_guide/kernel_nic_interface.rst @@ -61,6 +61,10 @@ can be specified when the module is loaded to control its behavior: userspace callback and supporting async requests (default=off): on Enable request processing support for bifurcated drivers. 
(charp) + parm: min_scheduling_interval: KNI thread min scheduling interval (default=100 microseconds) + (long) + parm: max_scheduling_interval: KNI thread max scheduling interval (default=200 microseconds) + (long) Loading the ``rte_kni`` kernel module without any optional parameters is @@ -202,6 +206,35 @@ Enabling bifurcated device support releases ``rtnl`` lock before calling callback and locks it back after callback. Also enables asynchronous request to support callbacks that requires rtnl lock to work (interface down). +KNI Kthread Scheduling +~~~~~~~~~~~~~~~~~~~~~~ + +The ``min_scheduling_interval`` and ``max_scheduling_interval`` parameters +control the rescheduling interval of the KNI kthreads. + +This might be useful if we have use cases in which we require improved +latency or performance for control plane traffic. + +The implementation is backed by Linux High Precision Timers, and uses ``usleep_range``. +Hence, it will have the same granularity constraints as this Linux subsystem. + +For Linux High Precision Timers, you can check the following resource: `Kernel Timers <https://www.kernel.org/doc/Documentation/timers/timers-howto.txt>`_ + +To set the ``min_scheduling_interval`` to a value of 100 microseconds: + +.. code-block:: console + + # insmod /kernel/linux/kni/rte_kni.ko min_scheduling_interval=100 + +To set the ``max_scheduling_interval`` to a value of 200 microseconds: + +.. code-block:: console + + # insmod /kernel/linux/kni/rte_kni.ko max_scheduling_interval=200 + +If the ``min_scheduling_interval`` and ``max_scheduling_interval`` parameters are +not specified, the default interval limits will be set to *100* and *200* respectively.
+ KNI Creation and Deletion ------------------------- diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h index e8633486ee..a2c6d9fc1a 100644 --- a/kernel/linux/kni/kni_dev.h +++ b/kernel/linux/kni/kni_dev.h @@ -27,7 +27,7 @@ #include #include -#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ +#define KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL 1000000 /* us */ #define MBUF_BURST_SZ 32 diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c index f10dcd069d..e8a8203c90 100644 --- a/kernel/linux/kni/kni_misc.c +++ b/kernel/linux/kni/kni_misc.c @@ -45,6 +45,10 @@ uint32_t kni_dflt_carrier; static char *enable_bifurcated; uint32_t bifurcated_support; +/* KNI thread scheduling interval */ +static long min_scheduling_interval = 100; /* us */ +static long max_scheduling_interval = 200; /* us */ + #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ static int kni_net_id; @@ -132,11 +136,8 @@ kni_thread_single(void *data) } } up_read(&knet->kni_list_lock); -#ifdef RTE_KNI_PREEMPT_DEFAULT /* reschedule out for a while */ - schedule_timeout_interruptible( - usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif + usleep_range(min_scheduling_interval, max_scheduling_interval); } return 0; @@ -153,10 +154,7 @@ kni_thread_multiple(void *param) kni_net_rx(dev); kni_net_poll_resp(dev); } -#ifdef RTE_KNI_PREEMPT_DEFAULT - schedule_timeout_interruptible( - usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL)); -#endif + usleep_range(min_scheduling_interval, max_scheduling_interval); } return 0; @@ -617,6 +615,14 @@ kni_init(void) if (bifurcated_support == 1) pr_debug("bifurcated support is enabled.\n"); + if (min_scheduling_interval < 0 || max_scheduling_interval < 0 || + min_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL || + max_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL || + min_scheduling_interval >= max_scheduling_interval) { + pr_err("Invalid parameters for scheduling interval\n"); + return -EINVAL; + } + 
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS rc = register_pernet_subsys(&kni_net_ops); #else @@ -692,3 +698,13 @@ MODULE_PARM_DESC(enable_bifurcated, "\t\ton Enable request processing support for bifurcated drivers.\n" "\t\t" ); + +module_param(min_scheduling_interval, long, 0644); +MODULE_PARM_DESC(min_scheduling_interval, +"KNI thread min scheduling interval (default=100 microseconds)" +); + +module_param(max_scheduling_interval, long, 0644); +MODULE_PARM_DESC(max_scheduling_interval, +"KNI thread max scheduling interval (default=200 microseconds)" +); -- 2.20.1