The Kni kthreads seem to be re-scheduled at a granularity of roughly
1 millisecond right now, which seems to be insufficient for performing
tests involving a lot of control plane traffic.
Even if KNI_KTHREAD_RESCHEDULE_INTERVAL is set to 5 microseconds, it
seems that the existing code cannot reschedule at the desired granularily,
due to precision constraints of schedule_timeout_interruptible().
In our use case, we leverage the Linux Kernel for control plane, and
it is not uncommon to have 60K - 100K pps for some signaling protocols.
Since we are not in atomic context, the usleep_range() function seems to be
more appropriate for being able to introduce smaller controlled delays,
in the range of 5-10 microseconds. Upon reading the existing code, it would
seem that this was the original intent. Adding sub-millisecond delays,
seems unfeasible with a call to schedule_timeout_interruptible().
KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
schedule_timeout_interruptible(
usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
Below, we attempted a brief comparison between the existing implementation,
which uses schedule_timeout_interruptible() and usleep_range().
We attempt to measure the CPU usage, and RTT between two Kni interfaces,
which are created on top of vmxnet3 adapters, connected by a vSwitch.
insmod rte_kni.ko kthread_mode=single carrier=on
schedule_timeout_interruptible(usecs_to_jiffies(5))
kni_single CPU Usage: 2-4 %
[root@localhost ~]# ping 1.1.1.2 -I eth1
PING 1.1.1.2 (1.1.1.2) from 1.1.1.1 eth1: 56(84) bytes of data.
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=2.70 ms
64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=1.00 ms
64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=1.99 ms
64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.985 ms
64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=1.00 ms
usleep_range(5, 10)
kni_single CPU usage: 50%
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.338 ms
64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.150 ms
64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.123 ms
64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.139 ms
64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.159 ms
usleep_range(20, 50)
kni_single CPU usage: 24%
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.202 ms
64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.170 ms
64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.171 ms
64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.248 ms
64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.185 ms
usleep_range(50, 100)
kni_single CPU usage: 13%
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.537 ms
64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.257 ms
64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.231 ms
64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.143 ms
64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.200 ms
usleep_range(100, 200)
kni_single CPU usage: 7%
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=0.716 ms
64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=0.167 ms
64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=0.459 ms
64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=0.455 ms
64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=0.252 ms
usleep_range(1000, 1100)
kni_single CPU usage: 2%
64 bytes from 1.1.1.2: icmp_seq=1 ttl=64 time=2.22 ms
64 bytes from 1.1.1.2: icmp_seq=2 ttl=64 time=1.17 ms
64 bytes from 1.1.1.2: icmp_seq=3 ttl=64 time=1.17 ms
64 bytes from 1.1.1.2: icmp_seq=4 ttl=64 time=1.17 ms
64 bytes from 1.1.1.2: icmp_seq=5 ttl=64 time=1.15 ms
Upon testing, usleep_range(1000, 1100) seems roughly equivalent in
latency and cpu usage to the variant with schedule_timeout_interruptible(),
while usleep_range(100, 200) seems to give a decent tradeoff between
latency and cpu usage, while allowing users to tweak the limits for
improved precision if they have such use cases.
Disabling RTE_KNI_PREEMPT_DEFAULT, interestingly seems to lead to a
softlockup on my kernel.
Kernel panic - not syncing: softlockup: hung tasks
CPU: 0 PID: 1226 Comm: kni_single Tainted: G W O 3.10 #1
<IRQ> [<
ffffffff814f84de>] dump_stack+0x19/0x1b
[<
ffffffff814f7891>] panic+0xcd/0x1e0
[<
ffffffff810993b0>] watchdog_timer_fn+0x160/0x160
[<
ffffffff810644b2>] __run_hrtimer.isra.4+0x42/0xd0
[<
ffffffff81064b57>] hrtimer_interrupt+0xe7/0x1f0
[<
ffffffff8102cd57>] smp_apic_timer_interrupt+0x67/0xa0
[<
ffffffff8150321d>] apic_timer_interrupt+0x6d/0x80
This patch also attempts to remove this option.
References:
[1] https://www.kernel.org/doc/Documentation/timers/timers-howto.txt
Signed-off-by: Tudor Cornea <tudor.cornea@gmail.com>
Acked-by: Padraig Connolly <Padraig.J.Connolly@intel.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
#define RTE_SCHED_PORT_N_GRINDERS 8
#undef RTE_SCHED_VECTOR
-/* KNI defines */
-#define RTE_KNI_PREEMPT_DEFAULT 1
-
/* rte_graph defines */
#define RTE_GRAPH_BURST_SIZE 256
#define RTE_LIBRTE_GRAPH_STATS 1
userspace callback and supporting async requests (default=off):
on Enable request processing support for bifurcated drivers.
(charp)
+ parm: min_scheduling_interval: KNI thread min scheduling interval (default=100 microseconds)
+ (long)
+ parm: max_scheduling_interval: KNI thread max scheduling interval (default=200 microseconds)
+ (long)
Loading the ``rte_kni`` kernel module without any optional parameters is
callback and locks it back after callback. Also enables asynchronous request to
support callbacks that requires rtnl lock to work (interface down).
+KNI Kthread Scheduling
+~~~~~~~~~~~~~~~~~~~~~~
+
+The ``min_scheduling_interval`` and ``max_scheduling_interval`` parameters
+control the rescheduling interval of the KNI kthreads.
+
+This might be useful if we have use cases in which we require improved
+latency or performance for control plane traffic.
+
+The implementation is backed by Linux High Precision Timers, and uses ``usleep_range``.
+Hence, it will have the same granularity constraints as this Linux subsystem.
+
+For Linux High Precision Timers, you can check the following resource: `Kernel Timers <http://www.kernel.org/doc/Documentation/timers/timers-howto.txt>`_
+
+To set the ``min_scheduling_interval`` to a value of 100 microseconds:
+
+.. code-block:: console
+
+ # insmod <build_dir>/kernel/linux/kni/rte_kni.ko min_scheduling_interval=100
+
+To set the ``max_scheduling_interval`` to a value of 200 microseconds:
+
+.. code-block:: console
+
+ # insmod <build_dir>/kernel/linux/kni/rte_kni.ko max_scheduling_interval=200
+
+If the ``min_scheduling_interval`` and ``max_scheduling_interval`` parameters are
+not specified, the default interval limits will be set to *100* and *200* respectively.
+
KNI Creation and Deletion
-------------------------
#include <linux/list.h>
#include <rte_kni_common.h>
-#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
+#define KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL 1000000 /* us */
#define MBUF_BURST_SZ 32
static char *enable_bifurcated;
uint32_t bifurcated_support;
+/* KNI thread scheduling interval */
+static long min_scheduling_interval = 100; /* us */
+static long max_scheduling_interval = 200; /* us */
+
#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
static int kni_net_id;
}
}
up_read(&knet->kni_list_lock);
-#ifdef RTE_KNI_PREEMPT_DEFAULT
/* reschedule out for a while */
- schedule_timeout_interruptible(
- usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
+ usleep_range(min_scheduling_interval, max_scheduling_interval);
}
return 0;
kni_net_rx(dev);
kni_net_poll_resp(dev);
}
-#ifdef RTE_KNI_PREEMPT_DEFAULT
- schedule_timeout_interruptible(
- usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
-#endif
+ usleep_range(min_scheduling_interval, max_scheduling_interval);
}
return 0;
if (bifurcated_support == 1)
pr_debug("bifurcated support is enabled.\n");
+ if (min_scheduling_interval < 0 || max_scheduling_interval < 0 ||
+ min_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL ||
+ max_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL ||
+ min_scheduling_interval >= max_scheduling_interval) {
+ pr_err("Invalid parameters for scheduling interval\n");
+ return -EINVAL;
+ }
+
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
rc = register_pernet_subsys(&kni_net_ops);
#else
"\t\ton Enable request processing support for bifurcated drivers.\n"
"\t\t"
);
+
+module_param(min_scheduling_interval, long, 0644);
+MODULE_PARM_DESC(min_scheduling_interval,
+"KNI thread min scheduling interval (default=100 microseconds)"
+);
+
+module_param(max_scheduling_interval, long, 0644);
+MODULE_PARM_DESC(max_scheduling_interval,
+"KNI thread max scheduling interval (default=200 microseconds)"
+);