1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright(c) 2010-2014 Intel Corporation.
7 * This code is inspired by the book "Linux Device Drivers" by
8 * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
11 #include <linux/device.h>
12 #include <linux/module.h>
13 #include <linux/version.h>
14 #include <linux/netdevice.h>
15 #include <linux/etherdevice.h> /* eth_type_trans */
16 #include <linux/ethtool.h>
17 #include <linux/skbuff.h>
18 #include <linux/kthread.h>
19 #include <linux/delay.h>
21 #include <rte_kni_common.h>
/*
 * File-level constants and the RX dispatch pointer.
 * NOTE(review): the numeric prefix on every line and the absence of short
 * lines look like listing artefacts; the code text is left untouched.
 */
/* WD_TIMEOUT is the value kni_net_init() stores in dev->watchdog_timeo. */
27 #define WD_TIMEOUT 5 /*jiffies */
/*
 * NOTE(review): KNI_WAIT_RESPONSE_TIMEOUT is not referenced anywhere in the
 * visible code; kni_net_process_request() waits for 3 * HZ instead.
 */
29 #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
31 /* typedef for rx function */
32 typedef void (*kni_net_rx_t)(struct kni_dev *kni);
/* Forward declaration so it can be used as the default handler below. */
34 static void kni_net_rx_normal(struct kni_dev *kni);
/*
 * kni_net_rx() calls through this pointer; kni_net_config_lo_mode() may
 * replace it with one of the loopback handlers.
 */
36 /* kni rx function pointer, with default to normal rx */
37 static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
39 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
/*
 * iova2kva - translate an IO virtual address into a kernel virtual address.
 * @kni:  KNI device; kni->usr_tsk is passed to iova_to_phys().
 * @iova: IO virtual address to translate.
 *
 * Returns phys_to_virt() applied to the physical address reported by
 * iova_to_phys().  Follows the #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT guard.
 */
40 /* iova to kernel virtual address */
42 iova2kva(struct kni_dev *kni, void *iova)
44 return phys_to_virt(iova_to_phys(kni->usr_tsk, (unsigned long)iova));
/*
 * iova2data_kva - kernel virtual address of an mbuf's data when addresses
 * are IOVAs.
 * @kni: KNI device; kni->usr_tsk is passed to iova_to_phys().
 * @m:   mbuf whose buf_physaddr is translated.
 *
 * NOTE(review): the return expression ends with '+' in this listing and its
 * second operand is not visible; kva2data_kva() adds m->data_off, so
 * presumably this does too -- confirm against the complete file.
 */
48 iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
50 return phys_to_virt(iova_to_phys(kni->usr_tsk, m->buf_physaddr) +
/*
 * Returns phys_to_virt() of 'pa' cast to unsigned long.
 * NOTE(review): the signature line is not visible in this listing;
 * presumably this is the body of pa2kva(), which other functions in this
 * file call -- confirm.
 */
55 /* physical address to kernel virtual address */
59 return phys_to_virt((unsigned long)pa);
/*
 * pa2va - convert a physical address into the corresponding virtual address.
 * @pa: physical address to convert.
 * @m:  mbuf that supplies the offset between the two address spaces.
 *
 * Computes va = pa + m->buf_addr - m->buf_physaddr, i.e. it applies the
 * same virtual/physical delta that the mbuf's own buffer has.
 */
62 /* physical address to virtual address */
64 pa2va(void *pa, struct rte_kni_mbuf *m)
68 va = (void *)((unsigned long)pa +
69 (unsigned long)m->buf_addr -
70 (unsigned long)m->buf_physaddr);
/*
 * kva2data_kva - kernel virtual address of the packet data of an mbuf.
 * @m: mbuf, addressed through its kernel virtual address.
 *
 * Returns phys_to_virt(m->buf_physaddr + m->data_off).
 */
74 /* mbuf data kernel virtual address from mbuf kernel virtual address */
76 kva2data_kva(struct rte_kni_mbuf *m)
78 return phys_to_virt(m->buf_physaddr + m->data_off);
/*
 * get_kva - kernel virtual address for an address taken from a FIFO.
 * @kni: KNI device; kni->iova_mode selects the translation.
 * @pa:  address as dequeued from the FIFO.
 *
 * With HAVE_IOVA_TO_KVA_MAPPING_SUPPORT and kni->iova_mode == 1 the address
 * is translated by iova2kva().
 * NOTE(review): the fallback return for the non-IOVA case is not visible in
 * this listing.
 */
82 get_kva(struct kni_dev *kni, void *pa)
84 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
85 if (kni->iova_mode == 1)
86 return iova2kva(kni, pa);
/*
 * get_data_kva - kernel virtual address of an mbuf's packet data.
 * @kni:     KNI device; kni->iova_mode selects the translation.
 * @pkt_kva: mbuf, addressed through its kernel virtual address.
 *
 * With HAVE_IOVA_TO_KVA_MAPPING_SUPPORT and kni->iova_mode == 1 this
 * returns iova2data_kva(); otherwise it returns kva2data_kva().
 */
92 get_data_kva(struct kni_dev *kni, void *pkt_kva)
94 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
95 if (kni->iova_mode == 1)
96 return iova2data_kva(kni, pkt_kva);
98 return kva2data_kva(pkt_kva);
/*
 * kni_net_process_request - send a request over req_q and wait for the reply.
 * @kni: KNI device owning sync_lock, sync_kva/sync_va, req_q, resp_q and wq.
 * @req: request to send; overwritten with the contents of kni->sync_kva
 *       once a response has been dequeued.
 *
 * Steps visible between mutex_lock() and mutex_unlock() on kni->sync_lock:
 *  1. copy @req into kni->sync_kva and put kni->sync_va on req_q;
 *  2. sleep on kni->wq until resp_q is non-empty, for at most 3 * HZ;
 *  3. take one entry from resp_q and check that it is kni->sync_va;
 *  4. copy kni->sync_kva back into @req.
 *
 * NOTE(review): the return values and the error-path jumps are not visible
 * in this listing.
 */
102 * It can be called to process the request.
105 kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
113 pr_err("No kni instance or request\n");
/* Only one request may be in flight; sync_kva/sync_va are shared. */
117 mutex_lock(&kni->sync_lock);
120 memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
121 num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
123 pr_err("Cannot send to req_q\n");
/* kni_net_poll_resp() wakes this wait queue when resp_q has entries. */
128 ret_val = wait_event_interruptible_timeout(kni->wq,
129 kni_fifo_count(kni->resp_q), 3 * HZ);
130 if (signal_pending(current) || ret_val <= 0) {
134 num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
135 if (num != 1 || resp_va != kni->sync_va) {
136 /* This should never happen */
137 pr_err("No data in resp_q\n");
142 memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
146 mutex_unlock(&kni->sync_lock);
/*
 * kni_net_open - .ndo_open handler.
 * @dev: KNI net device.
 *
 * Starts the transmit queue, sets the carrier state from kni_dflt_carrier,
 * then sends an RTE_KNI_REQ_CFG_NETWORK_IF request through
 * kni_net_process_request().
 *
 * Returns req.result when the request call returned 0, otherwise the
 * request call's own return value.
 *
 * NOTE(review): the statement that sets the if_up field, and the 'else'
 * between the two carrier calls, are not visible in this listing.
 */
154 kni_net_open(struct net_device *dev)
157 struct rte_kni_request req;
158 struct kni_dev *kni = netdev_priv(dev);
160 netif_start_queue(dev);
161 if (kni_dflt_carrier == 1)
162 netif_carrier_on(dev);
164 netif_carrier_off(dev);
166 memset(&req, 0, sizeof(req));
167 req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
169 /* Setting if_up to non-zero means up */
171 ret = kni_net_process_request(kni, &req);
173 return (ret == 0) ? req.result : ret;
/*
 * kni_net_release - .ndo_stop handler.
 * @dev: KNI net device.
 *
 * Stops the transmit queue, turns the carrier off, then sends an
 * RTE_KNI_REQ_CFG_NETWORK_IF request through kni_net_process_request().
 *
 * Returns req.result when the request call returned 0, otherwise the
 * request call's own return value.
 *
 * NOTE(review): the statement that sets the if_up field is not visible in
 * this listing; 'req' is zeroed by memset() beforehand.
 */
177 kni_net_release(struct net_device *dev)
180 struct rte_kni_request req;
181 struct kni_dev *kni = netdev_priv(dev);
183 netif_stop_queue(dev); /* can't transmit any more */
184 netif_carrier_off(dev);
186 memset(&req, 0, sizeof(req));
187 req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
189 /* Setting if_up to 0 means down */
191 ret = kni_net_process_request(kni, &req);
193 return (ret == 0) ? req.result : ret;
/*
 * kni_fifo_trans_pa2va - move mbufs from one FIFO to another, converting
 * their addresses with pa2va() on the way.
 * @kni:    KNI device; kni->pa and kni->va are used as scratch arrays.
 * @src_pa: FIFO to dequeue from.
 * @dst_va: FIFO to enqueue the converted addresses into.
 *
 * Each pass dequeues at most min(free entries in @dst_va, MBUF_BURST_SZ)
 * entries.  For every mbuf the segment chain is walked (nb_segs entries)
 * and each 'next' pointer is rewritten with pa2va() as well.
 *
 * NOTE(review): the enclosing loop and its exit conditions are not visible
 * in this listing.
 */
197 kni_fifo_trans_pa2va(struct kni_dev *kni,
198 struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
200 uint32_t ret, i, num_dst, num_rx;
201 struct rte_kni_mbuf *kva, *prev_kva;
/* Never dequeue more than the destination can accept. */
206 num_dst = kni_fifo_free_count(dst_va);
210 num_rx = min_t(uint32_t, num_dst, MBUF_BURST_SZ);
212 num_rx = kni_fifo_get(src_pa, kni->pa, num_rx);
216 for (i = 0; i < num_rx; i++) {
217 kva = get_kva(kni, kni->pa[i]);
218 kni->va[i] = pa2va(kni->pa[i], kva);
220 kva_nb_segs = kva->nb_segs;
221 for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
226 kva = pa2kva(kva->next);
227 /* Convert physical address to virtual address */
228 prev_kva->next = pa2va(prev_kva->next, kva);
232 ret = kni_fifo_put(dst_va, kni->va, num_rx);
234 /* Failing should not happen */
235 pr_err("Fail to enqueue entries into dst_va\n");
/*
 * kni_net_release_fifo_phy - hand outstanding mbufs back via free_q.
 * @kni: KNI device whose rx_q and alloc_q are drained into free_q.
 *
 * Both transfers go through kni_fifo_trans_pa2va(); rx_q is handled before
 * alloc_q.
 */
241 /* Try to release mbufs when kni release */
242 void kni_net_release_fifo_phy(struct kni_dev *kni)
244 /* release rx_q first, because it can't release in userspace */
245 kni_fifo_trans_pa2va(kni, kni->rx_q, kni->free_q);
246 /* release alloc_q for speeding up kni release in userspace */
247 kni_fifo_trans_pa2va(kni, kni->alloc_q, kni->free_q);
/*
 * kni_net_config - .ndo_set_config handler.
 * @dev: KNI net device.
 * @map: requested interface map; no field of it is read in the visible code.
 *
 * The only visible logic is the IFF_UP test on dev->flags.
 * NOTE(review): the return statements are not visible in this listing.
 */
251 * Configuration changes (passed on by ifconfig)
254 kni_net_config(struct net_device *dev, struct ifmap *map)
256 if (dev->flags & IFF_UP) /* can't act on a running interface */
259 /* ignore other fields */
/*
 * kni_net_tx - transmit handler, installed as .ndo_start_xmit.
 * @skb: packet handed down by the network stack.
 * @dev: KNI net device; its private data is the struct kni_dev.
 *
 * Visible flow: record the transmit timestamp, test skb->len against
 * kni->mbuf_size, check that tx_q has a free slot and alloc_q has an mbuf,
 * then take one mbuf from alloc_q, copy skb->data into its data area
 * (zero-filling up to ETH_ZLEN for short frames), set pkt_len/data_len and
 * put the mbuf's pa2va()-converted address on tx_q.  tx_bytes/tx_packets
 * are updated on the send path and tx_dropped on the drop path.
 *
 * NOTE(review): the assignment of 'len', the skb free calls, the drop
 * label and the return statements are not visible in this listing.
 */
264 * Transmit a packet (called by the kernel)
267 kni_net_tx(struct sk_buff *skb, struct net_device *dev)
271 struct kni_dev *kni = netdev_priv(dev);
272 struct rte_kni_mbuf *pkt_kva = NULL;
276 /* save the timestamp */
277 #ifdef HAVE_TRANS_START_HELPER
278 netif_trans_update(dev);
280 dev->trans_start = jiffies;
283 /* Check whether the skb is longer than the mbuf size */
284 if (skb->len > kni->mbuf_size)
288 * Check if it has at least one free entry in tx_q and
289 * one entry in alloc_q.
291 if (kni_fifo_free_count(kni->tx_q) == 0 ||
292 kni_fifo_count(kni->alloc_q) == 0) {
294 * If no free entry in tx_q or no entry in alloc_q,
295 * drops skb and goes out.
300 /* dequeue a mbuf from alloc_q */
301 ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
302 if (likely(ret == 1)) {
/* pkt_kva: mbuf header, data_kva: its payload, pkt_va: address queued on tx_q */
305 pkt_kva = get_kva(kni, pkt_pa);
306 data_kva = get_data_kva(kni, pkt_kva);
307 pkt_va = pa2va(pkt_pa, pkt_kva);
310 memcpy(data_kva, skb->data, len);
/* Zero the bytes between the end of the frame and ETH_ZLEN. */
311 if (unlikely(len < ETH_ZLEN)) {
312 memset(data_kva + len, 0, ETH_ZLEN - len);
315 pkt_kva->pkt_len = len;
316 pkt_kva->data_len = len;
318 /* enqueue mbuf into tx_q */
319 ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
320 if (unlikely(ret != 1)) {
321 /* Failing should not happen */
322 pr_err("Fail to enqueue mbuf into tx_q\n");
326 /* Failing should not happen */
327 pr_err("Fail to dequeue mbuf from alloc_q\n");
331 /* Free skb and update statistics */
333 dev->stats.tx_bytes += len;
334 dev->stats.tx_packets++;
339 /* Free skb and update statistics */
341 dev->stats.tx_dropped++;
/*
 * kni_net_rx_normal - default RX handler (initial value of kni_net_rx_func).
 * @kni: KNI device; kni->pa and kni->va are used as scratch arrays.
 *
 * Dequeues at most min(free entries in free_q, MBUF_BURST_SZ) mbufs from
 * rx_q.  For each one an skb is allocated and filled from the mbuf data:
 * a single memcpy() when nb_segs == 1, otherwise one memcpy() of data_len
 * bytes per segment while walking the 'next' chain.  The skb gets its
 * protocol from eth_type_trans() and ip_summed = CHECKSUM_UNNECESSARY,
 * rx_bytes/rx_packets are updated, and finally all dequeued mbufs are put
 * on free_q.
 *
 * NOTE(review): the assignment of 'len', the allocation-failure test and
 * the call that hands the skb to the stack are not visible in this listing.
 * NOTE(review): the segment walk uses pa2kva()/kva2data_kva() directly,
 * while kni_net_rx_lo_fifo_skb() uses get_kva()/get_data_kva(); confirm the
 * intended behaviour when kni->iova_mode == 1.
 */
347 * RX: normal working mode
350 kni_net_rx_normal(struct kni_dev *kni)
354 uint32_t i, num_rx, num_fq;
355 struct rte_kni_mbuf *kva, *prev_kva;
358 struct net_device *dev = kni->net_dev;
360 /* Get the number of free entries in free_q */
361 num_fq = kni_fifo_free_count(kni->free_q);
363 /* No room on the free_q, bail out */
367 /* Calculate the number of entries to dequeue from rx_q */
368 num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);
370 /* Burst dequeue from rx_q */
371 num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
375 /* Transfer received packets to netif */
376 for (i = 0; i < num_rx; i++) {
377 kva = get_kva(kni, kni->pa[i]);
379 data_kva = get_data_kva(kni, kva);
380 kni->va[i] = pa2va(kni->pa[i], kva);
382 skb = netdev_alloc_skb(dev, len);
384 /* Update statistics */
385 dev->stats.rx_dropped++;
389 if (kva->nb_segs == 1) {
390 memcpy(skb_put(skb, len), data_kva, len);
393 int kva_nb_segs = kva->nb_segs;
395 for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
396 memcpy(skb_put(skb, kva->data_len),
397 data_kva, kva->data_len);
403 kva = pa2kva(kva->next);
404 data_kva = kva2data_kva(kva);
405 /* Convert physical address to virtual address */
406 prev_kva->next = pa2va(prev_kva->next, kva);
410 skb->protocol = eth_type_trans(skb, dev);
411 skb->ip_summed = CHECKSUM_UNNECESSARY;
413 /* Call netif interface */
416 /* Update statistics */
417 dev->stats.rx_bytes += len;
418 dev->stats.rx_packets++;
421 /* Burst enqueue mbufs into free_q */
422 ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
424 /* Failing should not happen */
425 pr_err("Fail to enqueue entries into free_q\n");
/*
 * kni_net_rx_lo_fifo - loopback RX handler working purely on the FIFOs
 * (selected by the "lo_mode_fifo" string in kni_net_config_lo_mode()).
 * @kni: KNI device; kni->pa/va and kni->alloc_pa/alloc_va are scratch arrays.
 *
 * The burst size is the minimum of: entries in rx_q, free entries in tx_q,
 * entries in alloc_q, free entries in free_q, and MBUF_BURST_SZ.  That many
 * mbufs are dequeued from both rx_q and alloc_q; each received packet's
 * data is copied into the matching alloc_q mbuf, whose pkt_len/data_len
 * are set to 'len'.  The filled mbufs go to tx_q and the received ones to
 * free_q.  Both the tx and rx byte/packet counters are updated.
 *
 * NOTE(review): the assignment of 'len' and the tests guarding the early
 * returns and pr_err() calls are not visible in this listing.
 */
429 * RX: loopback with enqueue/dequeue fifos.
432 kni_net_rx_lo_fifo(struct kni_dev *kni)
436 uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
437 struct rte_kni_mbuf *kva, *next_kva;
439 struct rte_kni_mbuf *alloc_kva;
440 void *alloc_data_kva;
441 struct net_device *dev = kni->net_dev;
443 /* Get the number of entries in rx_q */
444 num_rq = kni_fifo_count(kni->rx_q);
446 /* Get the number of free entries in tx_q */
447 num_tq = kni_fifo_free_count(kni->tx_q);
449 /* Get the number of entries in alloc_q */
450 num_aq = kni_fifo_count(kni->alloc_q);
452 /* Get the number of free entries in free_q */
453 num_fq = kni_fifo_free_count(kni->free_q);
455 /* Calculate the number of entries to be dequeued from rx_q */
456 num = min(num_rq, num_tq);
457 num = min(num, num_aq);
458 num = min(num, num_fq);
459 num = min_t(uint32_t, num, MBUF_BURST_SZ);
461 /* Return if no entry to dequeue from rx_q */
465 /* Burst dequeue from rx_q */
466 ret = kni_fifo_get(kni->rx_q, kni->pa, num);
468 return; /* Failing should not happen */
470 /* Dequeue entries from alloc_q */
471 ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
475 for (i = 0; i < num; i++) {
476 kva = get_kva(kni, kni->pa[i]);
478 data_kva = get_data_kva(kni, kva);
479 kni->va[i] = pa2va(kni->pa[i], kva);
482 next_kva = pa2kva(kva->next);
483 /* Convert physical address to virtual address */
484 kva->next = pa2va(kva->next, next_kva);
/* Destination mbuf taken from alloc_q for the looped-back copy. */
488 alloc_kva = get_kva(kni, kni->alloc_pa[i]);
489 alloc_data_kva = get_data_kva(kni, alloc_kva);
490 kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
492 memcpy(alloc_data_kva, data_kva, len);
493 alloc_kva->pkt_len = len;
494 alloc_kva->data_len = len;
496 dev->stats.tx_bytes += len;
497 dev->stats.rx_bytes += len;
500 /* Burst enqueue mbufs into tx_q */
501 ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
503 /* Failing should not happen */
504 pr_err("Fail to enqueue mbufs into tx_q\n");
507 /* Burst enqueue mbufs into free_q */
508 ret = kni_fifo_put(kni->free_q, kni->va, num);
510 /* Failing should not happen */
511 pr_err("Fail to enqueue mbufs into free_q\n");
514 * Update statistic, and enqueue/dequeue failure is impossible,
515 * as all queues are checked at first.
517 dev->stats.tx_packets += num;
518 dev->stats.rx_packets += num;
/*
 * kni_net_rx_lo_fifo_skb - loopback RX handler that also exercises skb
 * allocation and copying (selected by the "lo_mode_fifo_skb" string in
 * kni_net_config_lo_mode()).
 * @kni: KNI device; kni->pa and kni->va are used as scratch arrays.
 *
 * Dequeues at most min(entries in rx_q, free entries in free_q,
 * MBUF_BURST_SZ) mbufs from rx_q.  For each mbuf an skb is allocated and
 * filled once, then a second skb is allocated and filled (single copy when
 * nb_segs == 1, otherwise one copy per segment), counted in
 * rx_bytes/rx_packets and passed to kni_net_tx().  All dequeued mbufs are
 * then put on free_q.
 *
 * NOTE(review): the assignment of 'len', the allocation-failure tests and
 * whatever is done with the first skb are not visible in this listing.
 */
522 * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
525 kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
529 uint32_t i, num_rq, num_fq, num;
530 struct rte_kni_mbuf *kva, *prev_kva;
533 struct net_device *dev = kni->net_dev;
535 /* Get the number of entries in rx_q */
536 num_rq = kni_fifo_count(kni->rx_q);
538 /* Get the number of free entries in free_q */
539 num_fq = kni_fifo_free_count(kni->free_q);
541 /* Calculate the number of entries to dequeue from rx_q */
542 num = min(num_rq, num_fq);
543 num = min_t(uint32_t, num, MBUF_BURST_SZ);
545 /* Return if no entry to dequeue from rx_q */
549 /* Burst dequeue mbufs from rx_q */
550 ret = kni_fifo_get(kni->rx_q, kni->pa, num);
554 /* Copy mbufs to sk buffer and then call tx interface */
555 for (i = 0; i < num; i++) {
556 kva = get_kva(kni, kni->pa[i]);
558 data_kva = get_data_kva(kni, kva);
559 kni->va[i] = pa2va(kni->pa[i], kva);
/* First allocate/copy pass. */
561 skb = netdev_alloc_skb(dev, len);
563 memcpy(skb_put(skb, len), data_kva, len);
564 skb->ip_summed = CHECKSUM_UNNECESSARY;
568 /* Simulate real usage, allocate/copy skb twice */
569 skb = netdev_alloc_skb(dev, len);
571 dev->stats.rx_dropped++;
575 if (kva->nb_segs == 1) {
576 memcpy(skb_put(skb, len), data_kva, len);
579 int kva_nb_segs = kva->nb_segs;
581 for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
582 memcpy(skb_put(skb, kva->data_len),
583 data_kva, kva->data_len);
589 kva = get_kva(kni, kva->next);
590 data_kva = get_data_kva(kni, kva);
591 /* Convert physical address to virtual address */
592 prev_kva->next = pa2va(prev_kva->next, kva);
596 skb->ip_summed = CHECKSUM_UNNECESSARY;
598 dev->stats.rx_bytes += len;
599 dev->stats.rx_packets++;
601 /* call tx interface */
602 kni_net_tx(skb, dev);
605 /* enqueue all the mbufs from rx_q into free_q */
606 ret = kni_fifo_put(kni->free_q, kni->va, num);
608 /* Failing should not happen */
609 pr_err("Fail to enqueue mbufs into free_q\n");
/*
 * kni_net_rx - RX entry point.
 * @kni: KNI device to poll.
 *
 * Dispatches to whichever handler kni_net_rx_func currently points at:
 * kni_net_rx_normal by default, or a loopback handler chosen by
 * kni_net_config_lo_mode().
 */
614 kni_net_rx(struct kni_dev *kni)
617 * It doesn't need to check if it is NULL pointer,
618 * as it has a default value
620 (*kni_net_rx_func)(kni);
/*
 * kni_net_tx_timeout - .ndo_tx_timeout handler.
 * @dev:     KNI net device.
 * @txqueue: only in the HAVE_TX_TIMEOUT_TXQUEUE variant of the signature;
 *           not used in the visible body.
 *
 * Logs the time since dev_trans_start(), counts a tx error and wakes the
 * transmit queue.  Two signatures are listed because the callback
 * prototype depends on HAVE_TX_TIMEOUT_TXQUEUE.
 */
624 * Deal with a transmit timeout.
626 #ifdef HAVE_TX_TIMEOUT_TXQUEUE
628 kni_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
631 kni_net_tx_timeout(struct net_device *dev)
634 pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
635 jiffies - dev_trans_start(dev));
637 dev->stats.tx_errors++;
638 netif_wake_queue(dev);
/*
 * kni_net_change_mtu - .ndo_change_mtu handler.
 * @dev:     KNI net device.
 * @new_mtu: requested MTU, forwarded in req.new_mtu.
 *
 * Sends an RTE_KNI_REQ_CHANGE_MTU request through
 * kni_net_process_request().
 *
 * Returns req.result when the request call returned 0, otherwise the
 * request call's own return value.
 *
 * NOTE(review): the statement executed when both 'ret' and 'req.result'
 * are 0 is not visible in this listing.
 */
642 kni_net_change_mtu(struct net_device *dev, int new_mtu)
645 struct rte_kni_request req;
646 struct kni_dev *kni = netdev_priv(dev);
648 pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);
650 memset(&req, 0, sizeof(req));
651 req.req_id = RTE_KNI_REQ_CHANGE_MTU;
652 req.new_mtu = new_mtu;
653 ret = kni_net_process_request(kni, &req);
654 if (ret == 0 && req.result == 0)
657 return (ret == 0) ? req.result : ret;
/*
 * kni_net_change_rx_flags - .ndo_change_rx_flags handler.
 * @netdev: KNI net device.
 * @flags:  flags tested for IFF_ALLMULTI and IFF_PROMISC.
 *
 * For IFF_ALLMULTI the request id is RTE_KNI_REQ_CHANGE_ALLMULTI; for
 * IFF_PROMISC it is RTE_KNI_REQ_CHANGE_PROMISC and req.promiscusity is set
 * to 1 or 0 according to the current netdev->flags.  IFF_PROMISC is tested
 * second, so its request id overwrites the earlier one when both bits are
 * set.  The return value of kni_net_process_request() is not used.
 *
 * NOTE(review): the assignments made in the IFF_ALLMULTI branch are not
 * visible in this listing.
 */
661 kni_net_change_rx_flags(struct net_device *netdev, int flags)
663 struct rte_kni_request req;
664 struct kni_dev *kni = netdev_priv(netdev);
666 memset(&req, 0, sizeof(req));
668 if (flags & IFF_ALLMULTI) {
669 req.req_id = RTE_KNI_REQ_CHANGE_ALLMULTI;
671 if (netdev->flags & IFF_ALLMULTI)
677 if (flags & IFF_PROMISC) {
678 req.req_id = RTE_KNI_REQ_CHANGE_PROMISC;
680 if (netdev->flags & IFF_PROMISC)
681 req.promiscusity = 1;
683 req.promiscusity = 0;
686 kni_net_process_request(kni, &req);
/*
 * kni_net_poll_resp - wake the request waiter when a response is queued.
 * @kni: KNI device whose resp_q is checked.
 *
 * When resp_q is non-empty, wakes kni->wq, the wait queue that
 * kni_net_process_request() sleeps on.
 */
690 * Checks if the user space application provided the resp message
693 kni_net_poll_resp(struct kni_dev *kni)
695 if (kni_fifo_count(kni->resp_q))
696 wake_up_interruptible(&kni->wq);
/*
 * kni_net_header - header_ops .create handler.
 * @skb:   packet buffer; ETH_HLEN bytes are pushed at its front.
 * @dev:   KNI net device supplying the default address and addr_len.
 * @type:  protocol number, stored in network byte order via htons().
 * @daddr: destination address, or NULL to use dev->dev_addr.
 * @saddr: source address, or NULL to use dev->dev_addr.
 * @len:   not used in the visible body.
 *
 * Returns dev->hard_header_len.
 */
700 * Fill the eth header
703 kni_net_header(struct sk_buff *skb, struct net_device *dev,
704 unsigned short type, const void *daddr,
705 const void *saddr, uint32_t len)
707 struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
709 memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
710 memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len);
711 eth->h_proto = htons(type);
713 return dev->hard_header_len;
/*
 * kni_net_rebuild_header - header_ops .rebuild handler, present only under
 * HAVE_REBUILD_HEADER.
 * @skb: packet whose data starts with the Ethernet header to rewrite.
 *
 * Writes skb->dev's dev_addr into both the source and destination fields.
 * NOTE(review): the return statement is not visible in this listing.
 */
717 * Re-fill the eth header
719 #ifdef HAVE_REBUILD_HEADER
721 kni_net_rebuild_header(struct sk_buff *skb)
723 struct net_device *dev = skb->dev;
724 struct ethhdr *eth = (struct ethhdr *) skb->data;
726 memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
727 memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);
/*
 * Validates the new address with is_valid_ether_addr() (-EADDRNOTAVAIL on
 * failure), copies it into both the request and netdev->dev_addr, then
 * sends an RTE_KNI_REQ_CHANGE_MAC_ADDR request.  Returns req.result when
 * the request call returned 0, otherwise the request call's return value.
 *
 * NOTE(review): netdev->dev_addr is updated before the request is sent, so
 * in the visible code it keeps the new address even when the request
 * fails -- confirm that this is intended.
 */
734 * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
735 * @netdev: network interface device structure
736 * @p: pointer to an address structure
738 * Returns 0 on success, negative on failure
741 kni_net_set_mac(struct net_device *netdev, void *p)
744 struct rte_kni_request req;
746 struct sockaddr *addr = p;
748 memset(&req, 0, sizeof(req));
749 req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR;
751 if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
752 return -EADDRNOTAVAIL;
754 memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
755 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
757 kni = netdev_priv(netdev);
758 ret = kni_net_process_request(kni, &req);
760 return (ret == 0 ? req.result : ret);
/*
 * kni_net_change_carrier - .ndo_change_carrier handler, present only under
 * HAVE_CHANGE_CARRIER_CB.
 * @dev:         KNI net device.
 * @new_carrier: requested carrier state.
 *
 * Calls netif_carrier_on() or netif_carrier_off() on @dev.
 * NOTE(review): the condition choosing between the two calls and the
 * return statement are not visible in this listing; presumably the choice
 * depends on @new_carrier -- confirm.
 */
763 #ifdef HAVE_CHANGE_CARRIER_CB
765 kni_net_change_carrier(struct net_device *dev, bool new_carrier)
768 netif_carrier_on(dev);
770 netif_carrier_off(dev);
/*
 * Link-layer header operations, installed on the device by kni_net_init().
 * .create uses the driver's own kni_net_header(), .parse uses
 * eth_header_parse, .rebuild is set only under HAVE_REBUILD_HEADER, and
 * header caching is disabled by leaving .cache NULL.
 */
775 static const struct header_ops kni_net_header_ops = {
776 .create = kni_net_header,
777 .parse = eth_header_parse,
778 #ifdef HAVE_REBUILD_HEADER
779 .rebuild = kni_net_rebuild_header,
781 .cache = NULL, /* disable caching */
/*
 * net_device callbacks, installed on the device by kni_net_init().  Every
 * entry points at a handler defined in this file; .ndo_change_carrier is
 * set only under HAVE_CHANGE_CARRIER_CB.
 */
784 static const struct net_device_ops kni_net_netdev_ops = {
785 .ndo_open = kni_net_open,
786 .ndo_stop = kni_net_release,
787 .ndo_set_config = kni_net_config,
788 .ndo_change_rx_flags = kni_net_change_rx_flags,
789 .ndo_start_xmit = kni_net_tx,
790 .ndo_change_mtu = kni_net_change_mtu,
791 .ndo_tx_timeout = kni_net_tx_timeout,
792 .ndo_set_mac_address = kni_net_set_mac,
793 #ifdef HAVE_CHANGE_CARRIER_CB
794 .ndo_change_carrier = kni_net_change_carrier,
/*
 * kni_get_drvinfo - ethtool .get_drvinfo handler.
 * @dev:  KNI net device (not used in the visible body).
 * @info: filled with KNI_VERSION as the version and "kni" as the driver
 *        name; each copy is bounded by the size of the destination field.
 */
798 static void kni_get_drvinfo(struct net_device *dev,
799 struct ethtool_drvinfo *info)
801 strlcpy(info->version, KNI_VERSION, sizeof(info->version));
802 strlcpy(info->driver, "kni", sizeof(info->driver));
/*
 * ethtool callbacks, installed on the device by kni_net_init(): driver
 * info comes from kni_get_drvinfo() and link state from
 * ethtool_op_get_link.
 */
805 static const struct ethtool_ops kni_net_ethtool_ops = {
806 .get_drvinfo = kni_get_drvinfo,
807 .get_link = ethtool_op_get_link,
/*
 * kni_net_init - initialise a KNI net device.
 * @dev: net device whose private data is the struct kni_dev.
 *
 * Initialises the wait queue and mutex used by kni_net_process_request(),
 * applies the generic Ethernet defaults with ether_setup(), installs the
 * netdev, header and ethtool operation tables defined in this file, and
 * sets the transmit watchdog timeout to WD_TIMEOUT.
 */
811 kni_net_init(struct net_device *dev)
813 struct kni_dev *kni = netdev_priv(dev);
815 init_waitqueue_head(&kni->wq);
816 mutex_init(&kni->sync_lock);
818 ether_setup(dev); /* assign some of the fields */
819 dev->netdev_ops = &kni_net_netdev_ops;
820 dev->header_ops = &kni_net_header_ops;
821 dev->ethtool_ops = &kni_net_ethtool_ops;
822 dev->watchdog_timeo = WD_TIMEOUT;
/*
 * kni_net_config_lo_mode - choose the RX handler from a loopback-mode string.
 * @lo_str: mode name.
 *
 *   "lo_mode_none"     - kni_net_rx_func is left unchanged
 *   "lo_mode_fifo"     - kni_net_rx_func = kni_net_rx_lo_fifo
 *   "lo_mode_fifo_skb" - kni_net_rx_func = kni_net_rx_lo_fifo_skb
 *   anything else      - logged as unknown; kni_net_rx_func is left unchanged
 *
 * NOTE(review): the condition guarding the first "loopback disabled"
 * message is not visible in this listing (presumably a NULL check on
 * @lo_str -- confirm).
 */
826 kni_net_config_lo_mode(char *lo_str)
829 pr_debug("loopback disabled");
833 if (!strcmp(lo_str, "lo_mode_none"))
834 pr_debug("loopback disabled");
835 else if (!strcmp(lo_str, "lo_mode_fifo")) {
836 pr_debug("loopback mode=lo_mode_fifo enabled");
837 kni_net_rx_func = kni_net_rx_lo_fifo;
838 } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
839 pr_debug("loopback mode=lo_mode_fifo_skb enabled");
840 kni_net_rx_func = kni_net_rx_lo_fifo_skb;
842 pr_debug("Unknown loopback parameter, disabled");