// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2010-2014 Intel Corporation.
 */

/*
 * This code is inspired from the book "Linux Device Drivers" by
 * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
 */

#include <linux/device.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h> /* eth_type_trans */
#include <linux/ethtool.h>
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/rtnetlink.h>

#include <rte_kni_common.h>
#include <kni_fifo.h>

#include "compat.h"
#include "kni_dev.h"
#define WD_TIMEOUT 5 /* jiffies */

#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */

/* typedef for rx function */
typedef void (*kni_net_rx_t)(struct kni_dev *kni);

static void kni_net_rx_normal(struct kni_dev *kni);

/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
/* iova to kernel virtual address */
static inline void *
iova2kva(struct kni_dev *kni, void *iova)
{
	return phys_to_virt(iova_to_phys(kni->usr_tsk, (unsigned long)iova));
}

/* iova to mbuf data kernel virtual address */
static inline void *
iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
{
	return phys_to_virt(iova_to_phys(kni->usr_tsk, m->buf_iova) +
			    m->data_off);
}
#endif

/* physical address to kernel virtual address */
static void *
pa2kva(void *pa)
{
	return phys_to_virt((unsigned long)pa);
}

/* physical address to virtual address */
static void *
pa2va(void *pa, struct rte_kni_mbuf *m)
{
	return (void *)((unsigned long)pa +
			(unsigned long)m->buf_addr -
			(unsigned long)m->buf_iova);
}

/* mbuf data kernel virtual address from mbuf kernel virtual address */
static void *
kva2data_kva(struct rte_kni_mbuf *m)
{
	return phys_to_virt(m->buf_iova + m->data_off);
}

/* resolve an mbuf pointer taken from a fifo to its kernel virtual address */
static inline void *
get_kva(struct kni_dev *kni, void *pa)
{
#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
	if (kni->iova_mode == 1)
		return iova2kva(kni, pa);
#endif
	return pa2kva(pa);
}

/* resolve the packet data of an mbuf to its kernel virtual address */
static inline void *
get_data_kva(struct kni_dev *kni, void *pkt_kva)
{
#ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
	if (kni->iova_mode == 1)
		return iova2data_kva(kni, pkt_kva);
#endif
	return kva2data_kva(pkt_kva);
}
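/*
 * Usage sketch (illustrative only, not compiled as a separate helper):
 * the TX/RX paths below resolve each mbuf pointer dequeued from a
 * physical-address fifo in three steps. The variable names here are
 * purely for illustration.
 *
 *	struct rte_kni_mbuf *kva = get_kva(kni, pa);   // mbuf header, kernel VA
 *	void *data_kva = get_data_kva(kni, kva);       // packet data, kernel VA
 *	void *va = pa2va(pa, kva);                     // userspace VA, put back on free_q
 */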
/*
 * It can be called to process the request.
 */
static int
kni_net_process_request(struct net_device *dev, struct rte_kni_request *req)
{
	struct kni_dev *kni = netdev_priv(dev);
	int ret = -1;
	void *resp_va;
	uint32_t num;
	int ret_val;

	ASSERT_RTNL();

	if (bifurcated_support) {
		/* If we need to wait and RTNL mutex is held
		 * drop the mutex and hold reference to keep device
		 */
		if (req->async == 0) {
			dev_hold(dev);
			rtnl_unlock();
		}
	}

	mutex_lock(&kni->sync_lock);

	/* Construct data */
	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
	num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
	if (num < 1) {
		pr_err("Cannot send to req_q\n");
		ret = -EBUSY;
		goto fail;
	}

	if (bifurcated_support) {
		/* No result available since request is handled
		 * asynchronously. set response to success.
		 */
		if (req->async != 0) {
			req->result = 0;
			goto async;
		}
	}

	ret_val = wait_event_interruptible_timeout(kni->wq,
			kni_fifo_count(kni->resp_q), 3 * HZ);
	if (signal_pending(current) || ret_val <= 0) {
		ret = -ETIME;
		goto fail;
	}
	num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
	if (num != 1 || resp_va != kni->sync_va) {
		/* This should never happen */
		pr_err("No data in resp_q\n");
		ret = -ENODATA;
		goto fail;
	}

	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
async:
	ret = 0;
fail:
	mutex_unlock(&kni->sync_lock);
	if (bifurcated_support) {
		if (req->async == 0) {
			rtnl_lock();
			dev_put(dev);
		}
	}
	return ret;
}
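/*
 * Illustrative caller pattern (it mirrors kni_net_open() and
 * kni_net_change_mtu() below; not additional driver code): build a
 * request on the stack, fill req_id and its argument, forward it to
 * userspace, then report either the transport error or the result
 * carried back in the request.
 *
 *	struct rte_kni_request req;
 *
 *	memset(&req, 0, sizeof(req));
 *	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
 *	req.new_mtu = new_mtu;
 *	ret = kni_net_process_request(dev, &req);
 *	return (ret == 0) ? req.result : ret;
 */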
/*
 * Open and close
 */
static int
kni_net_open(struct net_device *dev)
{
	int ret;
	struct rte_kni_request req;

	netif_start_queue(dev);
	if (kni_dflt_carrier == 1)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;

	/* Setting if_up to non-zero means up */
	req.if_up = 1;
	ret = kni_net_process_request(dev, &req);

	return (ret == 0) ? req.result : ret;
}

static int
kni_net_release(struct net_device *dev)
{
	int ret;
	struct rte_kni_request req;

	netif_stop_queue(dev); /* can't transmit any more */
	netif_carrier_off(dev);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;

	/* Setting if_up to 0 means down */
	req.if_up = 0;

	if (bifurcated_support) {
		/* request async because of the deadlock problem */
		req.async = 1;
	}

	ret = kni_net_process_request(dev, &req);

	return (ret == 0) ? req.result : ret;
}
static void
kni_fifo_trans_pa2va(struct kni_dev *kni,
	struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
{
	uint32_t ret, i, num_dst, num_rx;
	struct rte_kni_mbuf *kva, *prev_kva;
	int nb_segs;
	int kva_nb_segs;

	do {
		num_dst = kni_fifo_free_count(dst_va);
		if (num_dst == 0)
			return;

		num_rx = min_t(uint32_t, num_dst, MBUF_BURST_SZ);

		num_rx = kni_fifo_get(src_pa, kni->pa, num_rx);
		if (num_rx == 0)
			return;

		for (i = 0; i < num_rx; i++) {
			kva = get_kva(kni, kni->pa[i]);
			kni->va[i] = pa2va(kni->pa[i], kva);

			kva_nb_segs = kva->nb_segs;
			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
				if (!kva->next)
					break;

				prev_kva = kva;
				kva = get_kva(kni, kva->next);
				/* Convert physical address to virtual address */
				prev_kva->next = pa2va(prev_kva->next, kva);
			}
		}

		ret = kni_fifo_put(dst_va, kni->va, num_rx);
		if (ret != num_rx) {
			/* Failing should not happen */
			pr_err("Fail to enqueue entries into dst_va\n");
			return;
		}
	} while (1);
}
/* Try to release mbufs when kni release */
void kni_net_release_fifo_phy(struct kni_dev *kni)
{
	/* release rx_q first, because it can't release in userspace */
	kni_fifo_trans_pa2va(kni, kni->rx_q, kni->free_q);
	/* release alloc_q for speeding up kni release in userspace */
	kni_fifo_trans_pa2va(kni, kni->alloc_q, kni->free_q);
}
/*
 * Configuration changes (passed on by ifconfig)
 */
static int
kni_net_config(struct net_device *dev, struct ifmap *map)
{
	if (dev->flags & IFF_UP) /* can't act on a running interface */
		return -EBUSY;
	/* ignore other fields */
	return 0;
}
/*
 * Transmit a packet (called by the kernel)
 */
static int
kni_net_tx(struct sk_buff *skb, struct net_device *dev)
{
	int len = 0;
	uint32_t ret;
	struct kni_dev *kni = netdev_priv(dev);
	struct rte_kni_mbuf *pkt_kva = NULL;
	void *pkt_pa = NULL;
	void *pkt_va = NULL;

	/* save the timestamp */
#ifdef HAVE_TRANS_START_HELPER
	netif_trans_update(dev);
#else
	dev->trans_start = jiffies;
#endif

	/* Check if the length of skb is less than mbuf size */
	if (skb->len > kni->mbuf_size)
		goto drop;

	/**
	 * Check if it has at least one free entry in tx_q and
	 * one entry in alloc_q.
	 */
	if (kni_fifo_free_count(kni->tx_q) == 0 ||
			kni_fifo_count(kni->alloc_q) == 0) {
		/**
		 * If no free entry in tx_q or no entry in alloc_q,
		 * drops skb and goes out.
		 */
		goto drop;
	}

	/* dequeue a mbuf from alloc_q */
	ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
	if (likely(ret == 1)) {
		void *data_kva;

		pkt_kva = get_kva(kni, pkt_pa);
		data_kva = get_data_kva(kni, pkt_kva);
		pkt_va = pa2va(pkt_pa, pkt_kva);

		len = skb->len;
		memcpy(data_kva, skb->data, len);
		if (unlikely(len < ETH_ZLEN)) {
			memset(data_kva + len, 0, ETH_ZLEN - len);
			len = ETH_ZLEN;
		}
		pkt_kva->pkt_len = len;
		pkt_kva->data_len = len;

		/* enqueue mbuf into tx_q */
		ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
		if (unlikely(ret != 1)) {
			/* Failing should not happen */
			pr_err("Fail to enqueue mbuf into tx_q\n");
			goto drop;
		}
	} else {
		/* Failing should not happen */
		pr_err("Fail to dequeue mbuf from alloc_q\n");
		goto drop;
	}

	/* Free skb and update statistics */
	dev_kfree_skb(skb);
	dev->stats.tx_bytes += len;
	dev->stats.tx_packets++;

	return NETDEV_TX_OK;

drop:
	/* Free skb and update statistics */
	dev_kfree_skb(skb);
	dev->stats.tx_dropped++;

	return NETDEV_TX_OK;
}
/*
 * RX: normal working mode
 */
static void
kni_net_rx_normal(struct kni_dev *kni)
{
	uint32_t ret;
	uint32_t len;
	uint32_t i, num_rx, num_fq;
	struct rte_kni_mbuf *kva, *prev_kva;
	void *data_kva;
	struct sk_buff *skb;
	struct net_device *dev = kni->net_dev;

	/* Get the number of free entries in free_q */
	num_fq = kni_fifo_free_count(kni->free_q);
	if (num_fq == 0) {
		/* No room on the free_q, bail out */
		return;
	}

	/* Calculate the number of entries to dequeue from rx_q */
	num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);

	/* Burst dequeue from rx_q */
	num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
	if (num_rx == 0)
		return;

	/* Transfer received packets to netif */
	for (i = 0; i < num_rx; i++) {
		kva = get_kva(kni, kni->pa[i]);
		len = kva->pkt_len;
		data_kva = get_data_kva(kni, kva);
		kni->va[i] = pa2va(kni->pa[i], kva);

		skb = netdev_alloc_skb(dev, len);
		if (!skb) {
			/* Update statistics */
			dev->stats.rx_dropped++;
			continue;
		}

		if (kva->nb_segs == 1) {
			memcpy(skb_put(skb, len), data_kva, len);
		} else {
			int nb_segs;
			int kva_nb_segs = kva->nb_segs;

			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
				memcpy(skb_put(skb, kva->data_len),
					data_kva, kva->data_len);

				if (!kva->next)
					break;

				prev_kva = kva;
				kva = get_kva(kni, kva->next);
				data_kva = kva2data_kva(kva);
				/* Convert physical address to virtual address */
				prev_kva->next = pa2va(prev_kva->next, kva);
			}
		}

		skb->protocol = eth_type_trans(skb, dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		/* Call netif interface */
#ifdef HAVE_NETIF_RX_NI
		netif_rx_ni(skb);
#else
		netif_rx(skb);
#endif

		/* Update statistics */
		dev->stats.rx_bytes += len;
		dev->stats.rx_packets++;
	}

	/* Burst enqueue mbufs into free_q */
	ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
	if (ret != num_rx)
		/* Failing should not happen */
		pr_err("Fail to enqueue entries into free_q\n");
}
/*
 * RX: loopback with enqueue/dequeue fifos.
 */
static void
kni_net_rx_lo_fifo(struct kni_dev *kni)
{
	uint32_t ret;
	uint32_t len;
	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
	struct rte_kni_mbuf *kva, *next_kva;
	void *data_kva;
	struct rte_kni_mbuf *alloc_kva;
	void *alloc_data_kva;
	struct net_device *dev = kni->net_dev;

	/* Get the number of entries in rx_q */
	num_rq = kni_fifo_count(kni->rx_q);

	/* Get the number of free entries in tx_q */
	num_tq = kni_fifo_free_count(kni->tx_q);

	/* Get the number of entries in alloc_q */
	num_aq = kni_fifo_count(kni->alloc_q);

	/* Get the number of free entries in free_q */
	num_fq = kni_fifo_free_count(kni->free_q);

	/* Calculate the number of entries to be dequeued from rx_q */
	num = min(num_rq, num_tq);
	num = min(num, num_aq);
	num = min(num, num_fq);
	num = min_t(uint32_t, num, MBUF_BURST_SZ);

	/* Return if no entry to dequeue from rx_q */
	if (num == 0)
		return;

	/* Burst dequeue from rx_q */
	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
	if (ret == 0)
		return; /* Failing should not happen */

	/* Dequeue entries from alloc_q */
	ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
	if (ret) {
		num = ret;
		/* Copy mbufs */
		for (i = 0; i < num; i++) {
			kva = get_kva(kni, kni->pa[i]);
			len = kva->data_len;
			data_kva = get_data_kva(kni, kva);
			kni->va[i] = pa2va(kni->pa[i], kva);

			while (kva->next) {
				next_kva = get_kva(kni, kva->next);
				/* Convert physical address to virtual address */
				kva->next = pa2va(kva->next, next_kva);
				kva = next_kva;
			}

			alloc_kva = get_kva(kni, kni->alloc_pa[i]);
			alloc_data_kva = get_data_kva(kni, alloc_kva);
			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);

			memcpy(alloc_data_kva, data_kva, len);
			alloc_kva->pkt_len = len;
			alloc_kva->data_len = len;

			dev->stats.tx_bytes += len;
			dev->stats.rx_bytes += len;
		}

		/* Burst enqueue mbufs into tx_q */
		ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
		if (ret != num)
			/* Failing should not happen */
			pr_err("Fail to enqueue mbufs into tx_q\n");
	}

	/* Burst enqueue mbufs into free_q */
	ret = kni_fifo_put(kni->free_q, kni->va, num);
	if (ret != num)
		/* Failing should not happen */
		pr_err("Fail to enqueue mbufs into free_q\n");

	/**
	 * Update statistic, and enqueue/dequeue failure is impossible,
	 * as all queues are checked at first.
	 */
	dev->stats.tx_packets += num;
	dev->stats.rx_packets += num;
}
/*
 * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
 */
static void
kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
{
	uint32_t ret;
	uint32_t len;
	uint32_t i, num_rq, num_fq, num;
	struct rte_kni_mbuf *kva, *prev_kva;
	void *data_kva;
	struct sk_buff *skb;
	struct net_device *dev = kni->net_dev;

	/* Get the number of entries in rx_q */
	num_rq = kni_fifo_count(kni->rx_q);

	/* Get the number of free entries in free_q */
	num_fq = kni_fifo_free_count(kni->free_q);

	/* Calculate the number of entries to dequeue from rx_q */
	num = min(num_rq, num_fq);
	num = min_t(uint32_t, num, MBUF_BURST_SZ);

	/* Return if no entry to dequeue from rx_q */
	if (num == 0)
		return;

	/* Burst dequeue mbufs from rx_q */
	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
	if (ret == 0)
		return;

	/* Copy mbufs to sk buffer and then call tx interface */
	for (i = 0; i < num; i++) {
		kva = get_kva(kni, kni->pa[i]);
		len = kva->data_len;
		data_kva = get_data_kva(kni, kva);
		kni->va[i] = pa2va(kni->pa[i], kva);

		skb = netdev_alloc_skb(dev, len);
		if (skb) {
			memcpy(skb_put(skb, len), data_kva, len);
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			dev_kfree_skb(skb);
		}

		/* Simulate real usage, allocate/copy skb twice */
		skb = netdev_alloc_skb(dev, len);
		if (skb == NULL) {
			dev->stats.rx_dropped++;
			continue;
		}

		if (kva->nb_segs == 1) {
			memcpy(skb_put(skb, len), data_kva, len);
		} else {
			int nb_segs;
			int kva_nb_segs = kva->nb_segs;

			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
				memcpy(skb_put(skb, kva->data_len),
					data_kva, kva->data_len);

				if (!kva->next)
					break;

				prev_kva = kva;
				kva = get_kva(kni, kva->next);
				data_kva = get_data_kva(kni, kva);
				/* Convert physical address to virtual address */
				prev_kva->next = pa2va(prev_kva->next, kva);
			}
		}

		skb->ip_summed = CHECKSUM_UNNECESSARY;

		dev->stats.rx_bytes += len;
		dev->stats.rx_packets++;

		/* call tx interface */
		kni_net_tx(skb, dev);
	}

	/* enqueue all the mbufs from rx_q into free_q */
	ret = kni_fifo_put(kni->free_q, kni->va, num);
	if (ret != num)
		/* Failing should not happen */
		pr_err("Fail to enqueue mbufs into free_q\n");
}
/* rx interface: dispatch to the configured rx handler */
void
kni_net_rx(struct kni_dev *kni)
{
	/**
	 * It doesn't need to check if it is NULL pointer,
	 * as it has a default value
	 */
	(*kni_net_rx_func)(kni);
}
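/*
 * For context: this entry point is expected to be driven periodically by
 * the receive kthread in the companion file kni_misc.c. A rough sketch of
 * that loop (names recalled from the companion file and possibly inexact):
 *
 *	while (!kthread_should_stop()) {
 *		down_read(&knet->kni_list_lock);
 *		list_for_each_entry(dev, &knet->kni_list_head, list) {
 *			kni_net_rx(dev);
 *			kni_net_poll_resp(dev);
 *		}
 *		up_read(&knet->kni_list_lock);
 *	}
 */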
/*
 * Deal with a transmit timeout.
 */
#ifdef HAVE_TX_TIMEOUT_TXQUEUE
static void
kni_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
#else
static void
kni_net_tx_timeout(struct net_device *dev)
#endif
{
	pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
			jiffies - dev_trans_start(dev));

	dev->stats.tx_errors++;
	netif_wake_queue(dev);
}
static int
kni_net_change_mtu(struct net_device *dev, int new_mtu)
{
	int ret;
	struct rte_kni_request req;

	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
	req.new_mtu = new_mtu;
	ret = kni_net_process_request(dev, &req);
	if (ret == 0 && req.result == 0)
		dev->mtu = new_mtu;

	return (ret == 0) ? req.result : ret;
}
static void
kni_net_change_rx_flags(struct net_device *netdev, int flags)
{
	struct rte_kni_request req;

	memset(&req, 0, sizeof(req));

	if (flags & IFF_ALLMULTI) {
		req.req_id = RTE_KNI_REQ_CHANGE_ALLMULTI;

		if (netdev->flags & IFF_ALLMULTI)
			req.allmulti = 1;
		else
			req.allmulti = 0;
	}

	if (flags & IFF_PROMISC) {
		req.req_id = RTE_KNI_REQ_CHANGE_PROMISC;

		if (netdev->flags & IFF_PROMISC)
			req.promiscusity = 1;
		else
			req.promiscusity = 0;
	}

	kni_net_process_request(netdev, &req);
}
/*
 * Checks if the user space application provided the resp message
 */
void
kni_net_poll_resp(struct kni_dev *kni)
{
	if (kni_fifo_count(kni->resp_q))
		wake_up_interruptible(&kni->wq);
}
/*
 * Fill the eth header
 */
static int
kni_net_header(struct sk_buff *skb, struct net_device *dev,
		unsigned short type, const void *daddr,
		const void *saddr, uint32_t len)
{
	struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);

	memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
	memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len);
	eth->h_proto = htons(type);

	return dev->hard_header_len;
}
/*
 * Re-fill the eth header
 */
#ifdef HAVE_REBUILD_HEADER
static int
kni_net_rebuild_header(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct ethhdr *eth = (struct ethhdr *) skb->data;

	memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
	memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);

	return 0;
}
#endif /* < 4.1.0 */
/**
 * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 */
static int
kni_net_set_mac(struct net_device *netdev, void *p)
{
	int ret;
	struct rte_kni_request req;
	struct sockaddr *addr = p;

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR;

	if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
		return -EADDRNOTAVAIL;

	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
#ifdef HAVE_ETH_HW_ADDR_SET
	eth_hw_addr_set(netdev, addr->sa_data);
#else
	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
#endif

	ret = kni_net_process_request(netdev, &req);

	return (ret == 0 ? req.result : ret);
}
#ifdef HAVE_CHANGE_CARRIER_CB
static int
kni_net_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}
#endif
static const struct header_ops kni_net_header_ops = {
	.create  = kni_net_header,
	.parse   = eth_header_parse,
#ifdef HAVE_REBUILD_HEADER
	.rebuild = kni_net_rebuild_header,
#endif /* < 4.1.0 */
	.cache   = NULL,  /* disable caching */
};
static const struct net_device_ops kni_net_netdev_ops = {
	.ndo_open = kni_net_open,
	.ndo_stop = kni_net_release,
	.ndo_set_config = kni_net_config,
	.ndo_change_rx_flags = kni_net_change_rx_flags,
	.ndo_start_xmit = kni_net_tx,
	.ndo_change_mtu = kni_net_change_mtu,
	.ndo_tx_timeout = kni_net_tx_timeout,
	.ndo_set_mac_address = kni_net_set_mac,
#ifdef HAVE_CHANGE_CARRIER_CB
	.ndo_change_carrier = kni_net_change_carrier,
#endif
};
static void kni_get_drvinfo(struct net_device *dev,
			    struct ethtool_drvinfo *info)
{
	strlcpy(info->version, KNI_VERSION, sizeof(info->version));
	strlcpy(info->driver, "kni", sizeof(info->driver));
}

static const struct ethtool_ops kni_net_ethtool_ops = {
	.get_drvinfo	= kni_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};
void
kni_net_init(struct net_device *dev)
{
	struct kni_dev *kni = netdev_priv(dev);

	init_waitqueue_head(&kni->wq);
	mutex_init(&kni->sync_lock);

	ether_setup(dev); /* assign some of the fields */
	dev->netdev_ops = &kni_net_netdev_ops;
	dev->header_ops = &kni_net_header_ops;
	dev->ethtool_ops = &kni_net_ethtool_ops;
	dev->watchdog_timeo = WD_TIMEOUT;
}
void
kni_net_config_lo_mode(char *lo_str)
{
	if (!lo_str) {
		pr_debug("loopback disabled");
		return;
	}

	if (!strcmp(lo_str, "lo_mode_none"))
		pr_debug("loopback disabled");
	else if (!strcmp(lo_str, "lo_mode_fifo")) {
		pr_debug("loopback mode=lo_mode_fifo enabled");
		kni_net_rx_func = kni_net_rx_lo_fifo;
	} else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
		pr_debug("loopback mode=lo_mode_fifo_skb enabled");
		kni_net_rx_func = kni_net_rx_lo_fifo_skb;
	} else {
		pr_debug("Unknown loopback parameter, disabled");
	}
}
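/*
 * Loopback selection example (assuming the lo_mode module parameter that
 * kni_misc.c passes into kni_net_config_lo_mode(); illustrative only):
 *
 *	# modprobe rte_kni lo_mode=lo_mode_fifo       -> kni_net_rx_lo_fifo
 *	# modprobe rte_kni lo_mode=lo_mode_fifo_skb   -> kni_net_rx_lo_fifo_skb
 *	# modprobe rte_kni                            -> kni_net_rx_normal (default)
 */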