// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2010-2014 Intel Corporation.
 */

/*
 * This code is inspired from the book "Linux Device Drivers" by
 * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
 */

#include <linux/device.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h> /* eth_type_trans */
#include <linux/skbuff.h>
#include <linux/kthread.h>
#include <linux/delay.h>

#include <exec-env/rte_kni_common.h>
#include <kni_dev.h>
#include <kni_fifo.h>

#define WD_TIMEOUT 5 /* jiffies */

#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */

/* typedef for rx function */
typedef void (*kni_net_rx_t)(struct kni_dev *kni);

static void kni_net_rx_normal(struct kni_dev *kni);

/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;

/* physical address to kernel virtual address */
static void *
pa2kva(void *pa)
{
	return phys_to_virt((unsigned long)pa);
}

/* physical address to virtual address */
static void *
pa2va(void *pa, struct rte_kni_mbuf *m)
{
	void *va;

	va = (void *)((unsigned long)pa +
			(unsigned long)m->buf_addr -
			(unsigned long)m->buf_physaddr);
	return va;
}

/* mbuf data kernel virtual address from mbuf kernel virtual address */
static void *
kva2data_kva(struct rte_kni_mbuf *m)
{
	return phys_to_virt(m->buf_physaddr + m->data_off);
}

/* virtual address to physical address */
static void *
va2pa(void *va, struct rte_kni_mbuf *m)
{
	void *pa;

	pa = (void *)((unsigned long)va -
			((unsigned long)m->buf_addr -
			 (unsigned long)m->buf_physaddr));
	return pa;
}

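/*
 * Illustrative note (added, not in the original driver): the helpers
 * above work because an mbuf records both its userspace virtual address
 * (buf_addr) and its physical address (buf_physaddr), and its data
 * buffer is physically contiguous. Assuming, purely as an example,
 * buf_physaddr = 0x1000 and buf_addr = 0x7f2a00001000, a packet at
 * physical address pa = 0x1080 translates as:
 *
 *   kva = phys_to_virt(0x1080);                  kernel mapping
 *   va  = 0x1080 + (0x7f2a00001000 - 0x1000)
 *       = 0x7f2a00001080;                        userspace mapping
 *
 * i.e. pa2va() and va2pa() just add or subtract the constant offset
 * between the two address spaces.
 */
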
/*
 * Send a request to userspace and wait for the response.
 */
static int
kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req)
{
	int ret = -1;
	void *resp_va;
	uint32_t num;
	int ret_val;

	if (!kni || !req) {
		pr_err("No kni instance or request\n");
		return -EINVAL;
	}

	mutex_lock(&kni->sync_lock);

	/* Construct data */
	memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request));
	num = kni_fifo_put(kni->req_q, &kni->sync_va, 1);
	if (num < 1) {
		pr_err("Cannot send to req_q\n");
		ret = -EBUSY;
		goto fail;
	}

	ret_val = wait_event_interruptible_timeout(kni->wq,
			kni_fifo_count(kni->resp_q), 3 * HZ);
	if (signal_pending(current) || ret_val <= 0) {
		ret = -ETIME;
		goto fail;
	}
	num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1);
	if (num != 1 || resp_va != kni->sync_va) {
		/* This should never happen */
		pr_err("No data in resp_q\n");
		ret = -ENODATA;
		goto fail;
	}

	memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request));
	ret = 0;

fail:
	mutex_unlock(&kni->sync_lock);
	return ret;
}

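/*
 * Illustrative sketch (added, not in the original driver): a request is
 * only answered once the DPDK application services the request FIFO. A
 * typical userspace loop, assuming the standard librte_kni API (with
 * handle_packets() as a hypothetical placeholder), looks roughly like:
 *
 *   while (running) {
 *       nb = rte_kni_rx_burst(kni, mbufs, MBUF_BURST_SZ);
 *       handle_packets(mbufs, nb);
 *       rte_kni_handle_request(kni);    // drains req_q, fills resp_q
 *   }
 *
 * If the application never calls rte_kni_handle_request(), the
 * wait_event_interruptible_timeout() above expires and -ETIME is
 * returned.
 */
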
/*
 * Open and close
 */
static int
kni_net_open(struct net_device *dev)
{
	int ret;
	struct rte_kni_request req;
	struct kni_dev *kni = netdev_priv(dev);

	netif_start_queue(dev);
	if (dflt_carrier == 1)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;

	/* Setting if_up to non-zero means up */
	req.if_up = 1;
	ret = kni_net_process_request(kni, &req);

	return (ret == 0) ? req.result : ret;
}

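/*
 * Note (added): kni_net_process_request() returns the kernel-side
 * transport status, while req.result carries the application's verdict.
 * The (ret == 0) ? req.result : ret pattern used here and in the other
 * request-based callbacks therefore reports the userspace result only
 * when the request was actually delivered and answered.
 */
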
static int
kni_net_release(struct net_device *dev)
{
	int ret;
	struct rte_kni_request req;
	struct kni_dev *kni = netdev_priv(dev);

	netif_stop_queue(dev); /* can't transmit any more */
	netif_carrier_off(dev);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;

	/* Setting if_up to 0 means down */
	req.if_up = 0;
	ret = kni_net_process_request(kni, &req);

	return (ret == 0) ? req.result : ret;
}

static void
kni_fifo_trans_pa2va(struct kni_dev *kni,
	struct rte_kni_fifo *src_pa, struct rte_kni_fifo *dst_va)
{
	uint32_t ret, i, num_dst, num_rx;
	struct rte_kni_mbuf *kva;

	do {
		num_dst = kni_fifo_free_count(dst_va);
		if (num_dst == 0)
			return;

		num_rx = min_t(uint32_t, num_dst, MBUF_BURST_SZ);

		num_rx = kni_fifo_get(src_pa, kni->pa, num_rx);
		if (num_rx == 0)
			return;

		for (i = 0; i < num_rx; i++) {
			kva = pa2kva(kni->pa[i]);
			kni->va[i] = pa2va(kni->pa[i], kva);
		}

		ret = kni_fifo_put(dst_va, kni->va, num_rx);
		if (ret != num_rx) {
			/* Failing should not happen */
			pr_err("Fail to enqueue entries into dst_va\n");
			return;
		}
	} while (1);
}

/* Try to release mbufs when kni release */
void kni_net_release_fifo_phy(struct kni_dev *kni)
{
	/* release rx_q first, because it can't release in userspace */
	kni_fifo_trans_pa2va(kni, kni->rx_q, kni->free_q);
	/* release alloc_q for speeding up kni release in userspace */
	kni_fifo_trans_pa2va(kni, kni->alloc_q, kni->free_q);
}

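/*
 * Note (added): at teardown the application cannot drain rx_q itself
 * (those mbufs are in flight towards the kernel, stored as physical
 * addresses), so both rx_q and alloc_q are flushed into free_q above,
 * from where the application returns the mbufs to their mempool.
 */
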
/*
 * Configuration changes (passed on by ifconfig)
 */
static int
kni_net_config(struct net_device *dev, struct ifmap *map)
{
	if (dev->flags & IFF_UP) /* can't act on a running interface */
		return -EBUSY;

	/* ignore other fields */
	return 0;
}

/*
 * Transmit a packet (called by the kernel)
 */
static int
kni_net_tx(struct sk_buff *skb, struct net_device *dev)
{
	int len = 0;
	uint32_t ret;
	struct kni_dev *kni = netdev_priv(dev);
	struct rte_kni_mbuf *pkt_kva = NULL;
	void *pkt_pa = NULL;
	void *pkt_va = NULL;

	/* save the timestamp */
#ifdef HAVE_TRANS_START_HELPER
	netif_trans_update(dev);
#else
	dev->trans_start = jiffies;
#endif

	/* Check if the length of skb is less than mbuf size */
	if (skb->len > kni->mbuf_size)
		goto drop;

	/**
	 * Check if it has at least one free entry in tx_q and
	 * one entry in alloc_q.
	 */
	if (kni_fifo_free_count(kni->tx_q) == 0 ||
			kni_fifo_count(kni->alloc_q) == 0) {
		/**
		 * If no free entry in tx_q or no entry in alloc_q,
		 * drops skb and goes out.
		 */
		goto drop;
	}

	/* dequeue a mbuf from alloc_q */
	ret = kni_fifo_get(kni->alloc_q, &pkt_pa, 1);
	if (likely(ret == 1)) {
		void *data_kva;

		pkt_kva = pa2kva(pkt_pa);
		data_kva = kva2data_kva(pkt_kva);
		pkt_va = pa2va(pkt_pa, pkt_kva);

		len = skb->len;
		memcpy(data_kva, skb->data, len);
		if (unlikely(len < ETH_ZLEN)) {
			memset(data_kva + len, 0, ETH_ZLEN - len);
			len = ETH_ZLEN;
		}
		pkt_kva->pkt_len = len;
		pkt_kva->data_len = len;

		/* enqueue mbuf into tx_q */
		ret = kni_fifo_put(kni->tx_q, &pkt_va, 1);
		if (unlikely(ret != 1)) {
			/* Failing should not happen */
			pr_err("Fail to enqueue mbuf into tx_q\n");
			goto drop;
		}
	} else {
		/* Failing should not happen */
		pr_err("Fail to dequeue mbuf from alloc_q\n");
		goto drop;
	}

	/* Free skb and update statistics */
	dev_kfree_skb(skb);
	kni->stats.tx_bytes += len;
	kni->stats.tx_packets++;

	return NETDEV_TX_OK;

drop:
	/* Free skb and update statistics */
	dev_kfree_skb(skb);
	kni->stats.tx_dropped++;

	return NETDEV_TX_OK;
}

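/*
 * Illustrative sketch (added, not in the original driver): the
 * userspace counterpart of the tx path above, assuming the standard
 * librte_kni API:
 *
 *   struct rte_mbuf *pkts[MBUF_BURST_SZ];
 *   unsigned int nb = rte_kni_rx_burst(kni, pkts, MBUF_BURST_SZ);
 *
 * rte_kni_rx_burst() dequeues the mbufs this function put on tx_q and,
 * as a side effect, allocates fresh mbufs from the mempool into
 * alloc_q. If the application stops polling, alloc_q runs dry and this
 * function starts dropping skbs at the "one entry in alloc_q" check.
 */
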
/*
 * RX: normal working mode
 */
static void
kni_net_rx_normal(struct kni_dev *kni)
{
	uint32_t ret;
	uint32_t len;
	uint32_t i, num_rx, num_fq;
	struct rte_kni_mbuf *kva;
	void *data_kva;
	struct sk_buff *skb;
	struct net_device *dev = kni->net_dev;

	/* Get the number of free entries in free_q */
	num_fq = kni_fifo_free_count(kni->free_q);
	if (num_fq == 0) {
		/* No room on the free_q, bail out */
		return;
	}

	/* Calculate the number of entries to dequeue from rx_q */
	num_rx = min_t(uint32_t, num_fq, MBUF_BURST_SZ);

	/* Burst dequeue from rx_q */
	num_rx = kni_fifo_get(kni->rx_q, kni->pa, num_rx);
	if (num_rx == 0)
		return;

	/* Transfer received packets to netif */
	for (i = 0; i < num_rx; i++) {
		kva = pa2kva(kni->pa[i]);
		len = kva->pkt_len;
		data_kva = kva2data_kva(kva);
		kni->va[i] = pa2va(kni->pa[i], kva);

		skb = dev_alloc_skb(len + 2);
		if (!skb) {
			/* Update statistics */
			kni->stats.rx_dropped++;
			continue;
		}

		/* Align IP on 16B boundary */
		skb_reserve(skb, 2);

		if (kva->nb_segs == 1) {
			memcpy(skb_put(skb, len), data_kva, len);
		} else {
			int nb_segs;
			int kva_nb_segs = kva->nb_segs;

			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
				memcpy(skb_put(skb, kva->data_len),
					data_kva, kva->data_len);

				if (!kva->next)
					break;

				kva = pa2kva(va2pa(kva->next, kva));
				data_kva = kva2data_kva(kva);
			}
		}

		skb->dev = dev;
		skb->protocol = eth_type_trans(skb, dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		/* Call netif interface */
		netif_rx_ni(skb);

		/* Update statistics */
		kni->stats.rx_bytes += len;
		kni->stats.rx_packets++;
	}

	/* Burst enqueue mbufs into free_q */
	ret = kni_fifo_put(kni->free_q, kni->va, num_rx);
	if (ret != num_rx)
		/* Failing should not happen */
		pr_err("Fail to enqueue entries into free_q\n");
}

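/*
 * Illustrative sketch (added, not in the original driver): packets land
 * on rx_q when the application transmits towards the kernel, e.g.:
 *
 *   unsigned int sent = rte_kni_tx_burst(kni, mbufs, nb);
 *
 * The mbufs travel by physical address; this function translates them
 * back with pa2kva()/pa2va() and hands a copy to the network stack, so
 * the KNI receive path is a copying interface, not zero-copy.
 */
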
/*
 * RX: loopback with enqueue/dequeue fifos.
 */
static void
kni_net_rx_lo_fifo(struct kni_dev *kni)
{
	uint32_t ret;
	uint32_t len;
	uint32_t i, num, num_rq, num_tq, num_aq, num_fq;
	struct rte_kni_mbuf *kva;
	void *data_kva;
	struct rte_kni_mbuf *alloc_kva;
	void *alloc_data_kva;

	/* Get the number of entries in rx_q */
	num_rq = kni_fifo_count(kni->rx_q);

	/* Get the number of free entries in tx_q */
	num_tq = kni_fifo_free_count(kni->tx_q);

	/* Get the number of entries in alloc_q */
	num_aq = kni_fifo_count(kni->alloc_q);

	/* Get the number of free entries in free_q */
	num_fq = kni_fifo_free_count(kni->free_q);

	/* Calculate the number of entries to be dequeued from rx_q */
	num = min(num_rq, num_tq);
	num = min(num, num_aq);
	num = min(num, num_fq);
	num = min_t(uint32_t, num, MBUF_BURST_SZ);

	/* Return if no entry to dequeue from rx_q */
	if (num == 0)
		return;

	/* Burst dequeue from rx_q */
	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
	if (ret == 0)
		return; /* Failing should not happen */

	/* Dequeue entries from alloc_q */
	ret = kni_fifo_get(kni->alloc_q, kni->alloc_pa, num);
	if (ret) {
		num = ret;
		/* Copy mbufs */
		for (i = 0; i < num; i++) {
			kva = pa2kva(kni->pa[i]);
			len = kva->pkt_len;
			data_kva = kva2data_kva(kva);
			kni->va[i] = pa2va(kni->pa[i], kva);

			alloc_kva = pa2kva(kni->alloc_pa[i]);
			alloc_data_kva = kva2data_kva(alloc_kva);
			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);

			memcpy(alloc_data_kva, data_kva, len);
			alloc_kva->pkt_len = len;
			alloc_kva->data_len = len;

			kni->stats.tx_bytes += len;
			kni->stats.rx_bytes += len;
		}

		/* Burst enqueue mbufs into tx_q */
		ret = kni_fifo_put(kni->tx_q, kni->alloc_va, num);
		if (ret != num)
			/* Failing should not happen */
			pr_err("Fail to enqueue mbufs into tx_q\n");
	}

	/* Burst enqueue mbufs into free_q */
	ret = kni_fifo_put(kni->free_q, kni->va, num);
	if (ret != num)
		/* Failing should not happen */
		pr_err("Fail to enqueue mbufs into free_q\n");

	/**
	 * Update statistics; enqueue/dequeue failures are impossible,
	 * as all queues were checked up front.
	 */
	kni->stats.tx_packets += num;
	kni->stats.rx_packets += num;
}

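/*
 * Note (added): this loopback mode never touches the kernel network
 * stack. Each received mbuf is copied into a fresh mbuf taken from
 * alloc_q and enqueued straight onto tx_q, so it measures pure
 * FIFO-plus-memcpy cost. All four FIFO counts are sampled up front,
 * which is why the enqueues above are expected to succeed.
 */
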
/*
 * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
 */
static void
kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
{
	uint32_t ret;
	uint32_t len;
	uint32_t i, num_rq, num_fq, num;
	struct rte_kni_mbuf *kva;
	void *data_kva;
	struct sk_buff *skb;
	struct net_device *dev = kni->net_dev;

	/* Get the number of entries in rx_q */
	num_rq = kni_fifo_count(kni->rx_q);

	/* Get the number of free entries in free_q */
	num_fq = kni_fifo_free_count(kni->free_q);

	/* Calculate the number of entries to dequeue from rx_q */
	num = min(num_rq, num_fq);
	num = min_t(uint32_t, num, MBUF_BURST_SZ);

	/* Return if no entry to dequeue from rx_q */
	if (num == 0)
		return;

	/* Burst dequeue mbufs from rx_q */
	ret = kni_fifo_get(kni->rx_q, kni->pa, num);
	if (ret == 0)
		return;

	/* Copy mbufs to sk buffer and then call tx interface */
	for (i = 0; i < num; i++) {
		kva = pa2kva(kni->pa[i]);
		len = kva->pkt_len;
		data_kva = kva2data_kva(kva);
		kni->va[i] = pa2va(kni->pa[i], kva);

		skb = dev_alloc_skb(len + 2);
		if (skb) {
			/* Align IP on 16B boundary */
			skb_reserve(skb, 2);
			memcpy(skb_put(skb, len), data_kva, len);
			skb->dev = dev;
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			dev_kfree_skb(skb);
		}

		/* Simulate real usage, allocate/copy skb twice */
		skb = dev_alloc_skb(len + 2);
		if (skb == NULL) {
			kni->stats.rx_dropped++;
			continue;
		}

		/* Align IP on 16B boundary */
		skb_reserve(skb, 2);

		if (kva->nb_segs == 1) {
			memcpy(skb_put(skb, len), data_kva, len);
		} else {
			int nb_segs;
			int kva_nb_segs = kva->nb_segs;

			for (nb_segs = 0; nb_segs < kva_nb_segs; nb_segs++) {
				memcpy(skb_put(skb, kva->data_len),
					data_kva, kva->data_len);

				if (!kva->next)
					break;

				kva = pa2kva(va2pa(kva->next, kva));
				data_kva = kva2data_kva(kva);
			}
		}

		skb->dev = dev;
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		kni->stats.rx_bytes += len;
		kni->stats.rx_packets++;

		/* call tx interface */
		kni_net_tx(skb, dev);
	}

	/* enqueue all the mbufs from rx_q into free_q */
	ret = kni_fifo_put(kni->free_q, kni->va, num);
	if (ret != num)
		/* Failing should not happen */
		pr_err("Fail to enqueue mbufs into free_q\n");
}

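/*
 * Note (added): compared with kni_net_rx_lo_fifo(), this mode also
 * exercises the skb path: each packet is copied into an skb twice (the
 * first skb is freed immediately, by design) and then looped back
 * through kni_net_tx(), approximating the cost of a real traversal of
 * the kernel stack.
 */
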
/* rx interface */
void
kni_net_rx(struct kni_dev *kni)
{
	/**
	 * No need to check for a NULL pointer here:
	 * kni_net_rx_func has a default value (kni_net_rx_normal).
	 */
	(*kni_net_rx_func)(kni);
}

/*
 * Deal with a transmit timeout.
 */
static void
kni_net_tx_timeout(struct net_device *dev)
{
	struct kni_dev *kni = netdev_priv(dev);

	pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies,
			jiffies - dev_trans_start(dev));

	kni->stats.tx_errors++;
	netif_wake_queue(dev);
}

static int
kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	pr_debug("kni_net_ioctl group:%d cmd:%d\n",
		((struct kni_dev *)netdev_priv(dev))->group_id, cmd);

	return -EOPNOTSUPP;
}

static void
kni_net_set_rx_mode(struct net_device *dev)
{
}

static int
kni_net_change_mtu(struct net_device *dev, int new_mtu)
{
	int ret;
	struct rte_kni_request req;
	struct kni_dev *kni = netdev_priv(dev);

	pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CHANGE_MTU;
	req.new_mtu = new_mtu;
	ret = kni_net_process_request(kni, &req);
	if (ret == 0 && req.result == 0)
		dev->mtu = new_mtu;

	return (ret == 0) ? req.result : ret;
}

static void
kni_net_set_promiscusity(struct net_device *netdev, int flags)
{
	struct rte_kni_request req;
	struct kni_dev *kni = netdev_priv(netdev);

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CHANGE_PROMISC;

	if (netdev->flags & IFF_PROMISC)
		req.promiscusity = 1;
	else
		req.promiscusity = 0;
	kni_net_process_request(kni, &req);
}

/*
 * Checks if the user space application provided the resp message
 */
void
kni_net_poll_resp(struct kni_dev *kni)
{
	if (kni_fifo_count(kni->resp_q))
		wake_up_interruptible(&kni->wq);
}

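/*
 * Note (added): this is intended to be called periodically from the KNI
 * kernel thread (see kni_misc.c) alongside kni_net_rx(), so that a
 * caller blocked in kni_net_process_request() is woken as soon as the
 * application posts its response.
 */
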
/*
 * Return statistics to the caller
 */
static struct net_device_stats *
kni_net_stats(struct net_device *dev)
{
	struct kni_dev *kni = netdev_priv(dev);

	return &kni->stats;
}

/*
 * Fill the eth header
 */
static int
kni_net_header(struct sk_buff *skb, struct net_device *dev,
		unsigned short type, const void *daddr,
		const void *saddr, uint32_t len)
{
	struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);

	memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len);
	memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len);
	eth->h_proto = htons(type);

	return dev->hard_header_len;
}

/*
 * Re-fill the eth header
 */
#ifdef HAVE_REBUILD_HEADER
static int
kni_net_rebuild_header(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct ethhdr *eth = (struct ethhdr *) skb->data;

	memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
	memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);

	return 0;
}
#endif /* < 4.1.0  */

/**
 * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 **/
static int
kni_net_set_mac(struct net_device *netdev, void *p)
{
	int ret;
	struct rte_kni_request req;
	struct kni_dev *kni;
	struct sockaddr *addr = p;

	memset(&req, 0, sizeof(req));
	req.req_id = RTE_KNI_REQ_CHANGE_MAC_ADDR;

	if (!is_valid_ether_addr((unsigned char *)(addr->sa_data)))
		return -EADDRNOTAVAIL;

	memcpy(req.mac_addr, addr->sa_data, netdev->addr_len);
	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

	kni = netdev_priv(netdev);
	ret = kni_net_process_request(kni, &req);

	return (ret == 0 ? req.result : ret);
}

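/*
 * Note (added): netdev->dev_addr is updated before the application has
 * acknowledged the request, so if the request fails the kernel and
 * userspace views of the MAC address can diverge; the failure is only
 * visible through the return value.
 */
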
#ifdef HAVE_CHANGE_CARRIER_CB
static int
kni_net_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}
#endif

static const struct header_ops kni_net_header_ops = {
	.create  = kni_net_header,
#ifdef HAVE_REBUILD_HEADER
	.rebuild = kni_net_rebuild_header,
#endif /* < 4.1.0  */
	.cache   = NULL,  /* disable caching */
};

static const struct net_device_ops kni_net_netdev_ops = {
	.ndo_open = kni_net_open,
	.ndo_stop = kni_net_release,
	.ndo_set_config = kni_net_config,
	.ndo_change_rx_flags = kni_net_set_promiscusity,
	.ndo_start_xmit = kni_net_tx,
	.ndo_change_mtu = kni_net_change_mtu,
	.ndo_do_ioctl = kni_net_ioctl,
	.ndo_set_rx_mode = kni_net_set_rx_mode,
	.ndo_get_stats = kni_net_stats,
	.ndo_tx_timeout = kni_net_tx_timeout,
	.ndo_set_mac_address = kni_net_set_mac,
#ifdef HAVE_CHANGE_CARRIER_CB
	.ndo_change_carrier = kni_net_change_carrier,
#endif
};

void
kni_net_init(struct net_device *dev)
{
	struct kni_dev *kni = netdev_priv(dev);

	init_waitqueue_head(&kni->wq);
	mutex_init(&kni->sync_lock);

	ether_setup(dev); /* assign some of the fields */
	dev->netdev_ops = &kni_net_netdev_ops;
	dev->header_ops = &kni_net_header_ops;
	dev->watchdog_timeo = WD_TIMEOUT;
}

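/*
 * Illustrative note (added, not in the original driver): kni_net_init()
 * is not called directly here; it is passed as the setup callback when
 * the control device creates an interface, roughly:
 *
 *   net_dev = alloc_netdev(sizeof(struct kni_dev), name,
 *                          NET_NAME_UNKNOWN, kni_net_init);
 *
 * (see the ioctl handling in kni_misc.c).
 */
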
void
kni_net_config_lo_mode(char *lo_str)
{
	if (!lo_str) {
		pr_debug("loopback disabled");
		return;
	}

	if (!strcmp(lo_str, "lo_mode_none"))
		pr_debug("loopback disabled");
	else if (!strcmp(lo_str, "lo_mode_fifo")) {
		pr_debug("loopback mode=lo_mode_fifo enabled");
		kni_net_rx_func = kni_net_rx_lo_fifo;
	} else if (!strcmp(lo_str, "lo_mode_fifo_skb")) {
		pr_debug("loopback mode=lo_mode_fifo_skb enabled");
		kni_net_rx_func = kni_net_rx_lo_fifo_skb;
	} else
		pr_debug("Unknown parameter, loopback disabled");
}

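/*
 * Illustrative usage (added, not in the original driver): the loopback
 * string arrives as a module parameter when loading the KNI module,
 * e.g.:
 *
 *   insmod rte_kni.ko lo_mode=lo_mode_fifo
 *
 * Any unrecognized value leaves the default kni_net_rx_normal() in
 * place, as does loading the module without lo_mode.
 */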