net: add rte prefix to ether structures
[dpdk.git] / lib / librte_kni / rte_kni.c
index df2e015..7a41788 100644 (file)
@@ -1,37 +1,8 @@
-/*-
- *   BSD LICENSE
- * 
- *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *   All rights reserved.
- * 
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- * 
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- * 
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
  */
 
-#ifndef RTE_EXEC_ENV_LINUXAPP
+#ifndef RTE_EXEC_ENV_LINUX
 #error "KNI is not supported"
 #endif
 
 #include <unistd.h>
 #include <sys/ioctl.h>
 
+#include <rte_spinlock.h>
 #include <rte_string_fns.h>
 #include <rte_ethdev.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_kni.h>
 #include <rte_memzone.h>
-#include <exec-env/rte_kni_common.h>
+#include <rte_tailq.h>
+#include <rte_rwlock.h>
+#include <rte_eal_memconfig.h>
+#include <rte_kni_common.h>
 #include "rte_kni_fifo.h"
 
 #define MAX_MBUF_BURST_NUM            32
 
 #define KNI_REQUEST_MBUF_NUM_MAX      32
 
-#define KNI_MZ_CHECK(mz) do { if (mz) goto fail; } while (0)
+#define KNI_MEM_CHECK(cond, fail) do { if (cond) goto fail; } while (0)
+
+#define KNI_MZ_NAME_FMT                        "kni_info_%s"
+#define KNI_TX_Q_MZ_NAME_FMT           "kni_tx_%s"
+#define KNI_RX_Q_MZ_NAME_FMT           "kni_rx_%s"
+#define KNI_ALLOC_Q_MZ_NAME_FMT                "kni_alloc_%s"
+#define KNI_FREE_Q_MZ_NAME_FMT         "kni_free_%s"
+#define KNI_REQ_Q_MZ_NAME_FMT          "kni_req_%s"
+#define KNI_RESP_Q_MZ_NAME_FMT         "kni_resp_%s"
+#define KNI_SYNC_ADDR_MZ_NAME_FMT      "kni_sync_%s"
+
+TAILQ_HEAD(rte_kni_list, rte_tailq_entry); /* list head type; entries carry a struct rte_kni in ->data */
+
+static struct rte_tailq_elem rte_kni_tailq = { /* tailq whose .head is cast to rte_kni_list by the KNI code */
+       .name = "RTE_KNI",
+};
+EAL_REGISTER_TAILQ(rte_kni_tailq) /* NOTE(review): presumably registers the tailq with EAL at startup - confirm */
 
 /**
  * KNI context
 struct rte_kni {
        char name[RTE_KNI_NAMESIZE];        /**< KNI interface name */
        uint16_t group_id;                  /**< Group ID of KNI devices */
+       uint32_t slot_id;                   /**< KNI pool slot ID */
        struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
        unsigned mbuf_size;                 /**< mbuf size */
 
+       const struct rte_memzone *m_tx_q;   /**< TX queue memzone */
+       const struct rte_memzone *m_rx_q;   /**< RX queue memzone */
+       const struct rte_memzone *m_alloc_q;/**< Alloc queue memzone */
+       const struct rte_memzone *m_free_q; /**< Free queue memzone */
+
        struct rte_kni_fifo *tx_q;          /**< TX queue */
        struct rte_kni_fifo *rx_q;          /**< RX queue */
        struct rte_kni_fifo *alloc_q;       /**< Allocated mbufs queue */
        struct rte_kni_fifo *free_q;        /**< To be freed mbufs queue */
 
+       const struct rte_memzone *m_req_q;  /**< Request queue memzone */
+       const struct rte_memzone *m_resp_q; /**< Response queue memzone */
+       const struct rte_memzone *m_sync_addr;/**< Sync addr memzone */
+
        /* For request & response */
        struct rte_kni_fifo *req_q;         /**< Request queue */
        struct rte_kni_fifo *resp_q;        /**< Response queue */
        void * sync_addr;                   /**< Req/Resp Mem address */
 
        struct rte_kni_ops ops;             /**< operations for request */
-       uint8_t in_use : 1;                 /**< kni in use */
 };
 
 enum kni_ops_status {
@@ -93,42 +93,105 @@ static void kni_allocate_mbufs(struct rte_kni *kni);
 
 static volatile int kni_fd = -1;
 
-static const struct rte_memzone *
-kni_memzone_reserve(const char *name, size_t len, int socket_id,
-                                               unsigned flags)
+/*
+ * Open the KNI control device unless it is already open.
+ * Shall be called before any allocation happens: rte_kni_alloc() bails
+ * out while kni_fd is still negative.
+ * The max_kni_ifaces argument is unused.
+ * Returns 0 when the descriptor is available, -1 when open() failed.
+ */
+int
+rte_kni_init(unsigned int max_kni_ifaces __rte_unused)
+{
+       /* A previous call already opened the device */
+       if (kni_fd >= 0)
+               return 0;
+
+       kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
+       if (kni_fd >= 0)
+               return 0;
+
+       RTE_LOG(ERR, KNI, "Can not open /dev/%s\n", KNI_DEVICE);
+       return -1;
+}
+
+static struct rte_kni *
+__rte_kni_get(const char *name)
 {
-       const struct rte_memzone *mz = rte_memzone_lookup(name);
+       struct rte_kni *kni;
+       struct rte_tailq_entry *te;
+       struct rte_kni_list *kni_list;
 
-       if (mz == NULL)
-               mz = rte_memzone_reserve(name, len, socket_id, flags);
+       kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list);
 
-       return mz;
+       TAILQ_FOREACH(te, kni_list, next) {
+               kni = te->data;
+               if (strncmp(name, kni->name, RTE_KNI_NAMESIZE) == 0)
+                       break;
+       }
+
+       if (te == NULL)
+               kni = NULL;
+
+       return kni;
 }
 
-/* It is deprecated and just for backward compatibility */
-struct rte_kni *
-rte_kni_create(uint8_t port_id,
-              unsigned mbuf_size,
-              struct rte_mempool *pktmbuf_pool,
-              struct rte_kni_ops *ops)
+static int
+kni_reserve_mz(struct rte_kni *kni)
 {
-       struct rte_kni_conf conf;
-       struct rte_eth_dev_info info;
+       char mz_name[RTE_MEMZONE_NAMESIZE];
 
-       memset(&info, 0, sizeof(info));
-       memset(&conf, 0, sizeof(conf));
-       rte_eth_dev_info_get(port_id, &info);
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_TX_Q_MZ_NAME_FMT, kni->name);
+       kni->m_tx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_tx_q == NULL, tx_q_fail);
 
-       rte_snprintf(conf.name, sizeof(conf.name), "vEth%u", port_id);
-       conf.addr = info.pci_dev->addr;
-       conf.id = info.pci_dev->id;
-       conf.group_id = (uint16_t)port_id;
-       conf.mbuf_size = mbuf_size;
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RX_Q_MZ_NAME_FMT, kni->name);
+       kni->m_rx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_rx_q == NULL, rx_q_fail);
 
-       /* Save the port id for request handling */
-       ops->port_id = port_id;
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_ALLOC_Q_MZ_NAME_FMT, kni->name);
+       kni->m_alloc_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_alloc_q == NULL, alloc_q_fail);
 
-       return rte_kni_alloc(pktmbuf_pool, &conf, ops);
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_FREE_Q_MZ_NAME_FMT, kni->name);
+       kni->m_free_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_free_q == NULL, free_q_fail);
+
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_REQ_Q_MZ_NAME_FMT, kni->name);
+       kni->m_req_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_req_q == NULL, req_q_fail);
+
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RESP_Q_MZ_NAME_FMT, kni->name);
+       kni->m_resp_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_resp_q == NULL, resp_q_fail);
+
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_SYNC_ADDR_MZ_NAME_FMT, kni->name);
+       kni->m_sync_addr = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_sync_addr == NULL, sync_addr_fail);
+
+       return 0;
+
+sync_addr_fail:
+       rte_memzone_free(kni->m_resp_q);
+resp_q_fail:
+       rte_memzone_free(kni->m_req_q);
+req_q_fail:
+       rte_memzone_free(kni->m_free_q);
+free_q_fail:
+       rte_memzone_free(kni->m_alloc_q);
+alloc_q_fail:
+       rte_memzone_free(kni->m_rx_q);
+rx_q_fail:
+       rte_memzone_free(kni->m_tx_q);
+tx_q_fail:
+       return -1;
+}
+
+/*
+ * Free every memzone referenced by a KNI context: the TX, RX, alloc,
+ * free, request and response queues and the sync area, in that order.
+ * The result of rte_memzone_free() is not checked.
+ */
+static void
+kni_release_mz(struct rte_kni *kni)
+{
+       const struct rte_memzone *const zones[] = {
+               kni->m_tx_q,
+               kni->m_rx_q,
+               kni->m_alloc_q,
+               kni->m_free_q,
+               kni->m_req_q,
+               kni->m_resp_q,
+               kni->m_sync_addr,
+       };
+       unsigned int idx;
+
+       for (idx = 0; idx < sizeof(zones) / sizeof(zones[0]); idx++)
+               rte_memzone_free(zones[idx]);
 }
 
 struct rte_kni *
@@ -138,40 +201,45 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 {
        int ret;
        struct rte_kni_device_info dev_info;
-       struct rte_kni *ctx;
-       char intf_name[RTE_KNI_NAMESIZE];
-#define OBJNAMSIZ 32
-       char obj_name[OBJNAMSIZ];
-       char mz_name[RTE_MEMZONE_NAMESIZE];
-       const struct rte_memzone *mz;
+       struct rte_kni *kni;
+       struct rte_tailq_entry *te;
+       struct rte_kni_list *kni_list;
 
        if (!pktmbuf_pool || !conf || !conf->name[0])
                return NULL;
 
-       /* Check FD and open once */
+       /* Check if KNI subsystem has been initialized */
        if (kni_fd < 0) {
-               kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
-               if (kni_fd < 0) {
-                       RTE_LOG(ERR, KNI, "Can not open /dev/%s\n",
-                                                       KNI_DEVICE);
-                       return NULL;
-               }
+               RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n");
+               return NULL;
+       }
+
+       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+       kni = __rte_kni_get(conf->name);
+       if (kni != NULL) {
+               RTE_LOG(ERR, KNI, "KNI already exists\n");
+               goto unlock;
        }
 
-       rte_snprintf(intf_name, RTE_KNI_NAMESIZE, conf->name);
-       rte_snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", intf_name);
-       mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni), 
-                               SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx = mz->addr;
+       te = rte_zmalloc("KNI_TAILQ_ENTRY", sizeof(*te), 0);
+       if (te == NULL) {
+               RTE_LOG(ERR, KNI, "Failed to allocate tailq entry\n");
+               goto unlock;
+       }
 
-       if (ctx->in_use) {
-               RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
-               goto fail;
+       kni = rte_zmalloc("KNI", sizeof(struct rte_kni), RTE_CACHE_LINE_SIZE);
+       if (kni == NULL) {
+               RTE_LOG(ERR, KNI, "KNI memory allocation failed\n");
+               goto kni_fail;
        }
-       memset(ctx, 0, sizeof(struct rte_kni));
+
+       strlcpy(kni->name, conf->name, RTE_KNI_NAMESIZE);
+
        if (ops)
-               memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops));
+               memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops));
+       else
+               kni->ops.port_id = UINT16_MAX;
 
        memset(&dev_info, 0, sizeof(dev_info));
        dev_info.bus = conf->addr.bus;
@@ -183,88 +251,83 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
        dev_info.force_bind = conf->force_bind;
        dev_info.group_id = conf->group_id;
        dev_info.mbuf_size = conf->mbuf_size;
+       dev_info.mtu = conf->mtu;
 
-       rte_snprintf(ctx->name, RTE_KNI_NAMESIZE, intf_name);
-       rte_snprintf(dev_info.name, RTE_KNI_NAMESIZE, intf_name);
+       memcpy(dev_info.mac_addr, conf->mac_addr, ETHER_ADDR_LEN);
+
+       strlcpy(dev_info.name, conf->name, RTE_KNI_NAMESIZE);
 
        RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
                dev_info.bus, dev_info.devid, dev_info.function,
                        dev_info.vendor_id, dev_info.device_id);
 
+       ret = kni_reserve_mz(kni);
+       if (ret < 0)
+               goto mz_fail;
+
        /* TX RING */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_tx_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->tx_q = mz->addr;
-       kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
-       dev_info.tx_phys = mz->phys_addr;
+       kni->tx_q = kni->m_tx_q->addr;
+       kni_fifo_init(kni->tx_q, KNI_FIFO_COUNT_MAX);
+       dev_info.tx_phys = kni->m_tx_q->phys_addr;
 
        /* RX RING */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_rx_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->rx_q = mz->addr;
-       kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
-       dev_info.rx_phys = mz->phys_addr;
+       kni->rx_q = kni->m_rx_q->addr;
+       kni_fifo_init(kni->rx_q, KNI_FIFO_COUNT_MAX);
+       dev_info.rx_phys = kni->m_rx_q->phys_addr;
 
        /* ALLOC RING */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->alloc_q = mz->addr;
-       kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
-       dev_info.alloc_phys = mz->phys_addr;
+       kni->alloc_q = kni->m_alloc_q->addr;
+       kni_fifo_init(kni->alloc_q, KNI_FIFO_COUNT_MAX);
+       dev_info.alloc_phys = kni->m_alloc_q->phys_addr;
 
        /* FREE RING */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_free_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->free_q = mz->addr;
-       kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
-       dev_info.free_phys = mz->phys_addr;
+       kni->free_q = kni->m_free_q->addr;
+       kni_fifo_init(kni->free_q, KNI_FIFO_COUNT_MAX);
+       dev_info.free_phys = kni->m_free_q->phys_addr;
 
        /* Request RING */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_req_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->req_q = mz->addr;
-       kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
-       dev_info.req_phys = mz->phys_addr;
+       kni->req_q = kni->m_req_q->addr;
+       kni_fifo_init(kni->req_q, KNI_FIFO_COUNT_MAX);
+       dev_info.req_phys = kni->m_req_q->phys_addr;
 
        /* Response RING */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_resp_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->resp_q = mz->addr;
-       kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
-       dev_info.resp_phys = mz->phys_addr;
+       kni->resp_q = kni->m_resp_q->addr;
+       kni_fifo_init(kni->resp_q, KNI_FIFO_COUNT_MAX);
+       dev_info.resp_phys = kni->m_resp_q->phys_addr;
 
        /* Req/Resp sync mem area */
-       rte_snprintf(obj_name, OBJNAMSIZ, "kni_sync_%s", intf_name);
-       mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
-       KNI_MZ_CHECK(mz == NULL);
-       ctx->sync_addr = mz->addr;
-       dev_info.sync_va = mz->addr;
-       dev_info.sync_phys = mz->phys_addr;
-
-       /* MBUF mempool */
-       rte_snprintf(mz_name, sizeof(mz_name), "MP_%s", pktmbuf_pool->name);
-       mz = rte_memzone_lookup(mz_name);
-       KNI_MZ_CHECK(mz == NULL);
-       dev_info.mbuf_va = mz->addr;
-       dev_info.mbuf_phys = mz->phys_addr;
-       ctx->pktmbuf_pool = pktmbuf_pool;
-       ctx->group_id = conf->group_id;
-       ctx->mbuf_size = conf->mbuf_size;
+       kni->sync_addr = kni->m_sync_addr->addr;
+       dev_info.sync_va = kni->m_sync_addr->addr;
+       dev_info.sync_phys = kni->m_sync_addr->phys_addr;
+
+       kni->pktmbuf_pool = pktmbuf_pool;
+       kni->group_id = conf->group_id;
+       kni->mbuf_size = conf->mbuf_size;
 
        ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
-       KNI_MZ_CHECK(ret < 0);
+       if (ret < 0)
+               goto ioctl_fail;
+
+       te->data = kni;
+
+       kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list);
+       TAILQ_INSERT_TAIL(kni_list, te, next);
 
-       ctx->in_use = 1;
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+       /* Allocate mbufs and then put them into alloc_q */
+       kni_allocate_mbufs(kni);
 
-       return ctx;
+       return kni;
 
-fail:
+ioctl_fail:
+       kni_release_mz(kni);
+mz_fail:
+       rte_free(kni);
+kni_fail:
+       rte_free(te);
+unlock:
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
        return NULL;
 }
@@ -282,26 +345,136 @@ kni_free_fifo(struct rte_kni_fifo *fifo)
        } while (ret);
 }
 
+/*
+ * Shift the address of mbuf m by the distance between its buf_addr and
+ * buf_iova fields: result = m - (buf_addr - buf_iova).
+ * All arithmetic is done on unsigned long values.
+ */
+static void *
+va2pa(struct rte_mbuf *m)
+{
+       const unsigned long buf_va = (unsigned long)m->buf_addr;
+       const unsigned long buf_pa = (unsigned long)m->buf_iova;
+       const unsigned long va_pa_gap = buf_va - buf_pa;
+
+       return (void *)((unsigned long)m - va_pa_gap);
+}
+
+/*
+ * Per-object callback handed to rte_mempool_obj_iter(): frees obj (an
+ * mbuf) only when its va2pa() translation equals the address passed in
+ * opaque. The mempool and object index arguments are unused.
+ */
+static void
+obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
+               unsigned obj_idx __rte_unused)
+{
+       struct rte_mbuf *candidate = obj;
+
+       /* Not the mbuf being looked for: leave it alone */
+       if (va2pa(candidate) != opaque)
+               return;
+
+       rte_pktmbuf_free(candidate);
+}
+
+/*
+ * Drain fifo one entry at a time. Each dequeued address is passed to
+ * obj_free() through rte_mempool_obj_iter() so that the mbuf of mp
+ * matching that address gets freed.
+ */
+static void
+kni_free_fifo_phy(struct rte_mempool *mp, struct rte_kni_fifo *fifo)
+{
+       void *entry;
+
+       /* The loop ends as soon as kni_fifo_get() returns 0 */
+       while (kni_fifo_get(fifo, &entry, 1) != 0)
+               rte_mempool_obj_iter(mp, obj_free, entry);
+}
+
 int
 rte_kni_release(struct rte_kni *kni)
 {
+       struct rte_tailq_entry *te;
+       struct rte_kni_list *kni_list;
        struct rte_kni_device_info dev_info;
+       uint32_t retry = 5;
 
-       if (!kni || !kni->in_use)
+       if (!kni)
                return -1;
 
-       rte_snprintf(dev_info.name, sizeof(dev_info.name), kni->name);
+       kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list);
+
+       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+       TAILQ_FOREACH(te, kni_list, next) {
+               if (te->data == kni)
+                       break;
+       }
+
+       if (te == NULL)
+               goto unlock;
+
+       strlcpy(dev_info.name, kni->name, sizeof(dev_info.name));
        if (ioctl(kni_fd, RTE_KNI_IOCTL_RELEASE, &dev_info) < 0) {
                RTE_LOG(ERR, KNI, "Fail to release kni device\n");
-               return -1;
+               goto unlock;
        }
 
+       TAILQ_REMOVE(kni_list, te, next);
+
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
        /* mbufs in all fifo should be released, except request/response */
+
+       /* wait until all rxq packets processed by kernel */
+       while (kni_fifo_count(kni->rx_q) && retry--)
+               usleep(1000);
+
+       if (kni_fifo_count(kni->rx_q))
+               RTE_LOG(ERR, KNI, "Fail to free all Rx-q items\n");
+
+       kni_free_fifo_phy(kni->pktmbuf_pool, kni->alloc_q);
        kni_free_fifo(kni->tx_q);
-       kni_free_fifo(kni->rx_q);
-       kni_free_fifo(kni->alloc_q);
        kni_free_fifo(kni->free_q);
-       memset(kni, 0, sizeof(struct rte_kni));
+
+       kni_release_mz(kni);
+
+       rte_free(kni);
+
+       rte_free(te);
+
+       return 0;
+
+unlock:
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+       return -1;
+}
+
+/*
+ * Default callback for request of configuring device mac address.
+ *
+ * Checks port_id with rte_eth_dev_is_valid_port(), then installs
+ * mac_addr as the default MAC address of the port through
+ * rte_eth_dev_default_mac_addr_set().
+ *
+ * Returns -EINVAL for an invalid port id, otherwise the value returned
+ * by rte_eth_dev_default_mac_addr_set(); a negative value is logged.
+ */
+static int
+kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
+{
+       int ret = 0;
+
+       if (!rte_eth_dev_is_valid_port(port_id)) {
+               RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id);
+               return -EINVAL;
+       }
+
+       /* Newline-terminated so the next log record starts on its own line */
+       RTE_LOG(INFO, KNI, "Configure mac address of %d\n", port_id);
+
+       ret = rte_eth_dev_default_mac_addr_set(port_id,
+                                       (struct rte_ether_addr *)mac_addr);
+       if (ret < 0)
+               RTE_LOG(ERR, KNI, "Failed to config mac_addr for port %d\n",
+                       port_id);
+
+       return ret;
+}
+
+/*
+ * Default callback for request of configuring promiscuous mode.
+ *
+ * Rejects an invalid port_id with -EINVAL. Otherwise logs the request,
+ * disables promiscuous mode when to_on is 0 and enables it for any
+ * other value, and returns 0.
+ */
+static int
+kni_config_promiscusity(uint16_t port_id, uint8_t to_on)
+{
+       if (rte_eth_dev_is_valid_port(port_id) == 0) {
+               RTE_LOG(ERR, KNI, "Invalid port id %d\n", port_id);
+               return -EINVAL;
+       }
+
+       RTE_LOG(INFO, KNI, "Configure promiscuous mode of %d to %d\n",
+               port_id, to_on);
+
+       if (to_on == 0)
+               rte_eth_promiscuous_disable(port_id);
+       else
+               rte_eth_promiscuous_enable(port_id);
 
        return 0;
 }
@@ -310,7 +483,7 @@ int
 rte_kni_handle_request(struct rte_kni *kni)
 {
        unsigned ret;
-       struct rte_kni_request *req;
+       struct rte_kni_request *req = NULL;
 
        if (kni == NULL)
                return -1;
@@ -321,7 +494,8 @@ rte_kni_handle_request(struct rte_kni *kni)
                return 0; /* It is OK of can not getting the request mbuf */
 
        if (req != kni->sync_addr) {
-               rte_panic("Wrong req pointer %p\n", req);
+               RTE_LOG(ERR, KNI, "Wrong req pointer %p\n", req);
+               return -1;
        }
 
        /* Analyze the request and call the relevant actions for it */
@@ -336,6 +510,22 @@ rte_kni_handle_request(struct rte_kni *kni)
                        req->result = kni->ops.config_network_if(\
                                        kni->ops.port_id, req->if_up);
                break;
+       case RTE_KNI_REQ_CHANGE_MAC_ADDR: /* Change MAC Address */
+               if (kni->ops.config_mac_address)
+                       req->result = kni->ops.config_mac_address(
+                                       kni->ops.port_id, req->mac_addr);
+               else if (kni->ops.port_id != UINT16_MAX)
+                       req->result = kni_config_mac_address(
+                                       kni->ops.port_id, req->mac_addr);
+               break;
+       case RTE_KNI_REQ_CHANGE_PROMISC: /* Change PROMISCUOUS MODE */
+               if (kni->ops.config_promiscusity)
+                       req->result = kni->ops.config_promiscusity(
+                                       kni->ops.port_id, req->promiscusity);
+               else if (kni->ops.port_id != UINT16_MAX)
+                       req->result = kni_config_promiscusity(
+                                       kni->ops.port_id, req->promiscusity);
+               break;
        default:
                RTE_LOG(ERR, KNI, "Unknown request id %u\n", req->req_id);
                req->result = -EINVAL;
@@ -355,7 +545,14 @@ rte_kni_handle_request(struct rte_kni *kni)
 unsigned
 rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 {
-       unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num);
+       void *phy_mbufs[num];
+       unsigned int ret;
+       unsigned int i;
+
+       for (i = 0; i < num; i++)
+               phy_mbufs[i] = va2pa(mbufs[i]);
+
+       ret = kni_fifo_put(kni->rx_q, phy_mbufs, num);
 
        /* Get mbufs from free_q and then free them */
        kni_free_mbufs(kni);
@@ -368,8 +565,9 @@ rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
 {
        unsigned ret = kni_fifo_get(kni->tx_q, (void **)mbufs, num);
 
-       /* Allocate mbufs and then put them into alloc_q */
-       kni_allocate_mbufs(kni);
+       /* If buffers removed, allocate mbufs and then put them into alloc_q */
+       if (ret)
+               kni_allocate_mbufs(kni);
 
        return ret;
 }
@@ -392,6 +590,23 @@ kni_allocate_mbufs(struct rte_kni *kni)
 {
        int i, ret;
        struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM];
+       void *phys[MAX_MBUF_BURST_NUM];
+       int allocq_free;
+
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) !=
+                        offsetof(struct rte_kni_mbuf, pool));
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_addr) !=
+                        offsetof(struct rte_kni_mbuf, buf_addr));
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
+                        offsetof(struct rte_kni_mbuf, next));
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
+                        offsetof(struct rte_kni_mbuf, data_off));
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
+                        offsetof(struct rte_kni_mbuf, data_len));
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
+                        offsetof(struct rte_kni_mbuf, pkt_len));
+       RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
+                        offsetof(struct rte_kni_mbuf, ol_flags));
 
        /* Check if pktmbuf pool has been configured */
        if (kni->pktmbuf_pool == NULL) {
@@ -399,20 +614,23 @@ kni_allocate_mbufs(struct rte_kni *kni)
                return;
        }
 
-       for (i = 0; i < MAX_MBUF_BURST_NUM; i++) {
+       allocq_free = (kni->alloc_q->read - kni->alloc_q->write - 1) \
+                       & (MAX_MBUF_BURST_NUM - 1);
+       for (i = 0; i < allocq_free; i++) {
                pkts[i] = rte_pktmbuf_alloc(kni->pktmbuf_pool);
                if (unlikely(pkts[i] == NULL)) {
                        /* Out of memory */
                        RTE_LOG(ERR, KNI, "Out of memory\n");
                        break;
                }
+               phys[i] = va2pa(pkts[i]);
        }
 
-       /* No pkt mbuf alocated */
+       /* No pkt mbuf allocated */
        if (i <= 0)
                return;
 
-       ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i);
+       ret = kni_fifo_put(kni->alloc_q, phys, i);
 
        /* Check if any mbufs not put into alloc_q, and then free them */
        if (ret >= 0 && ret < i && ret < MAX_MBUF_BURST_NUM) {
@@ -423,52 +641,27 @@ kni_allocate_mbufs(struct rte_kni *kni)
        }
 }
 
-/* It is deprecated and just for backward compatibility */
-uint8_t
-rte_kni_get_port_id(struct rte_kni *kni)
-{
-       if (!kni)
-               return ~0x0;
-
-       return kni->ops.port_id;
-}
-
 struct rte_kni *
 rte_kni_get(const char *name)
 {
        struct rte_kni *kni;
-       const struct rte_memzone *mz;
-       char mz_name[RTE_MEMZONE_NAMESIZE];
 
-       if (!name || !name[0])
+       if (name == NULL || name[0] == '\0')
                return NULL;
 
-       rte_snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%s", name);
-       mz = rte_memzone_lookup(mz_name);
-       if (!mz)
-               return NULL;
+       rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
 
-       kni = mz->addr;
-       if (!kni->in_use)
-               return NULL;
+       kni = __rte_kni_get(name);
+
+       rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
 
        return kni;
 }
 
-/*
- * It is deprecated and just for backward compatibility.
- */
-struct rte_kni *
-rte_kni_info_get(uint8_t port_id)
+const char *
+rte_kni_get_name(const struct rte_kni *kni)
 {
-       char name[RTE_MEMZONE_NAMESIZE];
-
-       if (port_id >= RTE_MAX_ETHPORTS)
-               return NULL;
-
-       rte_snprintf(name, RTE_MEMZONE_NAMESIZE, "vEth%u", port_id);
-
-       return rte_kni_get(name);
+       return kni->name;
 }
 
 static enum kni_ops_status
@@ -477,8 +670,11 @@ kni_check_request_register(struct rte_kni_ops *ops)
        /* check if KNI request ops has been registered*/
        if( NULL == ops )
                return KNI_REQ_NO_REGISTER;
-                
-       if((NULL == ops->change_mtu) && (NULL == ops->config_network_if))
+
+       if ((ops->change_mtu == NULL)
+               && (ops->config_network_if == NULL)
+               && (ops->config_mac_address == NULL)
+               && (ops->config_promiscusity == NULL))
                return KNI_REQ_NO_REGISTER;
 
        return KNI_REQ_REGISTERED;
@@ -488,7 +684,7 @@ int
 rte_kni_register_handlers(struct rte_kni *kni,struct rte_kni_ops *ops)
 {
        enum kni_ops_status req_status;
-       
+
        if (NULL == ops) {
                RTE_LOG(ERR, KNI, "Invalid KNI request operation.\n");
                return -1;
@@ -501,12 +697,11 @@ rte_kni_register_handlers(struct rte_kni *kni,struct rte_kni_ops *ops)
 
        req_status = kni_check_request_register(&kni->ops);
        if ( KNI_REQ_REGISTERED == req_status) {
-               RTE_LOG(ERR, KNI, "The KNI request operation"
-                                       "has already registered.\n");
+               RTE_LOG(ERR, KNI, "The KNI request operation has already registered.\n");
                return -1;
        }
 
-       memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops));     
+       memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops));
        return 0;
 }
 
@@ -517,8 +712,58 @@ rte_kni_unregister_handlers(struct rte_kni *kni)
                RTE_LOG(ERR, KNI, "Invalid kni info.\n");
                return -1;
        }
-       
-       kni->ops.change_mtu = NULL;
-       kni->ops.config_network_if = NULL;
+
+       memset(&kni->ops, 0, sizeof(struct rte_kni_ops));
+
        return 0;
 }
+
+/*
+ * Update the carrier state of a KNI interface through sysfs.
+ *
+ * Opens /sys/devices/virtual/net/<kni name>/carrier, reads the current
+ * value, then writes "1" when linkup is non-zero and "0" otherwise.
+ *
+ * Returns the previous state (1 if the file content started with '1',
+ * 0 otherwise), or -1 when kni is NULL or the open, read or write fails.
+ */
+int __rte_experimental
+rte_kni_update_link(struct rte_kni *kni, unsigned int linkup)
+{
+       char path[64];
+       char old_carrier[2];
+       const char *new_carrier = linkup ? "1" : "0";
+       int prev_state = -1;
+       int fd;
+
+       if (kni == NULL)
+               return -1;
+
+       snprintf(path, sizeof(path), "/sys/devices/virtual/net/%s/carrier",
+               kni->name);
+
+       fd = open(path, O_RDWR);
+       if (fd == -1) {
+               RTE_LOG(ERR, KNI, "Failed to open file: %s.\n", path);
+               return -1;
+       }
+
+       /* A failed or empty read is reported without a log message */
+       if (read(fd, old_carrier, 2) < 1)
+               goto out;
+
+       if (write(fd, new_carrier, 1) < 1) {
+               RTE_LOG(ERR, KNI, "Failed to write file: %s.\n", path);
+               goto out;
+       }
+
+       /* Only reached once both the read and the write succeeded */
+       prev_state = (old_carrier[0] == '1');
+
+out:
+       close(fd);
+       return prev_state;
+}
+
+/*
+ * Close the KNI control descriptor if it is open and reset kni_fd to -1.
+ * Does nothing when kni_fd is already negative.
+ */
+void
+rte_kni_close(void)
+{
+       if (kni_fd >= 0) {
+               close(kni_fd);
+               kni_fd = -1;
+       }
+}