raw/ntb: add handshake process
authorXiaoyun Li <xiaoyun.li@intel.com>
Tue, 2 Jul 2019 06:25:20 +0000 (14:25 +0800)
committerThomas Monjalon <thomas@monjalon.net>
Fri, 5 Jul 2019 10:50:19 +0000 (12:50 +0200)
Add handshake process using doorbell so that two hosts can
communicate to start and stop.

Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Acked-by: Jingjing Wu <jingjing.wu@intel.com>
Reviewed-by: Xiaolong Ye <xiaolong.ye@intel.com>
doc/guides/rawdevs/ntb.rst
drivers/raw/ntb/ntb.c

index ff0795b..0a61ec0 100644 (file)
@@ -9,6 +9,11 @@ separate hosts so that they can communicate with each other. Thus, many
 user cases can benefit from this, such as fault tolerance and visual
 acceleration.
 
+This PMD allows two hosts to handshake for device start and stop, memory
+allocation for the peer to access and read/write allocated memory from peer.
+Also, the PMD allows to use doorbell registers to notify the peer and share
+some information by using scratchpad registers.
+
 BIOS setting on Intel Skylake
 -----------------------------
 
index 5fde704..c83b8d9 100644 (file)
@@ -28,6 +28,183 @@ static const struct rte_pci_id pci_id_ntb_map[] = {
        { .vendor_id = 0, /* sentinel */ },
 };
 
+static int
+ntb_set_mw(struct rte_rawdev *dev, int mw_idx, uint64_t mw_size)
+{
+       struct ntb_hw *hw = dev->dev_private;
+       char mw_name[RTE_MEMZONE_NAMESIZE];
+       const struct rte_memzone *mz;
+       int ret = 0;
+
+       if (hw->ntb_ops->mw_set_trans == NULL) {
+               NTB_LOG(ERR, "Not supported to set mw.");
+               return -ENOTSUP;
+       }
+
+       snprintf(mw_name, sizeof(mw_name), "ntb_%d_mw_%d",
+                dev->dev_id, mw_idx);
+
+       mz = rte_memzone_lookup(mw_name);
+       if (mz)
+               return 0;
+
+       /**
+        * Hardware requires that mapped memory base address should be
+        * aligned with EMBARSZ and needs continuous memzone.
+        */
+       mz = rte_memzone_reserve_aligned(mw_name, mw_size, dev->socket_id,
+                               RTE_MEMZONE_IOVA_CONTIG, hw->mw_size[mw_idx]);
+       if (!mz) {
+               NTB_LOG(ERR, "Cannot allocate aligned memzone.");
+               return -EIO;
+       }
+       hw->mz[mw_idx] = mz;
+
+       ret = (*hw->ntb_ops->mw_set_trans)(dev, mw_idx, mz->iova, mw_size);
+       if (ret) {
+               NTB_LOG(ERR, "Cannot set mw translation.");
+               return ret;
+       }
+
+       return ret;
+}
+
+static void
+ntb_link_cleanup(struct rte_rawdev *dev)
+{
+       struct ntb_hw *hw = dev->dev_private;
+       int status, i;
+
+       if (hw->ntb_ops->spad_write == NULL ||
+           hw->ntb_ops->mw_set_trans == NULL) {
+               NTB_LOG(ERR, "Not supported to clean up link.");
+               return;
+       }
+
+       /* Clean spad registers. */
+       for (i = 0; i < hw->spad_cnt; i++) {
+               status = (*hw->ntb_ops->spad_write)(dev, i, 0, 0);
+               if (status)
+                       NTB_LOG(ERR, "Failed to clean local spad.");
+       }
+
+       /* Clear mw so that peer cannot access local memory.*/
+       for (i = 0; i < hw->mw_cnt; i++) {
+               status = (*hw->ntb_ops->mw_set_trans)(dev, i, 0, 0);
+               if (status)
+                       NTB_LOG(ERR, "Failed to clean mw.");
+       }
+}
+
+static void
+ntb_dev_intr_handler(void *param)
+{
+       struct rte_rawdev *dev = (struct rte_rawdev *)param;
+       struct ntb_hw *hw = dev->dev_private;
+       uint32_t mw_size_h, mw_size_l;
+       uint64_t db_bits = 0;
+       int i = 0;
+
+       if (hw->ntb_ops->db_read == NULL ||
+           hw->ntb_ops->db_clear == NULL ||
+           hw->ntb_ops->peer_db_set == NULL) {
+               NTB_LOG(ERR, "Doorbell is not supported.");
+               return;
+       }
+
+       db_bits = (*hw->ntb_ops->db_read)(dev);
+       if (!db_bits)
+               NTB_LOG(ERR, "No doorbells");
+
+       /* Doorbell 0 is for peer device ready. */
+       if (db_bits & 1) {
+               NTB_LOG(DEBUG, "DB0: Peer device is up.");
+               /* Clear received doorbell. */
+               (*hw->ntb_ops->db_clear)(dev, 1);
+
+               /**
+                * Peer dev is already up. All mw settings are already done.
+                * Skip them.
+                */
+               if (hw->peer_dev_up)
+                       return;
+
+               if (hw->ntb_ops->spad_read == NULL ||
+                   hw->ntb_ops->spad_write == NULL) {
+                       NTB_LOG(ERR, "Scratchpad is not supported.");
+                       return;
+               }
+
+               hw->peer_mw_cnt = (*hw->ntb_ops->spad_read)
+                                 (dev, SPAD_NUM_MWS, 0);
+               hw->peer_mw_size = rte_zmalloc("uint64_t",
+                                  hw->peer_mw_cnt * sizeof(uint64_t), 0);
+               for (i = 0; i < hw->mw_cnt; i++) {
+                       mw_size_h = (*hw->ntb_ops->spad_read)
+                                   (dev, SPAD_MW0_SZ_H + 2 * i, 0);
+                       mw_size_l = (*hw->ntb_ops->spad_read)
+                                   (dev, SPAD_MW0_SZ_L + 2 * i, 0);
+                       hw->peer_mw_size[i] = ((uint64_t)mw_size_h << 32) |
+                                             mw_size_l;
+                       NTB_LOG(DEBUG, "Peer %u mw size: 0x%"PRIx64"", i,
+                                       hw->peer_mw_size[i]);
+               }
+
+               hw->peer_dev_up = 1;
+
+               /**
+                * Handshake with peer. Spad_write only works when both
+                * devices are up. So write spad again when db is received.
+                * And set db again for the later device who may miss
+                * the 1st db.
+                */
+               for (i = 0; i < hw->mw_cnt; i++) {
+                       (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS,
+                                                  1, hw->mw_cnt);
+                       mw_size_h = hw->mw_size[i] >> 32;
+                       (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_H + 2 * i,
+                                                  1, mw_size_h);
+
+                       mw_size_l = hw->mw_size[i];
+                       (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_L + 2 * i,
+                                                  1, mw_size_l);
+               }
+               (*hw->ntb_ops->peer_db_set)(dev, 0);
+
+               /* To get the link info. */
+               if (hw->ntb_ops->get_link_status == NULL) {
+                       NTB_LOG(ERR, "Not supported to get link status.");
+                       return;
+               }
+               (*hw->ntb_ops->get_link_status)(dev);
+               NTB_LOG(INFO, "Link is up. Link speed: %u. Link width: %u",
+                       hw->link_speed, hw->link_width);
+               return;
+       }
+
+       if (db_bits & (1 << 1)) {
+               NTB_LOG(DEBUG, "DB1: Peer device is down.");
+               /* Clear received doorbell. */
+               (*hw->ntb_ops->db_clear)(dev, 2);
+
+               /* Peer device will be down, So clean local side too. */
+               ntb_link_cleanup(dev);
+
+               hw->peer_dev_up = 0;
+               /* Response peer's dev_stop request. */
+               (*hw->ntb_ops->peer_db_set)(dev, 2);
+               return;
+       }
+
+       if (db_bits & (1 << 2)) {
+               NTB_LOG(DEBUG, "DB2: Peer device agrees dev to be down.");
+               /* Clear received doorbell. */
+               (*hw->ntb_ops->db_clear)(dev, (1 << 2));
+               hw->peer_dev_up = 0;
+               return;
+       }
+}
+
 static void
 ntb_queue_conf_get(struct rte_rawdev *dev __rte_unused,
                   uint16_t queue_id __rte_unused,
@@ -147,7 +324,22 @@ ntb_dev_configure(const struct rte_rawdev *dev __rte_unused,
 static int
 ntb_dev_start(struct rte_rawdev *dev)
 {
+       struct ntb_hw *hw = dev->dev_private;
+       int ret, i;
+
        /* TODO: init queues and start queues. */
+
+       /* Map memory of bar_size to remote. */
+       hw->mz = rte_zmalloc("struct rte_memzone *",
+                            hw->mw_cnt * sizeof(struct rte_memzone *), 0);
+       for (i = 0; i < hw->mw_cnt; i++) {
+               ret = ntb_set_mw(dev, i, hw->mw_size[i]);
+               if (ret) {
+                       NTB_LOG(ERR, "Fail to set mw.");
+                       return ret;
+               }
+       }
+
        dev->started = 1;
 
        return 0;
@@ -156,13 +348,59 @@ ntb_dev_start(struct rte_rawdev *dev)
 static void
 ntb_dev_stop(struct rte_rawdev *dev)
 {
+       struct ntb_hw *hw = dev->dev_private;
+       uint32_t time_out;
+       int status;
+
        /* TODO: stop rx/tx queues. */
+
+       if (!hw->peer_dev_up)
+               goto clean;
+
+       ntb_link_cleanup(dev);
+
+       /* Notify the peer that device will be down. */
+       if (hw->ntb_ops->peer_db_set == NULL) {
+               NTB_LOG(ERR, "Peer doorbell setting is not supported.");
+               return;
+       }
+       status = (*hw->ntb_ops->peer_db_set)(dev, 1);
+       if (status) {
+               NTB_LOG(ERR, "Failed to tell peer device is down.");
+               return;
+       }
+
+       /*
+        * Set time out as 1s in case that the peer is stopped accidently
+        * without any notification.
+        */
+       time_out = 1000000;
+
+       /* Wait for cleanup work down before db mask clear. */
+       while (hw->peer_dev_up && time_out) {
+               time_out -= 10;
+               rte_delay_us(10);
+       }
+
+clean:
+       /* Clear doorbells mask. */
+       if (hw->ntb_ops->db_set_mask == NULL) {
+               NTB_LOG(ERR, "Doorbell mask setting is not supported.");
+               return;
+       }
+       status = (*hw->ntb_ops->db_set_mask)(dev,
+                               (((uint64_t)1 << hw->db_cnt) - 1));
+       if (status)
+               NTB_LOG(ERR, "Failed to clear doorbells.");
+
        dev->started = 0;
 }
 
 static int
 ntb_dev_close(struct rte_rawdev *dev)
 {
+       struct ntb_hw *hw = dev->dev_private;
+       struct rte_intr_handle *intr_handle;
        int ret = 0;
 
        if (dev->started)
@@ -170,6 +408,20 @@ ntb_dev_close(struct rte_rawdev *dev)
 
        /* TODO: free queues. */
 
+       intr_handle = &hw->pci_dev->intr_handle;
+       /* Clean datapath event and vec mapping */
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+       }
+       /* Disable uio intr before callback unregister */
+       rte_intr_disable(intr_handle);
+
+       /* Unregister callback func to eal lib */
+       rte_intr_callback_unregister(intr_handle,
+                                    ntb_dev_intr_handler, dev);
+
        return ret;
 }
 
@@ -346,7 +598,9 @@ static int
 ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
 {
        struct ntb_hw *hw = dev->dev_private;
-       int ret;
+       struct rte_intr_handle *intr_handle;
+       uint32_t val;
+       int ret, i;
 
        hw->pci_dev = pci_dev;
        hw->peer_dev_up = 0;
@@ -377,6 +631,86 @@ ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
        if (ret)
                return ret;
 
+       /* Init doorbell. */
+       hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t);
+
+       intr_handle = &pci_dev->intr_handle;
+       /* Register callback func to eal lib */
+       rte_intr_callback_register(intr_handle,
+                                  ntb_dev_intr_handler, dev);
+
+       ret = rte_intr_efd_enable(intr_handle, hw->db_cnt);
+       if (ret)
+               return ret;
+
+       /* To clarify, the interrupt for each doorbell is already mapped
+        * by default for intel gen3. They are mapped to msix vec 1-32,
+        * and hardware intr is mapped to 0. Map all to 0 for uio.
+        */
+       if (!rte_intr_cap_multiple(intr_handle)) {
+               for (i = 0; i < hw->db_cnt; i++) {
+                       if (hw->ntb_ops->vector_bind == NULL)
+                               return -ENOTSUP;
+                       ret = (*hw->ntb_ops->vector_bind)(dev, i, 0);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       if (hw->ntb_ops->db_set_mask == NULL ||
+           hw->ntb_ops->peer_db_set == NULL) {
+               NTB_LOG(ERR, "Doorbell is not supported.");
+               return -ENOTSUP;
+       }
+       hw->db_mask = 0;
+       ret = (*hw->ntb_ops->db_set_mask)(dev, hw->db_mask);
+       if (ret) {
+               NTB_LOG(ERR, "Unable to enable intr for all dbs.");
+               return ret;
+       }
+
+       /* enable uio intr after callback register */
+       rte_intr_enable(intr_handle);
+
+       if (hw->ntb_ops->spad_write == NULL) {
+               NTB_LOG(ERR, "Scratchpad is not supported.");
+               return -ENOTSUP;
+       }
+       /* Tell peer the mw_cnt of local side. */
+       ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS, 1, hw->mw_cnt);
+       if (ret) {
+               NTB_LOG(ERR, "Failed to tell peer mw count.");
+               return ret;
+       }
+
+       /* Tell peer each mw size on local side. */
+       for (i = 0; i < hw->mw_cnt; i++) {
+               NTB_LOG(DEBUG, "Local %u mw size: 0x%"PRIx64"", i,
+                               hw->mw_size[i]);
+               val = hw->mw_size[i] >> 32;
+               ret = (*hw->ntb_ops->spad_write)
+                               (dev, SPAD_MW0_SZ_H + 2 * i, 1, val);
+               if (ret) {
+                       NTB_LOG(ERR, "Failed to tell peer mw size.");
+                       return ret;
+               }
+
+               val = hw->mw_size[i];
+               ret = (*hw->ntb_ops->spad_write)
+                               (dev, SPAD_MW0_SZ_L + 2 * i, 1, val);
+               if (ret) {
+                       NTB_LOG(ERR, "Failed to tell peer mw size.");
+                       return ret;
+               }
+       }
+
+       /* Ring doorbell 0 to tell peer the device is ready. */
+       ret = (*hw->ntb_ops->peer_db_set)(dev, 0);
+       if (ret) {
+               NTB_LOG(ERR, "Failed to tell peer device is probed.");
+               return ret;
+       }
+
        return ret;
 }