/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation.
 */

#include <rte_common.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_bus_pci.h>
#include <rte_memzone.h>
#include <rte_memcpy.h>
#include <rte_rawdev.h>
#include <rte_rawdev_pmd.h>

#include "ntb_hw_intel.h"
#include "rte_pmd_ntb.h"

static const struct rte_pci_id pci_id_ntb_map[] = {
	{ RTE_PCI_DEVICE(NTB_INTEL_VENDOR_ID, NTB_INTEL_DEV_ID_B2B_SKX) },
	{ .vendor_id = 0, /* sentinel */ },

/* Align with enum ntb_xstats_idx */
static struct rte_rawdev_xstats_name ntb_xstats_names[] = {

#define NTB_XSTATS_NUM RTE_DIM(ntb_xstats_names)
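
/*
 * Layout of hw->ntb_xstats / hw->ntb_xstats_off (allocated in
 * ntb_dev_configure): slots [0, NTB_XSTATS_NUM) hold the device totals,
 * and slots [NTB_XSTATS_NUM * (qp_id + 1), NTB_XSTATS_NUM * (qp_id + 2))
 * hold the counters of queue pair qp_id, so a queue stat lives at
 *
 *     off = NTB_XSTATS_NUM * (qp_id + 1) + stat_id
 *
 * which is the indexing used throughout the datapath below.
 */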
ntb_link_cleanup(struct rte_rawdev *dev)
	struct ntb_hw *hw = dev->dev_private;

	if (hw->ntb_ops->spad_write == NULL ||
	    hw->ntb_ops->mw_set_trans == NULL) {
		NTB_LOG(ERR, "Cleaning up the link is not supported.");

	/* Clean the spad registers. */
	for (i = 0; i < hw->spad_cnt; i++) {
		status = (*hw->ntb_ops->spad_write)(dev, i, 0, 0);
			NTB_LOG(ERR, "Failed to clean local spad.");

	/* Clear mw so that the peer cannot access local memory. */
	for (i = 0; i < hw->used_mw_num; i++) {
		status = (*hw->ntb_ops->mw_set_trans)(dev, i, 0, 0);
			NTB_LOG(ERR, "Failed to clean mw.");

ntb_handshake_work(const struct rte_rawdev *dev)
	struct ntb_hw *hw = dev->dev_private;

	if (hw->ntb_ops->spad_write == NULL ||
	    hw->ntb_ops->mw_set_trans == NULL) {
		NTB_LOG(ERR, "Scratchpad/MW setting is not supported.");

	/* Tell the peer the mw info of the local side. */
	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_MWS, 1, hw->mw_cnt);

	for (i = 0; i < hw->mw_cnt; i++) {
		NTB_LOG(INFO, "Local %u mw size: 0x%"PRIx64"", i,
		val = hw->mw_size[i] >> 32;
		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_H + 2 * i,
		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_SZ_L + 2 * i,

	/* Tell the peer about the queue info and map memory to the peer. */
	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_Q_SZ, 1, hw->queue_size);
	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_NUM_QPS, 1,
	ret = (*hw->ntb_ops->spad_write)(dev, SPAD_USED_MWS, 1,

	for (i = 0; i < hw->used_mw_num; i++) {
		val = (uint64_t)(size_t)(hw->mz[i]->addr) >> 32;
		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_BA_H + 2 * i,
		val = (uint64_t)(size_t)(hw->mz[i]->addr);
		ret = (*hw->ntb_ops->spad_write)(dev, SPAD_MW0_BA_L + 2 * i,

	for (i = 0; i < hw->used_mw_num; i++) {
		ret = (*hw->ntb_ops->mw_set_trans)(dev, i, hw->mz[i]->iova,

	/* Ring doorbell 0 to tell the peer that the device is ready. */
	ret = (*hw->ntb_ops->peer_db_set)(dev, 0);
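
/*
 * Scratchpad layout used by the handshake above (register indexes as
 * named in this file):
 *
 *     SPAD_NUM_MWS              number of memory windows
 *     SPAD_MW0_SZ_H/L + 2 * i   high/low 32 bits of the size of MW i
 *     SPAD_Q_SZ                 ring size of each queue
 *     SPAD_NUM_QPS              number of queue pairs
 *     SPAD_USED_MWS             number of MWs actually used
 *     SPAD_MW0_BA_H/L + 2 * i   high/low 32 bits of the base of MW i
 *
 * The scratchpad registers are 32 bits wide, which is why each 64-bit
 * value is split into a high half (val >> 32) and a truncated low half
 * written to two consecutive registers.
 */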
ntb_dev_intr_handler(void *param)
	struct rte_rawdev *dev = (struct rte_rawdev *)param;
	struct ntb_hw *hw = dev->dev_private;
	uint32_t val_h, val_l;
	uint64_t peer_mw_size;
	uint64_t db_bits = 0;

	if (hw->ntb_ops->db_read == NULL ||
	    hw->ntb_ops->db_clear == NULL ||
	    hw->ntb_ops->peer_db_set == NULL) {
		NTB_LOG(ERR, "Doorbell is not supported.");

	db_bits = (*hw->ntb_ops->db_read)(dev);
		NTB_LOG(ERR, "No doorbells");

	/* Doorbell 0 signals that the peer device is ready. */
		NTB_LOG(INFO, "DB0: Peer device is up.");
		/* Clear the received doorbell. */
		(*hw->ntb_ops->db_clear)(dev, 1);

		 * The peer dev is already up. All mw settings are already done.

		if (hw->ntb_ops->spad_read == NULL) {
			NTB_LOG(ERR, "Scratchpad read is not supported.");

		/* Check if the mw setting on the peer is the same as local. */
		peer_mw_cnt = (*hw->ntb_ops->spad_read)(dev, SPAD_NUM_MWS, 0);
		if (peer_mw_cnt != hw->mw_cnt) {
			NTB_LOG(ERR, "The mw count must be the same on both sides.");

		for (i = 0; i < hw->mw_cnt; i++) {
			val_h = (*hw->ntb_ops->spad_read)
				(dev, SPAD_MW0_SZ_H + 2 * i, 0);
			val_l = (*hw->ntb_ops->spad_read)
				(dev, SPAD_MW0_SZ_L + 2 * i, 0);
			peer_mw_size = ((uint64_t)val_h << 32) | val_l;
			NTB_LOG(DEBUG, "Peer %u mw size: 0x%"PRIx64"", i,
			if (peer_mw_size != hw->mw_size[i]) {
				NTB_LOG(ERR, "The mw config must be the same on both sides.");

		/*
		 * Handshake with the peer. Spad_write and mw_set_trans only
		 * work when both devices are up, so write the spad again
		 * when the db is received, and set the db again for the
		 * later device, which may miss the first db.
		 */
		if (ntb_handshake_work(dev) < 0) {
			NTB_LOG(ERR, "Handshake work failed.");

		/* Get the link info. */
		if (hw->ntb_ops->get_link_status == NULL) {
			NTB_LOG(ERR, "Getting the link status is not supported.");
		(*hw->ntb_ops->get_link_status)(dev);
		NTB_LOG(INFO, "Link is up. Link speed: %u. Link width: %u",
			hw->link_speed, hw->link_width);

	if (db_bits & (1 << 1)) {
		NTB_LOG(INFO, "DB1: Peer device is down.");
		/* Clear the received doorbell. */
		(*hw->ntb_ops->db_clear)(dev, 2);

		/* The peer device will be down, so clean up the local side too. */
		ntb_link_cleanup(dev);

		/* Respond to the peer's dev_stop request. */
		(*hw->ntb_ops->peer_db_set)(dev, 2);

	if (db_bits & (1 << 2)) {
		NTB_LOG(INFO, "DB2: Peer device agrees to go down.");
		/* Clear the received doorbell. */
		(*hw->ntb_ops->db_clear)(dev, (1 << 2));
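
/*
 * Doorbell protocol used by the handler above: DB0 signals "peer device
 * is up" and triggers the re-handshake, DB1 signals "peer device is
 * going down" and triggers local cleanup, and DB2 acknowledges the
 * peer's dev_stop request. Note that db_clear() takes a bit mask, so
 * DB0 and DB1 are cleared with masks 1 and 2, and DB2 with (1 << 2).
 */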
ntb_queue_conf_get(struct rte_rawdev *dev,
		   rte_rawdev_obj_t queue_conf,
	struct ntb_queue_conf *q_conf = queue_conf;
	struct ntb_hw *hw = dev->dev_private;

	if (conf_size != sizeof(*q_conf))

	q_conf->tx_free_thresh = hw->tx_queues[queue_id]->tx_free_thresh;
	q_conf->nb_desc = hw->rx_queues[queue_id]->nb_rx_desc;
	q_conf->rx_mp = hw->rx_queues[queue_id]->mpool;

ntb_rxq_release_mbufs(struct ntb_rx_queue *q)

	if (!q || !q->sw_ring) {
		NTB_LOG(ERR, "Pointer to rxq or sw_ring is NULL");

	for (i = 0; i < q->nb_rx_desc; i++) {
		if (q->sw_ring[i].mbuf) {
			rte_pktmbuf_free_seg(q->sw_ring[i].mbuf);
			q->sw_ring[i].mbuf = NULL;

ntb_rxq_release(struct ntb_rx_queue *rxq)
		NTB_LOG(ERR, "Pointer to rxq is NULL");

	ntb_rxq_release_mbufs(rxq);
	rte_free(rxq->sw_ring);

ntb_rxq_setup(struct rte_rawdev *dev,
	      rte_rawdev_obj_t queue_conf,
	struct ntb_queue_conf *rxq_conf = queue_conf;
	struct ntb_hw *hw = dev->dev_private;
	struct ntb_rx_queue *rxq;

	if (conf_size != sizeof(*rxq_conf))

	/* Allocate the rx queue data structure. */
	rxq = rte_zmalloc_socket("ntb rx queue",
				 sizeof(struct ntb_rx_queue),
		NTB_LOG(ERR, "Failed to allocate memory for "
			"rx queue data structure.");

	if (rxq_conf->rx_mp == NULL) {
		NTB_LOG(ERR, "Invalid null mempool pointer.");
	rxq->nb_rx_desc = rxq_conf->nb_desc;
	rxq->mpool = rxq_conf->rx_mp;
	rxq->port_id = dev->dev_id;
	rxq->queue_id = qp_id;

	/* Allocate the software ring. */
		rte_zmalloc_socket("ntb rx sw ring",
				   sizeof(struct ntb_rx_entry) *
		ntb_rxq_release(rxq);
		NTB_LOG(ERR, "Failed to allocate memory for SW ring");

	hw->rx_queues[qp_id] = rxq;

ntb_txq_release_mbufs(struct ntb_tx_queue *q)

	if (!q || !q->sw_ring) {
		NTB_LOG(ERR, "Pointer to txq or sw_ring is NULL");

	for (i = 0; i < q->nb_tx_desc; i++) {
		if (q->sw_ring[i].mbuf) {
			rte_pktmbuf_free_seg(q->sw_ring[i].mbuf);
			q->sw_ring[i].mbuf = NULL;

ntb_txq_release(struct ntb_tx_queue *txq)
		NTB_LOG(ERR, "Pointer to txq is NULL");

	ntb_txq_release_mbufs(txq);
	rte_free(txq->sw_ring);

ntb_txq_setup(struct rte_rawdev *dev,
	      rte_rawdev_obj_t queue_conf,
	struct ntb_queue_conf *txq_conf = queue_conf;
	struct ntb_hw *hw = dev->dev_private;
	struct ntb_tx_queue *txq;

	if (conf_size != sizeof(*txq_conf))

	/* Allocate the TX queue data structure. */
	txq = rte_zmalloc_socket("ntb tx queue",
				 sizeof(struct ntb_tx_queue),
		NTB_LOG(ERR, "Failed to allocate memory for "
			"tx queue structure");

	txq->nb_tx_desc = txq_conf->nb_desc;
	txq->port_id = dev->dev_id;
	txq->queue_id = qp_id;

	/* Allocate the software ring. */
		rte_zmalloc_socket("ntb tx sw ring",
				   sizeof(struct ntb_tx_entry) *
		ntb_txq_release(txq);
		NTB_LOG(ERR, "Failed to allocate memory for SW TX ring");

	prev = txq->nb_tx_desc - 1;
	for (i = 0; i < txq->nb_tx_desc; i++) {
		txq->sw_ring[i].mbuf = NULL;
		txq->sw_ring[i].last_id = i;
		txq->sw_ring[prev].next_id = i;

	txq->tx_free_thresh = txq_conf->tx_free_thresh ?
			      txq_conf->tx_free_thresh :
			      NTB_DFLT_TX_FREE_THRESH;
	if (txq->tx_free_thresh >= txq->nb_tx_desc - 3) {
		NTB_LOG(ERR, "tx_free_thresh must be less than nb_desc - 3. "
			"(tx_free_thresh=%u qp_id=%u)", txq->tx_free_thresh,

	hw->tx_queues[qp_id] = txq;

ntb_queue_setup(struct rte_rawdev *dev,
		rte_rawdev_obj_t queue_conf,
	struct ntb_hw *hw = dev->dev_private;

	if (queue_id >= hw->queue_pairs)

	ret = ntb_txq_setup(dev, queue_id, queue_conf, conf_size);

	ret = ntb_rxq_setup(dev, queue_id, queue_conf, conf_size);

ntb_queue_release(struct rte_rawdev *dev, uint16_t queue_id)
	struct ntb_hw *hw = dev->dev_private;

	if (queue_id >= hw->queue_pairs)

	ntb_txq_release(hw->tx_queues[queue_id]);
	hw->tx_queues[queue_id] = NULL;
	ntb_rxq_release(hw->rx_queues[queue_id]);
	hw->rx_queues[queue_id] = NULL;

ntb_queue_count(struct rte_rawdev *dev)
	struct ntb_hw *hw = dev->dev_private;
	return hw->queue_pairs;

ntb_queue_init(struct rte_rawdev *dev, uint16_t qp_id)
	struct ntb_hw *hw = dev->dev_private;
	struct ntb_rx_queue *rxq = hw->rx_queues[qp_id];
	struct ntb_tx_queue *txq = hw->tx_queues[qp_id];
	volatile struct ntb_header *local_hdr;
	struct ntb_header *remote_hdr;
	uint16_t q_size = hw->queue_size;

	if (hw->ntb_ops->get_peer_mw_addr == NULL) {
		NTB_LOG(ERR, "Getting the peer mw addr is not supported.");

	/* Put the queue info at the start of the shared memory. */
	hdr_offset = hw->hdr_size_per_queue * qp_id;
	local_hdr = (volatile struct ntb_header *)
		    ((size_t)hw->mz[0]->addr + hdr_offset);
	bar_addr = (*hw->ntb_ops->get_peer_mw_addr)(dev, 0);
	if (bar_addr == NULL)
	remote_hdr = (struct ntb_header *)
		     ((size_t)bar_addr + hdr_offset);

	rxq->rx_desc_ring = (struct ntb_desc *)
			    (&remote_hdr->desc_ring);
	rxq->rx_used_ring = (volatile struct ntb_used *)
			    (&local_hdr->desc_ring[q_size]);
	rxq->avail_cnt = &remote_hdr->avail_cnt;
	rxq->used_cnt = &local_hdr->used_cnt;

	for (i = 0; i < rxq->nb_rx_desc - 1; i++) {
		struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mpool);
		if (unlikely(!mbuf)) {
			NTB_LOG(ERR, "Failed to allocate mbuf for RX");
		mbuf->port = dev->dev_id;

		rxq->sw_ring[i].mbuf = mbuf;

		rxq->rx_desc_ring[i].addr = rte_pktmbuf_mtod(mbuf, size_t);
		rxq->rx_desc_ring[i].len = mbuf->buf_len - RTE_PKTMBUF_HEADROOM;

	*rxq->avail_cnt = rxq->nb_rx_desc - 1;
	rxq->last_avail = rxq->nb_rx_desc - 1;

	txq->tx_desc_ring = (volatile struct ntb_desc *)
			    (&local_hdr->desc_ring);
	txq->tx_used_ring = (struct ntb_used *)
			    (&remote_hdr->desc_ring[q_size]);
	txq->avail_cnt = &local_hdr->avail_cnt;
	txq->used_cnt = &remote_hdr->used_cnt;

	txq->nb_tx_free = txq->nb_tx_desc - 1;
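
	/*
	 * Note how the rings are cross-wired through the two headers: the
	 * TX descriptor ring and the RX used ring live in local memory,
	 * while the TX used ring and the RX descriptor ring point into the
	 * peer's memory window (remote_hdr, reached through the BAR). Each
	 * side thus reads its input locally and writes its output directly
	 * into the peer. One slot is left unused (nb_desc - 1 entries in
	 * flight), presumably so that a full ring can be told apart from
	 * an empty one.
	 */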
	/* Set the per queue stats. */
	for (i = 0; i < NTB_XSTATS_NUM; i++) {
		hw->ntb_xstats[i + NTB_XSTATS_NUM * (qp_id + 1)] = 0;
		hw->ntb_xstats_off[i + NTB_XSTATS_NUM * (qp_id + 1)] = 0;

ntb_enqueue_cleanup(struct ntb_tx_queue *txq)
	struct ntb_tx_entry *sw_ring = txq->sw_ring;
	uint16_t tx_free = txq->last_avail;
	uint16_t nb_to_clean, i;

	/* avail_cnt + 1 is the next position the peer will receive into. */
	nb_to_clean = (*txq->avail_cnt - txq->last_avail + 1 +
			txq->nb_tx_desc) & (txq->nb_tx_desc - 1);
	nb_to_clean = RTE_MIN(nb_to_clean, txq->tx_free_thresh);
	for (i = 0; i < nb_to_clean; i++) {
		if (sw_ring[tx_free].mbuf)
			rte_pktmbuf_free_seg(sw_ring[tx_free].mbuf);
		tx_free = (tx_free + 1) & (txq->nb_tx_desc - 1);

	txq->nb_tx_free += nb_to_clean;
	txq->last_avail = tx_free;
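
/*
 * All ring indexes wrap with "& (nb_desc - 1)", which assumes the ring
 * size is a power of two. A worked example, assuming an 8-entry ring:
 * with *avail_cnt == 2 and last_avail == 7,
 *
 *     nb_to_clean = (2 - 7 + 1 + 8) & 7 = 4
 *
 * so entries 7, 0, 1 and 2 are cleaned; the "+ nb_tx_desc" keeps the
 * difference in range before masking.
 */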
ntb_enqueue_bufs(struct rte_rawdev *dev,
		 struct rte_rawdev_buf **buffers,
		 rte_rawdev_obj_t context)
	struct ntb_hw *hw = dev->dev_private;
	struct ntb_tx_queue *txq = hw->tx_queues[(size_t)context];
	struct ntb_tx_entry *sw_ring = txq->sw_ring;
	struct rte_mbuf *txm;
	struct ntb_used tx_used[NTB_MAX_DESC_SIZE];
	volatile struct ntb_desc *tx_item;
	uint16_t tx_last, nb_segs, off, last_used, avail_cnt;
	uint16_t nb_mbufs = 0;

	if (unlikely(hw->ntb_ops->ioremap == NULL)) {
		NTB_LOG(ERR, "Ioremap not supported.");

	if (unlikely(dev->started == 0 || hw->peer_dev_up == 0)) {
		NTB_LOG(DEBUG, "Link is not up.");

	if (txq->nb_tx_free < txq->tx_free_thresh)
		ntb_enqueue_cleanup(txq);

	off = NTB_XSTATS_NUM * ((size_t)context + 1);
	last_used = txq->last_used;
	avail_cnt = *txq->avail_cnt; /* Where to alloc next. */
	for (nb_tx = 0; nb_tx < count; nb_tx++) {
		txm = (struct rte_mbuf *)(buffers[nb_tx]->buf_addr);
		if (txm == NULL || txq->nb_tx_free < txm->nb_segs)

		tx_last = (txq->last_used + txm->nb_segs - 1) &
			  (txq->nb_tx_desc - 1);
		nb_segs = txm->nb_segs;
		for (i = 0; i < nb_segs; i++) {
			/* Not enough ring space for tx. */
			if (txq->last_used == avail_cnt)
			sw_ring[txq->last_used].mbuf = txm;
			tx_item = txq->tx_desc_ring + txq->last_used;
				(hw->ntb_xstats[NTB_TX_ERRS_ID + off])++;

			if (txm->data_len > tx_item->len) {
				NTB_LOG(ERR, "Data length exceeds buf length."
					" Only %u bytes will be transmitted.",
				txm->data_len = tx_item->len;

			/* Translate the remote virtual addr to a BAR virtual addr. */
			buf_addr = (*hw->ntb_ops->ioremap)(dev, tx_item->addr);
			if (buf_addr == NULL) {
				(hw->ntb_xstats[NTB_TX_ERRS_ID + off])++;
				NTB_LOG(ERR, "Null remap addr.");
			rte_memcpy(buf_addr, rte_pktmbuf_mtod(txm, void *),

			tx_used[nb_mbufs].len = txm->data_len;
			tx_used[nb_mbufs++].flags = (txq->last_used ==

			bytes += txm->data_len;

			sw_ring[txq->last_used].next_id = (txq->last_used + 1) &
							  (txq->nb_tx_desc - 1);
			sw_ring[txq->last_used].last_id = tx_last;
			txq->last_used = (txq->last_used + 1) &
					 (txq->nb_tx_desc - 1);
		txq->nb_tx_free -= nb_segs;

	if (nb_mbufs > txq->nb_tx_desc - last_used) {
		nb1 = txq->nb_tx_desc - last_used;
		nb2 = nb_mbufs - txq->nb_tx_desc + last_used;

	rte_memcpy(txq->tx_used_ring + last_used, tx_used,
		   sizeof(struct ntb_used) * nb1);
	rte_memcpy(txq->tx_used_ring, tx_used + nb1,
		   sizeof(struct ntb_used) * nb2);
	*txq->used_cnt = txq->last_used;
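
	/*
	 * The used entries are staged in tx_used[] and flushed to the peer
	 * in at most two rte_memcpy() calls: if the batch wraps past the
	 * end of the ring, the first copy fills the tail (nb1 entries) and
	 * the second restarts at slot 0 (nb2 entries). E.g. with an
	 * 8-entry ring, last_used == 6 and nb_mbufs == 5 give nb1 == 2 and
	 * nb2 == 3. Writing *used_cnt afterwards publishes the whole batch.
	 */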
	/* Update the queue stats. */
	hw->ntb_xstats[NTB_TX_BYTES_ID + off] += bytes;
	hw->ntb_xstats[NTB_TX_PKTS_ID + off] += nb_tx;

ntb_dequeue_bufs(struct rte_rawdev *dev,
		 struct rte_rawdev_buf **buffers,
		 rte_rawdev_obj_t context)
	struct ntb_hw *hw = dev->dev_private;
	struct ntb_rx_queue *rxq = hw->rx_queues[(size_t)context];
	struct ntb_rx_entry *sw_ring = rxq->sw_ring;
	struct ntb_desc rx_desc[NTB_MAX_DESC_SIZE];
	struct rte_mbuf *first, *rxm_t;
	struct rte_mbuf *prev = NULL;
	volatile struct ntb_used *rx_item;
	uint16_t nb_mbufs = 0;
	uint16_t off, last_avail, used_cnt, used_nb;

	if (unlikely(dev->started == 0 || hw->peer_dev_up == 0)) {
		NTB_LOG(DEBUG, "Link is not up");

	used_cnt = *rxq->used_cnt;

	if (rxq->last_used == used_cnt)

	last_avail = rxq->last_avail;
	used_nb = (used_cnt - rxq->last_used) & (rxq->nb_rx_desc - 1);
	count = RTE_MIN(count, used_nb);
	for (nb_rx = 0; nb_rx < count; nb_rx++) {

		rx_item = rxq->rx_used_ring + rxq->last_used;
		rxm_t = sw_ring[rxq->last_used].mbuf;
		rxm_t->data_len = rx_item->len;
		rxm_t->data_off = RTE_PKTMBUF_HEADROOM;
		rxm_t->port = rxq->port_id;

		buffers[nb_rx]->buf_addr = rxm_t;

			first->pkt_len += prev->data_len;
			rxq->last_used = (rxq->last_used + 1) &
					 (rxq->nb_rx_desc - 1);

			rxm_t = rte_mbuf_raw_alloc(rxq->mpool);
			if (unlikely(rxm_t == NULL)) {
				NTB_LOG(ERR, "Failed to allocate mbuf for rx.");
			rxm_t->port = rxq->port_id;
			sw_ring[rxq->last_avail].mbuf = rxm_t;

			rx_desc[nb_mbufs].addr =
				rte_pktmbuf_mtod(rxm_t, size_t);
			rx_desc[nb_mbufs++].len = rxm_t->buf_len -
						  RTE_PKTMBUF_HEADROOM;
			rxq->last_avail = (rxq->last_avail + 1) &
					  (rxq->nb_rx_desc - 1);

			if (rx_item->flags & NTB_FLAG_EOP)

		bytes += first->pkt_len;

	if (nb_mbufs > rxq->nb_rx_desc - last_avail) {
		nb1 = rxq->nb_rx_desc - last_avail;
		nb2 = nb_mbufs - rxq->nb_rx_desc + last_avail;

	rte_memcpy(rxq->rx_desc_ring + last_avail, rx_desc,
		   sizeof(struct ntb_desc) * nb1);
	rte_memcpy(rxq->rx_desc_ring, rx_desc + nb1,
		   sizeof(struct ntb_desc) * nb2);
	*rxq->avail_cnt = rxq->last_avail;

	/* Update the queue stats. */
	off = NTB_XSTATS_NUM * ((size_t)context + 1);
	hw->ntb_xstats[NTB_RX_BYTES_ID + off] += bytes;
	hw->ntb_xstats[NTB_RX_PKTS_ID + off] += nb_rx;
	hw->ntb_xstats[NTB_RX_MISS_ID + off] += (count - nb_rx);

ntb_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info,
		 size_t dev_info_size)
	struct ntb_hw *hw = dev->dev_private;
	struct ntb_dev_info *info = dev_info;

	if (dev_info_size != sizeof(*info)) {
		NTB_LOG(ERR, "Invalid size parameter to %s", __func__);

	info->mw_cnt = hw->mw_cnt;
	info->mw_size = hw->mw_size;

	/*
	 * Intel hardware requires that the mapped memory base address be
	 * aligned to EMBARSZ, and it needs a contiguous memzone.
	 */
	info->mw_size_align = (uint8_t)(hw->pci_dev->id.vendor_id ==
					NTB_INTEL_VENDOR_ID);

	if (!hw->queue_size || !hw->queue_pairs) {
		NTB_LOG(ERR, "Queue size and number of queues are not set.");

	hw->hdr_size_per_queue = RTE_ALIGN(sizeof(struct ntb_header) +
				hw->queue_size * sizeof(struct ntb_desc) +
				hw->queue_size * sizeof(struct ntb_used),
				RTE_CACHE_LINE_SIZE);
	info->ntb_hdr_size = hw->hdr_size_per_queue * hw->queue_pairs;
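
	/*
	 * Per-queue shared memory layout: one struct ntb_header followed
	 * by queue_size descriptor entries and queue_size used entries,
	 * rounded up to a cache line. With purely illustrative sizes, a
	 * 16 B header and 16 B desc/used entries, queue_size == 256 would
	 * give RTE_ALIGN(16 + 256 * 16 + 256 * 16, 64) == 8256 bytes per
	 * queue pair.
	 */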
ntb_dev_configure(const struct rte_rawdev *dev, rte_rawdev_obj_t config,
	struct ntb_dev_config *conf = config;
	struct ntb_hw *hw = dev->dev_private;

	if (conf == NULL || config_size != sizeof(*conf))

	hw->queue_pairs = conf->num_queues;
	hw->queue_size = conf->queue_size;
	hw->used_mw_num = conf->mz_num;
	hw->mz = conf->mz_list;
	hw->rx_queues = rte_zmalloc("ntb_rx_queues",
			sizeof(struct ntb_rx_queue *) * hw->queue_pairs, 0);
	hw->tx_queues = rte_zmalloc("ntb_tx_queues",
			sizeof(struct ntb_tx_queue *) * hw->queue_pairs, 0);
	/* First the total stats, then the per queue stats. */
	xstats_num = (hw->queue_pairs + 1) * NTB_XSTATS_NUM;
	hw->ntb_xstats = rte_zmalloc("ntb_xstats", xstats_num *
				     sizeof(uint64_t), 0);
	hw->ntb_xstats_off = rte_zmalloc("ntb_xstats_off", xstats_num *
					 sizeof(uint64_t), 0);

	/* Start the handshake with the peer. */
	ret = ntb_handshake_work(dev);
		rte_free(hw->rx_queues);
		rte_free(hw->tx_queues);
		hw->rx_queues = NULL;
		hw->tx_queues = NULL;

ntb_dev_start(struct rte_rawdev *dev)
	struct ntb_hw *hw = dev->dev_private;
	uint32_t peer_base_l, peer_val;
	uint64_t peer_base_h;

	if (!hw->link_status || !hw->peer_dev_up)

	/* Set the total stats. */
	for (i = 0; i < NTB_XSTATS_NUM; i++) {
		hw->ntb_xstats[i] = 0;
		hw->ntb_xstats_off[i] = 0;

	for (i = 0; i < hw->queue_pairs; i++) {
		ret = ntb_queue_init(dev, i);
			NTB_LOG(ERR, "Failed to init queue.");

	hw->peer_mw_base = rte_zmalloc("ntb_peer_mw_base", hw->mw_cnt *
				       sizeof(uint64_t), 0);

	if (hw->ntb_ops->spad_read == NULL) {

	peer_val = (*hw->ntb_ops->spad_read)(dev, SPAD_Q_SZ, 0);
	if (peer_val != hw->queue_size) {
		NTB_LOG(ERR, "Inconsistent queue size! (local: %u peer: %u)",
			hw->queue_size, peer_val);

	peer_val = (*hw->ntb_ops->spad_read)(dev, SPAD_NUM_QPS, 0);
	if (peer_val != hw->queue_pairs) {
		NTB_LOG(ERR, "Inconsistent number of queues! (local: %u peer:"
			" %u)", hw->queue_pairs, peer_val);

	hw->peer_used_mws = (*hw->ntb_ops->spad_read)(dev, SPAD_USED_MWS, 0);

	for (i = 0; i < hw->peer_used_mws; i++) {
		peer_base_h = (*hw->ntb_ops->spad_read)(dev,
				SPAD_MW0_BA_H + 2 * i, 0);
		peer_base_l = (*hw->ntb_ops->spad_read)(dev,
				SPAD_MW0_BA_L + 2 * i, 0);
		hw->peer_mw_base[i] = (peer_base_h << 32) + peer_base_l;

	rte_free(hw->peer_mw_base);

	for (i = 0; i < hw->queue_pairs; i++) {
		ntb_rxq_release_mbufs(hw->rx_queues[i]);
		ntb_txq_release_mbufs(hw->tx_queues[i]);

ntb_dev_stop(struct rte_rawdev *dev)
	struct ntb_hw *hw = dev->dev_private;

	if (!hw->peer_dev_up)

	ntb_link_cleanup(dev);

	/* Notify the peer that the device will be down. */
	if (hw->ntb_ops->peer_db_set == NULL) {
		NTB_LOG(ERR, "Peer doorbell setting is not supported.");
	status = (*hw->ntb_ops->peer_db_set)(dev, 1);
		NTB_LOG(ERR, "Failed to tell the peer that the device is down.");

	/*
	 * Set the timeout to 1 s in case the peer is stopped accidentally
	 * without any notification.
	 */

	/* Wait for the cleanup work to be done before clearing the db mask. */
	while (hw->peer_dev_up && time_out) {

	/* Clear the doorbell mask. */
	if (hw->ntb_ops->db_set_mask == NULL) {
		NTB_LOG(ERR, "Doorbell mask setting is not supported.");
	status = (*hw->ntb_ops->db_set_mask)(dev,
			(((uint64_t)1 << hw->db_cnt) - 1));
		NTB_LOG(ERR, "Failed to clear doorbells.");

	for (i = 0; i < hw->queue_pairs; i++) {
		ntb_rxq_release_mbufs(hw->rx_queues[i]);
		ntb_txq_release_mbufs(hw->tx_queues[i]);

ntb_dev_close(struct rte_rawdev *dev)
	struct ntb_hw *hw = dev->dev_private;
	struct rte_intr_handle *intr_handle;

	for (i = 0; i < hw->queue_pairs; i++)
		ntb_queue_release(dev, i);
	hw->queue_pairs = 0;

	intr_handle = &hw->pci_dev->intr_handle;
	/* Clean the datapath event and vector mapping. */
	rte_intr_efd_disable(intr_handle);
	if (intr_handle->intr_vec) {
		rte_free(intr_handle->intr_vec);
		intr_handle->intr_vec = NULL;

	/* Disable uio intr before unregistering the callback. */
	rte_intr_disable(intr_handle);

	/* Unregister the callback func from the eal lib. */
	rte_intr_callback_unregister(intr_handle,
				     ntb_dev_intr_handler, dev);

ntb_dev_reset(struct rte_rawdev *rawdev __rte_unused)

ntb_attr_set(struct rte_rawdev *dev, const char *attr_name,
	     uint64_t attr_value)

	if (dev == NULL || attr_name == NULL) {
		NTB_LOG(ERR, "Invalid arguments for setting attributes");

	hw = dev->dev_private;

	if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
		if (hw->ntb_ops->spad_write == NULL)
		index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
		(*hw->ntb_ops->spad_write)(dev, hw->spad_user_list[index],
		NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
			attr_name, attr_value);

	if (!strncmp(attr_name, NTB_QUEUE_SZ_NAME, NTB_ATTR_NAME_LEN)) {
		hw->queue_size = attr_value;
		NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
			attr_name, attr_value);

	if (!strncmp(attr_name, NTB_QUEUE_NUM_NAME, NTB_ATTR_NAME_LEN)) {
		hw->queue_pairs = attr_value;
		NTB_LOG(DEBUG, "Set attribute (%s) Value (%" PRIu64 ")",
			attr_name, attr_value);

	/* Attribute not found. */
	NTB_LOG(ERR, "Attribute not found.");

ntb_attr_get(struct rte_rawdev *dev, const char *attr_name,
	     uint64_t *attr_value)

	if (dev == NULL || attr_name == NULL || attr_value == NULL) {
		NTB_LOG(ERR, "Invalid arguments for getting attributes");

	hw = dev->dev_private;

	if (!strncmp(attr_name, NTB_TOPO_NAME, NTB_ATTR_NAME_LEN)) {
		*attr_value = hw->topo;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_LINK_STATUS_NAME, NTB_ATTR_NAME_LEN)) {
		/* hw->link_status only reflects the hw link status. */
		*attr_value = hw->link_status && hw->peer_dev_up;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_SPEED_NAME, NTB_ATTR_NAME_LEN)) {
		*attr_value = hw->link_speed;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_WIDTH_NAME, NTB_ATTR_NAME_LEN)) {
		*attr_value = hw->link_width;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_MW_CNT_NAME, NTB_ATTR_NAME_LEN)) {
		*attr_value = hw->mw_cnt;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_DB_CNT_NAME, NTB_ATTR_NAME_LEN)) {
		*attr_value = hw->db_cnt;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_SPAD_CNT_NAME, NTB_ATTR_NAME_LEN)) {
		*attr_value = hw->spad_cnt;
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	if (!strncmp(attr_name, NTB_SPAD_USER, NTB_SPAD_USER_LEN)) {
		if (hw->ntb_ops->spad_read == NULL)
		index = atoi(&attr_name[NTB_SPAD_USER_LEN]);
		*attr_value = (*hw->ntb_ops->spad_read)(dev,
				hw->spad_user_list[index], 0);
		NTB_LOG(DEBUG, "Attribute (%s) Value (%" PRIu64 ")",
			attr_name, *attr_value);

	/* Attribute not found. */
	NTB_LOG(ERR, "Attribute not found.");

static inline uint64_t
ntb_stats_update(uint64_t offset, uint64_t stat)
		return (stat - offset);
	return (uint64_t)(((uint64_t)-1) - offset + stat + 1);
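
/*
 * ntb_stats_update() returns how much a counter has grown since the last
 * snapshot (offset), handling uint64_t wrap-around: once stat has wrapped
 * past offset, the delta is UINT64_MAX - offset + stat + 1, i.e. the
 * distance from offset to the wrap point plus the distance from zero to
 * stat. For example, offset == UINT64_MAX - 1 and stat == 2 yield 4.
 */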
ntb_xstats_get(const struct rte_rawdev *dev,
	       const unsigned int ids[],
	struct ntb_hw *hw = dev->dev_private;
	uint32_t i, j, off, xstats_num;

	/* Calculate total stats of all queues. */
	for (i = 0; i < NTB_XSTATS_NUM; i++) {
		hw->ntb_xstats[i] = 0;
		for (j = 0; j < hw->queue_pairs; j++) {
			off = NTB_XSTATS_NUM * (j + 1) + i;
			hw->ntb_xstats[i] +=
				ntb_stats_update(hw->ntb_xstats_off[off],
						 hw->ntb_xstats[off]);

	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
	for (i = 0; i < n && ids[i] < xstats_num; i++) {
		if (ids[i] < NTB_XSTATS_NUM)
			values[i] = hw->ntb_xstats[ids[i]];
			ntb_stats_update(hw->ntb_xstats_off[ids[i]],
					 hw->ntb_xstats[ids[i]]);

ntb_xstats_get_names(const struct rte_rawdev *dev,
		     struct rte_rawdev_xstats_name *xstats_names,
	struct ntb_hw *hw = dev->dev_private;
	uint32_t xstats_num, i, j, off;

	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
	if (xstats_names == NULL || size < xstats_num)

	/* Total stats names */
	memcpy(xstats_names, ntb_xstats_names, sizeof(ntb_xstats_names));

	/* Queue stats names */
	for (i = 0; i < hw->queue_pairs; i++) {
		for (j = 0; j < NTB_XSTATS_NUM; j++) {
			off = j + (i + 1) * NTB_XSTATS_NUM;
			snprintf(xstats_names[off].name,
				 sizeof(xstats_names[0].name),
				 "%s_q%u", ntb_xstats_names[j].name, i);

ntb_xstats_get_by_name(const struct rte_rawdev *dev,
		       const char *name, unsigned int *id)
	struct rte_rawdev_xstats_name *xstats_names;
	struct ntb_hw *hw = dev->dev_private;
	uint32_t xstats_num, i, j, off;

	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
	xstats_names = rte_zmalloc("ntb_stats_name",
				   sizeof(struct rte_rawdev_xstats_name) *
	ntb_xstats_get_names(dev, xstats_names, xstats_num);

	/* Calculate total stats of all queues. */
	for (i = 0; i < NTB_XSTATS_NUM; i++) {
		for (j = 0; j < hw->queue_pairs; j++) {
			off = NTB_XSTATS_NUM * (j + 1) + i;
			hw->ntb_xstats[i] +=
				ntb_stats_update(hw->ntb_xstats_off[off],
						 hw->ntb_xstats[off]);

	for (i = 0; i < xstats_num; i++) {
		if (!strncmp(name, xstats_names[i].name,
			     RTE_RAW_DEV_XSTATS_NAME_SIZE)) {
			rte_free(xstats_names);
			if (i < NTB_XSTATS_NUM)
				return hw->ntb_xstats[i];
			return ntb_stats_update(hw->ntb_xstats_off[i],

	NTB_LOG(ERR, "Cannot find the xstats name.");

ntb_xstats_reset(struct rte_rawdev *dev,
		 const uint32_t ids[],
	struct ntb_hw *hw = dev->dev_private;
	uint32_t i, j, off, xstats_num;

	xstats_num = NTB_XSTATS_NUM * (hw->queue_pairs + 1);
	for (i = 0; i < nb_ids && ids[i] < xstats_num; i++) {
		if (ids[i] < NTB_XSTATS_NUM) {
			for (j = 0; j < hw->queue_pairs; j++) {
				off = NTB_XSTATS_NUM * (j + 1) + ids[i];
				hw->ntb_xstats_off[off] = hw->ntb_xstats[off];
		hw->ntb_xstats_off[ids[i]] = hw->ntb_xstats[ids[i]];
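
/*
 * Reset does not clear the live counters: the current value is
 * snapshotted into ntb_xstats_off[], and readers report
 * ntb_stats_update(offset, stat), i.e. the growth since the snapshot.
 * Resetting a total stat (id < NTB_XSTATS_NUM) snapshots every
 * per-queue counter that feeds it.
 */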
static const struct rte_rawdev_ops ntb_ops = {
	.dev_info_get = ntb_dev_info_get,
	.dev_configure = ntb_dev_configure,
	.dev_start = ntb_dev_start,
	.dev_stop = ntb_dev_stop,
	.dev_close = ntb_dev_close,
	.dev_reset = ntb_dev_reset,

	.queue_def_conf = ntb_queue_conf_get,
	.queue_setup = ntb_queue_setup,
	.queue_release = ntb_queue_release,
	.queue_count = ntb_queue_count,

	.enqueue_bufs = ntb_enqueue_bufs,
	.dequeue_bufs = ntb_dequeue_bufs,

	.attr_get = ntb_attr_get,
	.attr_set = ntb_attr_set,

	.xstats_get = ntb_xstats_get,
	.xstats_get_names = ntb_xstats_get_names,
	.xstats_get_by_name = ntb_xstats_get_by_name,
	.xstats_reset = ntb_xstats_reset,

ntb_init_hw(struct rte_rawdev *dev, struct rte_pci_device *pci_dev)
	struct ntb_hw *hw = dev->dev_private;
	struct rte_intr_handle *intr_handle;

	hw->pci_dev = pci_dev;
	hw->peer_dev_up = 0;
	hw->link_status = NTB_LINK_DOWN;
	hw->link_speed = NTB_SPEED_NONE;
	hw->link_width = NTB_WIDTH_NONE;

	switch (pci_dev->id.device_id) {
	case NTB_INTEL_DEV_ID_B2B_SKX:
		hw->ntb_ops = &intel_ntb_ops;
		NTB_LOG(ERR, "Unsupported device.");

	if (hw->ntb_ops->ntb_dev_init == NULL)
	ret = (*hw->ntb_ops->ntb_dev_init)(dev);
		NTB_LOG(ERR, "Unable to init the ntb dev.");

	if (hw->ntb_ops->set_link == NULL)
	ret = (*hw->ntb_ops->set_link)(dev, 1);

	/* Init the doorbell. */
	hw->db_valid_mask = RTE_LEN2MASK(hw->db_cnt, uint64_t);

	intr_handle = &pci_dev->intr_handle;
	/* Register the callback func with the eal lib. */
	rte_intr_callback_register(intr_handle,
				   ntb_dev_intr_handler, dev);

	ret = rte_intr_efd_enable(intr_handle, hw->db_cnt);

	/* To clarify, the interrupt for each doorbell is already mapped
	 * by default for Intel gen3. They are mapped to MSI-X vectors
	 * 1-32, and the hardware interrupt is mapped to 0. Map all to 0
	 * for uio.
	 */
	if (!rte_intr_cap_multiple(intr_handle)) {
		for (i = 0; i < hw->db_cnt; i++) {
			if (hw->ntb_ops->vector_bind == NULL)
			ret = (*hw->ntb_ops->vector_bind)(dev, i, 0);

	if (hw->ntb_ops->db_set_mask == NULL ||
	    hw->ntb_ops->peer_db_set == NULL) {
		NTB_LOG(ERR, "Doorbell is not supported.");

	ret = (*hw->ntb_ops->db_set_mask)(dev, hw->db_mask);
		NTB_LOG(ERR, "Unable to enable intr for all dbs.");

	/* Enable uio intr after the callback is registered. */
	rte_intr_enable(intr_handle);

ntb_create(struct rte_pci_device *pci_dev, int socket_id)
	char name[RTE_RAWDEV_NAME_MAX_LEN];
	struct rte_rawdev *rawdev = NULL;

	if (pci_dev == NULL) {
		NTB_LOG(ERR, "Invalid pci_dev.");

	memset(name, 0, sizeof(name));
	snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
		 pci_dev->addr.bus, pci_dev->addr.devid,
		 pci_dev->addr.function);

	NTB_LOG(INFO, "Init %s on NUMA node %d", name, socket_id);

	/* Allocate the device structure. */
	rawdev = rte_rawdev_pmd_allocate(name, sizeof(struct ntb_hw),
	if (rawdev == NULL) {
		NTB_LOG(ERR, "Unable to allocate rawdev.");

	rawdev->dev_ops = &ntb_ops;
	rawdev->device = &pci_dev->device;
	rawdev->driver_name = pci_dev->driver->driver.name;

	ret = ntb_init_hw(rawdev, pci_dev);
		NTB_LOG(ERR, "Unable to init the ntb hw.");

	rte_rawdev_pmd_release(rawdev);

ntb_destroy(struct rte_pci_device *pci_dev)
	char name[RTE_RAWDEV_NAME_MAX_LEN];
	struct rte_rawdev *rawdev;

	if (pci_dev == NULL) {
		NTB_LOG(ERR, "Invalid pci_dev.");

	memset(name, 0, sizeof(name));
	snprintf(name, RTE_RAWDEV_NAME_MAX_LEN, "NTB:%x:%02x.%x",
		 pci_dev->addr.bus, pci_dev->addr.devid,
		 pci_dev->addr.function);

	NTB_LOG(INFO, "Closing %s on NUMA node %d", name, rte_socket_id());

	rawdev = rte_rawdev_pmd_get_named_dev(name);
	if (rawdev == NULL) {
		NTB_LOG(ERR, "Invalid device name (%s)", name);

	ret = rte_rawdev_pmd_release(rawdev);
		NTB_LOG(ERR, "Failed to destroy the ntb rawdev.");

ntb_probe(struct rte_pci_driver *pci_drv __rte_unused,
	  struct rte_pci_device *pci_dev)
	return ntb_create(pci_dev, rte_socket_id());

ntb_remove(struct rte_pci_device *pci_dev)
	return ntb_destroy(pci_dev);

static struct rte_pci_driver rte_ntb_pmd = {
	.id_table = pci_id_ntb_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_WC_ACTIVATE,
	.remove = ntb_remove,

RTE_PMD_REGISTER_PCI(raw_ntb, rte_ntb_pmd);
RTE_PMD_REGISTER_PCI_TABLE(raw_ntb, pci_id_ntb_map);
RTE_PMD_REGISTER_KMOD_DEP(raw_ntb, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_LOG_REGISTER(ntb_logtype, pmd.raw.ntb, INFO);
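
/*
 * Usage sketch (illustrative, not part of the driver): bind the NTB PCI
 * device to one of the kernel modules listed above before starting the
 * application, e.g. with DPDK's usertools:
 *
 *     dpdk-devbind.py --bind=vfio-pci <NTB device PCI address>
 *
 * The rawdev is then created under the name "NTB:<bus>:<devid>.<func>",
 * as built by ntb_create() above.
 */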