net/sfc: support regioned NIC DMA memory mapping type
author Viacheslav Galaktionov <viacheslav.galaktionov@oktetlabs.ru>
Wed, 17 Nov 2021 07:05:45 +0000 (10:05 +0300)
committer Ferruh Yigit <ferruh.yigit@intel.com>
Wed, 17 Nov 2021 11:37:07 +0000 (12:37 +0100)
DMA on SN1022 SoC requires extra mapping of the memory via MCDI.

Signed-off-by: Viacheslav Galaktionov <viacheslav.galaktionov@oktetlabs.ru>
Signed-off-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
Reviewed-by: Ivan Malov <ivan.malov@oktetlabs.ru>
20 files changed:
doc/guides/rel_notes/release_21_11.rst
drivers/net/sfc/meson.build
drivers/net/sfc/sfc.c
drivers/net/sfc/sfc.h
drivers/net/sfc/sfc_dp_rx.h
drivers/net/sfc/sfc_dp_tx.h
drivers/net/sfc/sfc_ef100_rx.c
drivers/net/sfc/sfc_ef100_tx.c
drivers/net/sfc/sfc_ef10_essb_rx.c
drivers/net/sfc/sfc_ef10_rx.c
drivers/net/sfc/sfc_ef10_tx.c
drivers/net/sfc/sfc_ethdev.c
drivers/net/sfc/sfc_ev.c
drivers/net/sfc/sfc_mcdi.c
drivers/net/sfc/sfc_nic_dma.c [new file with mode: 0644]
drivers/net/sfc/sfc_nic_dma.h [new file with mode: 0644]
drivers/net/sfc/sfc_nic_dma_dp.h [new file with mode: 0644]
drivers/net/sfc/sfc_port.c
drivers/net/sfc/sfc_rx.c
drivers/net/sfc/sfc_tx.c

index 1a48430..4d8c594 100644 (file)
@@ -242,6 +242,7 @@ New Features
   * Added flow API transfer proxy support
   * Added SN1000 virtual functions (VF) support
   * Added support for flow counters without service cores
+  * Added support for regioned DMA mapping required on SN1022 SoC
 
 * **Added power monitor API in vhost library.**
 
index 3308733..46d9418 100644 (file)
@@ -100,4 +100,5 @@ sources = files(
         'sfc_service.c',
         'sfc_repr_proxy.c',
         'sfc_repr.c',
+        'sfc_nic_dma.c',
 )
index 9c1be10..ed714fe 100644 (file)
@@ -26,6 +26,7 @@
 #include "sfc_tweak.h"
 #include "sfc_sw_stats.h"
 #include "sfc_switch.h"
+#include "sfc_nic_dma.h"
 
 bool
 sfc_repr_supported(const struct sfc_adapter *sa)
@@ -53,10 +54,12 @@ sfc_repr_available(const struct sfc_adapter_shared *sas)
 }
 
 int
-sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
-             size_t len, int socket_id, efsys_mem_t *esmp)
+sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id,
+             efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id,
+             efsys_mem_t *esmp)
 {
        const struct rte_memzone *mz;
+       int rc;
 
        sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
                     name, id, len, socket_id);
@@ -69,13 +72,17 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
                        rte_strerror(rte_errno));
                return ENOMEM;
        }
-
-       esmp->esm_addr = mz->iova;
-       if (esmp->esm_addr == RTE_BAD_IOVA) {
+       if (mz->iova == RTE_BAD_IOVA) {
                (void)rte_memzone_free(mz);
                return EFAULT;
        }
 
+       rc = sfc_nic_dma_mz_map(sa, mz, addr_type, &esmp->esm_addr);
+       if (rc != 0) {
+               (void)rte_memzone_free(mz);
+               return rc;
+       }
+
        esmp->esm_mz = mz;
        esmp->esm_base = mz->addr;
 
@@ -457,6 +464,13 @@ sfc_try_start(struct sfc_adapter *sa)
        if (rc != 0)
                goto fail_nic_init;
 
+       sfc_log_init(sa, "reconfigure NIC DMA");
+       rc = efx_nic_dma_reconfigure(sa->nic);
+       if (rc != 0) {
+               sfc_err(sa, "cannot reconfigure NIC DMA: %s", rte_strerror(rc));
+               goto fail_nic_dma_reconfigure;
+       }
+
        encp = efx_nic_cfg_get(sa->nic);
 
        /*
@@ -525,6 +539,7 @@ fail_ev_start:
 
 fail_intr_start:
 fail_tunnel_reconfigure:
+fail_nic_dma_reconfigure:
        efx_nic_fini(sa->nic);
 
 fail_nic_init:
index fee1738..3337cb5 100644 (file)
@@ -35,6 +35,7 @@
 #include "sfc_repr_proxy.h"
 #include "sfc_service.h"
 #include "sfc_ethdev_state.h"
+#include "sfc_nic_dma_dp.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -145,6 +146,8 @@ struct sfc_adapter_shared {
        bool                            counters_rxq_allocated;
        unsigned int                    nb_repr_rxq;
        unsigned int                    nb_repr_txq;
+
+       struct sfc_nic_dma_info         nic_dma_info;
 };
 
 /* Adapter process private data */
@@ -392,8 +395,9 @@ sfc_get_system_msecs(void)
        return rte_get_timer_cycles() * MS_PER_S / rte_get_timer_hz();
 }
 
-int sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
-                 size_t len, int socket_id, efsys_mem_t *esmp);
+int sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id,
+                 efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id,
+                 efsys_mem_t *esmp);
 void sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp);
 
 uint32_t sfc_register_logtype(const struct rte_pci_addr *pci_addr,
index 099f026..760540b 100644 (file)
@@ -14,6 +14,7 @@
 #include <ethdev_driver.h>
 
 #include "sfc_dp.h"
+#include "sfc_nic_dma_dp.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -95,6 +96,9 @@ struct sfc_dp_rx_qcreate_info {
 
        /** Mask to extract user bits from Rx prefix mark field */
        uint32_t                user_mark_mask;
+
+       /** NIC's DMA mapping information */
+       const struct sfc_nic_dma_info   *nic_dma_info;
 };
 
 /**
index 61cc0fa..aad3b06 100644 (file)
@@ -15,6 +15,7 @@
 #include "sfc_dp.h"
 #include "sfc_debug.h"
 #include "sfc_tso.h"
+#include "sfc_nic_dma_dp.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -80,6 +81,9 @@ struct sfc_dp_tx_qcreate_info {
        uint32_t                tso_max_payload_len;
        /** Maximum number of frames to be generated per TSOv3 transaction */
        uint32_t                tso_max_nb_outgoing_frames;
+
+       /** NIC's DMA mapping information */
+       const struct sfc_nic_dma_info   *nic_dma_info;
 };
 
 /**
index 259290f..5d16bf2 100644 (file)
@@ -27,6 +27,7 @@
 #include "sfc_dp_rx.h"
 #include "sfc_kvargs.h"
 #include "sfc_ef100.h"
+#include "sfc_nic_dma_dp.h"
 
 
 #define sfc_ef100_rx_err(_rxq, ...) \
@@ -66,6 +67,7 @@ struct sfc_ef100_rxq {
 #define SFC_EF100_RXQ_FLAG_INTR_EN     0x40
 #define SFC_EF100_RXQ_INGRESS_MPORT    0x80
 #define SFC_EF100_RXQ_USER_FLAG                0x100
+#define SFC_EF100_RXQ_NIC_DMA_MAP      0x200
        unsigned int                    ptr_mask;
        unsigned int                    evq_phase_bit_shift;
        unsigned int                    ready_pkts;
@@ -92,6 +94,8 @@ struct sfc_ef100_rxq {
 
        /* Datapath receive queue anchor */
        struct sfc_dp_rxq               dp;
+
+       const struct sfc_nic_dma_info   *nic_dma_info;
 };
 
 static inline struct sfc_ef100_rxq *
@@ -150,7 +154,6 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
        SFC_ASSERT(bulks > 0);
 
        do {
-               unsigned int id;
                unsigned int i;
 
                if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
@@ -170,17 +173,28 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
                        break;
                }
 
-               for (i = 0, id = added & ptr_mask;
-                    i < RTE_DIM(objs);
-                    ++i, ++id) {
+               for (i = 0; i < RTE_DIM(objs); ++i) {
                        struct rte_mbuf *m = objs[i];
                        struct sfc_ef100_rx_sw_desc *rxd;
-                       rte_iova_t phys_addr;
+                       rte_iova_t dma_addr;
 
                        __rte_mbuf_raw_sanity_check(m);
 
-                       SFC_ASSERT((id & ~ptr_mask) == 0);
-                       rxd = &rxq->sw_ring[id];
+                       dma_addr = rte_mbuf_data_iova_default(m);
+                       if (rxq->flags & SFC_EF100_RXQ_NIC_DMA_MAP) {
+                               dma_addr = sfc_nic_dma_map(rxq->nic_dma_info,
+                                               dma_addr,
+                                               rte_pktmbuf_data_len(m));
+                               if (unlikely(dma_addr == RTE_BAD_IOVA)) {
+                                       sfc_ef100_rx_err(rxq,
+                                               "failed to map DMA address on Rx");
+                                       /* Just skip buffer and try to continue */
+                                       rte_mempool_put(rxq->refill_mb_pool, m);
+                                       continue;
+                               }
+                       }
+
+                       rxd = &rxq->sw_ring[added & ptr_mask];
                        rxd->mbuf = m;
 
                        /*
@@ -189,12 +203,10 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
                         * structure members.
                         */
 
-                       phys_addr = rte_mbuf_data_iova_default(m);
-                       EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
-                           ESF_GZ_RX_BUF_ADDR, phys_addr);
+                       EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[added & ptr_mask],
+                           ESF_GZ_RX_BUF_ADDR, dma_addr);
+                       added++;
                }
-
-               added += RTE_DIM(objs);
        } while (--bulks > 0);
 
        SFC_ASSERT(rxq->added != added);
@@ -794,6 +806,10 @@ sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
                         info->fcw_offset +
                         ER_GZ_EVQ_INT_PRIME_OFST;
 
+       rxq->nic_dma_info = info->nic_dma_info;
+       if (rxq->nic_dma_info->nb_regions > 0)
+               rxq->flags |= SFC_EF100_RXQ_NIC_DMA_MAP;
+
        sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
 
        *dp_rxqp = &rxq->dp;
index b41eddb..4c2205f 100644 (file)
@@ -24,6 +24,7 @@
 #include "sfc_tweak.h"
 #include "sfc_kvargs.h"
 #include "sfc_ef100.h"
+#include "sfc_nic_dma_dp.h"
 
 
 #define sfc_ef100_tx_err(_txq, ...) \
@@ -63,6 +64,7 @@ struct sfc_ef100_txq {
 #define SFC_EF100_TXQ_STARTED          0x1
 #define SFC_EF100_TXQ_NOT_RUNNING      0x2
 #define SFC_EF100_TXQ_EXCEPTION                0x4
+#define SFC_EF100_TXQ_NIC_DMA_MAP      0x8
 
        unsigned int                    ptr_mask;
        unsigned int                    added;
@@ -87,6 +89,8 @@ struct sfc_ef100_txq {
 
        /* Datapath transmit queue anchor */
        struct sfc_dp_txq               dp;
+
+       const struct sfc_nic_dma_info   *nic_dma_info;
 };
 
 static inline struct sfc_ef100_txq *
@@ -342,8 +346,23 @@ sfc_ef100_tx_qdesc_cso_inner_l3(uint64_t tx_tunnel)
        return inner_l3;
 }
 
-static void
-sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
+/*
+ * Get the address to be put into a Tx descriptor for a buffer.
+ *
+ * If the queue does not require NIC DMA mapping, the IOVA is used as is.
+ * Otherwise it is translated using the driver cache of NIC DMA regions.
+ *
+ * Returns 0 on success. If the buffer is not covered by any region,
+ * *dma_addr is set to RTE_BAD_IOVA and EFAULT is returned so that
+ * callers can roll back the descriptors of the packet instead of
+ * handing a bad address to the NIC.
+ */
+static int
+sfc_ef100_tx_map(const struct sfc_ef100_txq *txq, rte_iova_t iova, size_t len,
+                rte_iova_t *dma_addr)
+{
+       if ((txq->flags & SFC_EF100_TXQ_NIC_DMA_MAP) == 0) {
+               *dma_addr = iova;
+               return 0;
+       }
+
+       *dma_addr = sfc_nic_dma_map(txq->nic_dma_info, iova, len);
+       if (unlikely(*dma_addr == RTE_BAD_IOVA)) {
+               sfc_ef100_tx_err(txq, "failed to map DMA address on Tx");
+               /* Positive errno is used in the driver */
+               return EFAULT;
+       }
+
+       return 0;
+}
+
+static int
+sfc_ef100_tx_qdesc_send_create(const struct sfc_ef100_txq *txq,
+                              const struct rte_mbuf *m, efx_oword_t *tx_desc)
 {
        bool outer_l3;
        bool outer_l4;
@@ -351,6 +370,8 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
        uint8_t partial_en;
        uint16_t part_cksum_w;
        uint16_t l4_offset_w;
+       rte_iova_t dma_addr;
+       int rc;
 
        if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) == 0) {
                outer_l3 = (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
@@ -384,8 +405,13 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
                                m->l2_len + m->l3_len) >> 1;
        }
 
+       rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova_default(m),
+                             rte_pktmbuf_data_len(m), &dma_addr);
+       if (unlikely(rc != 0))
+               return rc;
+
        EFX_POPULATE_OWORD_10(*tx_desc,
-                       ESF_GZ_TX_SEND_ADDR, rte_mbuf_data_iova(m),
+                       ESF_GZ_TX_SEND_ADDR, dma_addr,
                        ESF_GZ_TX_SEND_LEN, rte_pktmbuf_data_len(m),
                        ESF_GZ_TX_SEND_NUM_SEGS, m->nb_segs,
                        ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, l4_offset_w,
@@ -405,6 +431,8 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
 
                EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields);
        }
+
+       return 0;
 }
 
 static void
@@ -554,11 +582,11 @@ sfc_ef100_tx_pkt_descs_max(const struct rte_mbuf *m)
        return m->nb_segs + extra_descs;
 }
 
-static struct rte_mbuf *
+static int
 sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
-                      struct rte_mbuf *m, unsigned int *added)
+                      struct rte_mbuf **m, unsigned int *added)
 {
-       struct rte_mbuf *m_seg = m;
+       struct rte_mbuf *m_seg = *m;
        unsigned int nb_hdr_descs;
        unsigned int nb_pld_descs;
        unsigned int seg_split = 0;
@@ -570,17 +598,19 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
        size_t tcph_off;
        size_t header_len;
        size_t remaining_hdr_len;
+       rte_iova_t dma_addr;
+       int rc;
 
-       if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
-               outer_iph_off = m->outer_l2_len;
-               outer_udph_off = outer_iph_off + m->outer_l3_len;
+       if (m_seg->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+               outer_iph_off = m_seg->outer_l2_len;
+               outer_udph_off = outer_iph_off + m_seg->outer_l3_len;
        } else {
                outer_iph_off = 0;
                outer_udph_off = 0;
        }
-       iph_off = outer_udph_off + m->l2_len;
-       tcph_off = iph_off + m->l3_len;
-       header_len = tcph_off + m->l4_len;
+       iph_off = outer_udph_off + m_seg->l2_len;
+       tcph_off = iph_off + m_seg->l3_len;
+       header_len = tcph_off + m_seg->l4_len;
 
        /*
         * Remember ID of the TX_TSO descriptor to be filled in.
@@ -592,11 +622,15 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
 
        remaining_hdr_len = header_len;
        do {
+               rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova(m_seg),
+                                     rte_pktmbuf_data_len(m_seg), &dma_addr);
+               if (unlikely(rc != 0))
+                       return rc;
+
                id = (*added)++ & txq->ptr_mask;
                if (rte_pktmbuf_data_len(m_seg) <= remaining_hdr_len) {
                        /* The segment is fully header segment */
-                       sfc_ef100_tx_qdesc_seg_create(
-                               rte_mbuf_data_iova(m_seg),
+                       sfc_ef100_tx_qdesc_seg_create(dma_addr,
                                rte_pktmbuf_data_len(m_seg),
                                &txq->txq_hw_ring[id]);
                        remaining_hdr_len -= rte_pktmbuf_data_len(m_seg);
@@ -605,15 +639,13 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
                         * The segment must be split into header and
                         * payload segments
                         */
-                       sfc_ef100_tx_qdesc_seg_create(
-                               rte_mbuf_data_iova(m_seg),
-                               remaining_hdr_len,
-                               &txq->txq_hw_ring[id]);
-                       SFC_ASSERT(txq->sw_ring[id].mbuf == NULL);
+                       sfc_ef100_tx_qdesc_seg_create(dma_addr,
+                               remaining_hdr_len, &txq->txq_hw_ring[id]);
+                       txq->sw_ring[id].mbuf = NULL;
 
                        id = (*added)++ & txq->ptr_mask;
                        sfc_ef100_tx_qdesc_seg_create(
-                               rte_mbuf_data_iova(m_seg) + remaining_hdr_len,
+                               dma_addr + remaining_hdr_len,
                                rte_pktmbuf_data_len(m_seg) - remaining_hdr_len,
                                &txq->txq_hw_ring[id]);
                        remaining_hdr_len = 0;
@@ -628,15 +660,16 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
         * pointer counts it twice and we should correct it.
         */
        nb_hdr_descs = ((id - tso_desc_id) & txq->ptr_mask) - seg_split;
-       nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split;
+       nb_pld_descs = (*m)->nb_segs - nb_hdr_descs + seg_split;
 
-       sfc_ef100_tx_qdesc_tso_create(m, nb_hdr_descs, nb_pld_descs, header_len,
-                                     rte_pktmbuf_pkt_len(m) - header_len,
+       sfc_ef100_tx_qdesc_tso_create(*m, nb_hdr_descs, nb_pld_descs, header_len,
+                                     rte_pktmbuf_pkt_len(*m) - header_len,
                                      outer_iph_off, outer_udph_off,
                                      iph_off, tcph_off,
                                      &txq->txq_hw_ring[tso_desc_id]);
 
-       return m_seg;
+       *m = m_seg;
+       return 0;
 }
 
 static uint16_t
@@ -648,6 +681,8 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        bool reap_done;
        struct rte_mbuf **pktp;
        struct rte_mbuf **pktp_end;
+       rte_iova_t dma_addr;
+       int rc;
 
        if (unlikely(txq->flags &
                     (SFC_EF100_TXQ_NOT_RUNNING | SFC_EF100_TXQ_EXCEPTION)))
@@ -694,14 +729,15 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        id = added++ & txq->ptr_mask;
                        sfc_ef100_tx_qdesc_prefix_create(m_seg,
                                                         &txq->txq_hw_ring[id]);
+                       txq->sw_ring[id].mbuf = NULL;
                }
 
                if (m_seg->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
-                       m_seg = sfc_ef100_xmit_tso_pkt(txq, m_seg, &added);
+                       rc = sfc_ef100_xmit_tso_pkt(txq, &m_seg, &added);
                } else {
                        id = added++ & txq->ptr_mask;
-                       sfc_ef100_tx_qdesc_send_create(m_seg,
-                                                      &txq->txq_hw_ring[id]);
+                       rc = sfc_ef100_tx_qdesc_send_create(txq, m_seg,
+                                                       &txq->txq_hw_ring[id]);
 
                        /*
                         * rte_pktmbuf_free() is commonly used in DPDK for
@@ -722,22 +758,29 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        m_seg = m_seg->next;
                }
 
-               while (m_seg != NULL) {
+               while (likely(rc == 0) && m_seg != NULL) {
                        RTE_BUILD_BUG_ON(SFC_MBUF_SEG_LEN_MAX >
                                         SFC_EF100_TX_SEG_DESC_LEN_MAX);
 
                        id = added++ & txq->ptr_mask;
-                       sfc_ef100_tx_qdesc_seg_create(rte_mbuf_data_iova(m_seg),
+                       rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova(m_seg),
+                                             rte_pktmbuf_data_len(m_seg),
+                                             &dma_addr);
+                       sfc_ef100_tx_qdesc_seg_create(dma_addr,
                                        rte_pktmbuf_data_len(m_seg),
                                        &txq->txq_hw_ring[id]);
                        txq->sw_ring[id].mbuf = m_seg;
                        m_seg = m_seg->next;
                }
 
-               dma_desc_space -= (added - pkt_start);
+               if (likely(rc == 0)) {
+                       dma_desc_space -= (added - pkt_start);
 
-               sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1,
-                                  rte_pktmbuf_pkt_len(*pktp));
+                       sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1,
+                                          rte_pktmbuf_pkt_len(*pktp));
+               } else {
+                       added = pkt_start;
+               }
        }
 
        if (likely(added != txq->added)) {
@@ -837,6 +880,10 @@ sfc_ef100_tx_qcreate(uint16_t port_id, uint16_t queue_id,
        txq->tso_max_payload_len = info->tso_max_payload_len;
        txq->tso_max_nb_outgoing_frames = info->tso_max_nb_outgoing_frames;
 
+       txq->nic_dma_info = info->nic_dma_info;
+       if (txq->nic_dma_info->nb_regions > 0)
+               txq->flags |= SFC_EF100_TXQ_NIC_DMA_MAP;
+
        sfc_ef100_tx_debug(txq, "TxQ doorbell is %p", txq->doorbell);
 
        *dp_txqp = &txq->dp;
index 4f7d712..712c207 100644 (file)
@@ -573,6 +573,10 @@ sfc_ef10_essb_rx_qcreate(uint16_t port_id, uint16_t queue_id,
        struct sfc_ef10_essb_rxq *rxq;
        int rc;
 
+       rc = ENOTSUP;
+       if (info->nic_dma_info->nb_regions > 0)
+               goto fail_nic_dma;
+
        rc = rte_mempool_ops_get_info(mp, &mp_info);
        if (rc != 0) {
                /* Positive errno is used in the driver */
@@ -641,6 +645,7 @@ fail_desc_alloc:
 fail_rxq_alloc:
 fail_no_block_dequeue:
 fail_get_contig_block_size:
+fail_nic_dma:
        return rc;
 }
 
index 8503c3c..7be224c 100644 (file)
@@ -651,6 +651,10 @@ sfc_ef10_rx_qcreate(uint16_t port_id, uint16_t queue_id,
        if (info->rxq_entries != info->evq_entries)
                goto fail_rxq_args;
 
+       rc = ENOTSUP;
+       if (info->nic_dma_info->nb_regions > 0)
+               goto fail_nic_dma;
+
        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-ef10-rxq", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
@@ -696,6 +700,7 @@ fail_desc_alloc:
        rte_free(rxq);
 
 fail_rxq_alloc:
+fail_nic_dma:
 fail_rxq_args:
        return rc;
 }
index 2463c14..5403a60 100644 (file)
@@ -942,6 +942,10 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id,
        if (info->txq_entries != info->evq_entries)
                goto fail_bad_args;
 
+       rc = ENOTSUP;
+       if (info->nic_dma_info->nb_regions > 0)
+               goto fail_nic_dma;
+
        rc = ENOMEM;
        txq = rte_zmalloc_socket("sfc-ef10-txq", sizeof(*txq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
@@ -995,6 +999,7 @@ fail_sw_ring_alloc:
        rte_free(txq);
 
 fail_txq_alloc:
+fail_nic_dma:
 fail_bad_args:
        return rc;
 }
index a8b971a..d4210b6 100644 (file)
@@ -32,6 +32,7 @@
 #include "sfc_repr.h"
 #include "sfc_sw_stats.h"
 #include "sfc_switch.h"
+#include "sfc_nic_dma.h"
 
 #define SFC_XSTAT_ID_INVALID_VAL  UINT64_MAX
 #define SFC_XSTAT_ID_INVALID_NAME '\0'
@@ -375,6 +376,7 @@ sfc_dev_close(struct rte_eth_dev *dev)
 
        sfc_eth_dev_clear_ops(dev);
 
+       sfc_nic_dma_detach(sa);
        sfc_detach(sa);
        sfc_unprobe(sa);
 
@@ -2840,11 +2842,22 @@ sfc_eth_dev_init(struct rte_eth_dev *dev, void *init_params)
        from = (const struct rte_ether_addr *)(encp->enc_mac_addr);
        rte_ether_addr_copy(from, &dev->data->mac_addrs[0]);
 
+       /*
+        * Setup the NIC DMA mapping handler. All internal mempools
+        * MUST be created on attach before this point, and the
+        * adapter MUST NOT create mempools with the adapter lock
+        * held after this point.
+        */
+       rc = sfc_nic_dma_attach(sa);
+       if (rc != 0)
+               goto fail_nic_dma_attach;
+
        sfc_adapter_unlock(sa);
 
        sfc_log_init(sa, "done");
        return 0;
 
+fail_nic_dma_attach:
 fail_switchdev_no_mae:
        sfc_detach(sa);
 
index ba44093..f949abb 100644 (file)
@@ -911,6 +911,7 @@ sfc_ev_qinit(struct sfc_adapter *sa,
 
        /* Allocate DMA space */
        rc = sfc_dma_alloc(sa, sfc_evq_type2str(type), type_index,
+                          EFX_NIC_DMA_ADDR_EVENT_RING,
                           efx_evq_size(sa->nic, evq->entries, sa->evq_flags),
                           socket_id, &evq->mem);
        if (rc != 0)
index 1a23765..65e5e38 100644 (file)
@@ -19,9 +19,10 @@ static int
 sfc_mcdi_dma_alloc(void *cookie, const char *name, size_t len,
                   efsys_mem_t *esmp)
 {
-       const struct sfc_adapter *sa = cookie;
+       struct sfc_adapter *sa = cookie;
 
-       return sfc_dma_alloc(sa, name, 0, len, sa->socket_id, esmp);
+       return sfc_dma_alloc(sa, name, 0, EFX_NIC_DMA_ADDR_MCDI_BUF, len,
+                            sa->socket_id, esmp);
 }
 
 static sfc_efx_mcdi_dma_free_cb sfc_mcdi_dma_free;
diff --git a/drivers/net/sfc/sfc_nic_dma.c b/drivers/net/sfc/sfc_nic_dma.c
new file mode 100644 (file)
index 0000000..59bc113
--- /dev/null
@@ -0,0 +1,335 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#include <rte_mempool.h>
+#include <rte_memzone.h>
+
+#include "efx.h"
+
+#include "sfc_log.h"
+#include "sfc.h"
+#include "sfc_nic_dma.h"
+
+/*
+ * Append a region to the driver cache of NIC DMA regions.
+ *
+ * Returns 0 on success or ENOMEM if all slots of the cache are in use.
+ */
+static int
+sfc_nic_dma_add_region(struct sfc_nic_dma_info *nic_dma_info,
+                      rte_iova_t nic_base, rte_iova_t trgt_base,
+                      size_t map_len)
+{
+       const unsigned int slot = nic_dma_info->nb_regions;
+       struct sfc_nic_dma_region *entry;
+
+       if (slot >= RTE_DIM(nic_dma_info->regions))
+               return ENOMEM;
+
+       entry = &nic_dma_info->regions[slot];
+       entry->trgt_base = trgt_base;
+       entry->trgt_end = trgt_base + map_len;
+       entry->nic_base = nic_base;
+
+       /* Account for the entry which has just been filled in */
+       nic_dma_info->nb_regions = slot + 1;
+       return 0;
+}
+
+/*
+ * Register mapping for all IOVA mempools at the time of creation to
+ * have mapping for all mbufs.
+ */
+
+/* Context handed over to the per-chunk mempool iteration callback */
+struct sfc_nic_dma_register_mempool_data {
+       /* Adapter whose NIC DMA mapping is being extended */
+       struct sfc_adapter              *sa;
+       /* Last error hit by the per-chunk callback, 0 if none */
+       int                             rc;
+};
+
+/*
+ * Callback for rte_mempool_mem_iter(): make sure that a mempool memory
+ * chunk is covered by the NIC DMA mapping.
+ *
+ * Chunks without a valid IOVA and chunks which are already covered by the
+ * driver cache are skipped. Since the callback cannot return a value,
+ * failures are logged and reported via the rc member of the opaque
+ * struct sfc_nic_dma_register_mempool_data.
+ */
+static void
+sfc_nic_dma_register_mempool_chunk(struct rte_mempool *mp __rte_unused,
+                                  void *opaque,
+                                  struct rte_mempool_memhdr *memhdr,
+                                  unsigned mem_idx __rte_unused)
+{
+       struct sfc_nic_dma_register_mempool_data *register_data = opaque;
+       struct sfc_adapter *sa = register_data->sa;
+       struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+       efsys_dma_addr_t nic_base;
+       efsys_dma_addr_t trgt_base;
+       size_t map_len;
+       int rc;
+
+       if (memhdr->iova == RTE_BAD_IOVA)
+               return;
+
+       /*
+        * Check if the memory chunk is mapped already. In that case, there's
+        * nothing left to do.
+        */
+       nic_base = sfc_nic_dma_map(&sas->nic_dma_info, memhdr->iova,
+                                  memhdr->len);
+       if (nic_base != RTE_BAD_IOVA)
+               return;
+
+       rc = efx_nic_dma_config_add(sa->nic, memhdr->iova, memhdr->len,
+                                   &nic_base, &trgt_base, &map_len);
+       if (rc != 0) {
+               /* The length is size_t, cast it to match the PRIx64 format */
+               sfc_err(sa,
+                       "cannot handle memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+                       memhdr->addr, (uint64_t)memhdr->iova,
+                       (uint64_t)memhdr->len, rte_strerror(rc));
+               register_data->rc = rc;
+               return;
+       }
+
+       sfc_info(sa,
+                "registered memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 " -> NIC_BASE=%" PRIx64 " TRGT_BASE=%" PRIx64 " MAP_LEN=%" PRIx64,
+                memhdr->addr, (uint64_t)memhdr->iova, (uint64_t)memhdr->len,
+                (uint64_t)nic_base, (uint64_t)trgt_base, (uint64_t)map_len);
+
+       /* Mirror the new region in the driver cache used by the datapath */
+       rc = sfc_nic_dma_add_region(&sas->nic_dma_info, nic_base, trgt_base,
+                                   map_len);
+       if (rc != 0) {
+               sfc_err(sa, "failed to add regioned NIC DMA mapping: %s",
+                       rte_strerror(rc));
+               register_data->rc = rc;
+       }
+}
+
+/*
+ * Register all memory chunks of a mempool in the NIC DMA mapping.
+ *
+ * The adapter lock must be held by the caller. Mempools flagged with
+ * RTE_MEMPOOL_F_NON_IO are skipped. Errors do not stop processing:
+ * every step is attempted and the last error encountered is returned.
+ *
+ * Returns 0 on success or a positive errno.
+ */
+static int
+sfc_nic_dma_register_mempool(struct sfc_adapter *sa, struct rte_mempool *mp)
+{
+       struct sfc_nic_dma_register_mempool_data register_data = {
+               .sa = sa,
+       };
+       uint32_t iters;
+       int result = 0;
+       int rc;
+
+       SFC_ASSERT(sfc_adapter_is_locked(sa));
+
+       if (mp->flags & RTE_MEMPOOL_F_NON_IO)
+               return 0;
+
+       iters = rte_mempool_mem_iter(mp, sfc_nic_dma_register_mempool_chunk,
+                                    &register_data);
+       if (iters != mp->nb_mem_chunks) {
+               sfc_err(sa,
+                       "failed to iterate over memory chunks, some mbufs may be unusable");
+               result = EFAULT;
+               /*
+                * Return an error, but try to continue if error is
+                * async and cannot be handled properly.
+                */
+       }
+
+       /* Errors from the per-chunk callback arrive via register_data */
+       if (register_data.rc != 0) {
+               sfc_err(sa,
+                       "failed to map some memory chunks (%s), some mbufs may be unusable",
+                       rte_strerror(register_data.rc));
+               result = register_data.rc;
+               /* Try to continue */
+       }
+
+       /*
+        * There is no point in applying mapping changes triggered by
+        * mempool registration unless the adapter is started: the
+        * configuration is propagated on start and mbufs mapping is
+        * required in the started state only.
+        */
+       if (sa->state == SFC_ETHDEV_STARTED) {
+               /*
+                * It's safe to reconfigure the DMA mapping even if no changes
+                * have been made during memory chunks iteration. In that case,
+                * this operation will not change anything either.
+                */
+               rc = efx_nic_dma_reconfigure(sa->nic);
+               if (rc != 0) {
+                       sfc_err(sa, "cannot reconfigure NIC DMA: %s",
+                               rte_strerror(rc));
+                       result = rc;
+               }
+       }
+
+       return result;
+}
+
+/*
+ * Mempool event callback: on RTE_MEMPOOL_EVENT_READY register memory
+ * chunks of the mempool under the adapter lock. All other events are
+ * ignored. The registration result is deliberately discarded since the
+ * callback has no way to report it.
+ */
+static void
+sfc_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
+                    void *user_data)
+{
+       if (event == RTE_MEMPOOL_EVENT_READY) {
+               struct sfc_adapter *sa = user_data;
+
+               sfc_adapter_lock(sa);
+               (void)sfc_nic_dma_register_mempool(sa, mp);
+               sfc_adapter_unlock(sa);
+       }
+}
+
+/* Context handed over to the mempool walk callback */
+struct sfc_mempool_walk_data {
+       /* Adapter to register mempools for */
+       struct sfc_adapter              *sa;
+       /* Last mempool registration error, 0 if none */
+       int                             rc;
+};
+
+/*
+ * Mempool walk callback: register the mempool and remember the failure,
+ * if any, in the walk context. A success never clears an earlier error.
+ */
+static void
+sfc_mempool_walk_cb(struct rte_mempool *mp, void *arg)
+{
+       struct sfc_mempool_walk_data *ctx = arg;
+       const int status = sfc_nic_dma_register_mempool(ctx->sa, mp);
+
+       if (status != 0)
+               ctx->rc = status;
+}
+
+/*
+ * Set up the regioned NIC DMA mapping: subscribe to mempool events and
+ * register the mempools visited by rte_mempool_walk().
+ *
+ * On mempool registration failure the event callback is unregistered and
+ * the driver cache of regions is reset.
+ *
+ * Returns 0 on success or a positive errno.
+ */
+static int
+sfc_nic_dma_attach_regioned(struct sfc_adapter *sa)
+{
+       struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+       struct sfc_mempool_walk_data walk_data = {
+               .sa = sa,
+       };
+       int rc;
+
+       /* Subscribe before the walk to get events for mempools created later */
+       rc = rte_mempool_event_callback_register(sfc_mempool_event_cb, sa);
+       if (rc != 0) {
+               sfc_err(sa, "failed to register mempool event callback");
+               /* Positive errno is reported regardless of the actual cause */
+               rc = EFAULT;
+               goto fail_mempool_event_callback_register;
+       }
+
+       rte_mempool_walk(sfc_mempool_walk_cb, &walk_data);
+       if (walk_data.rc != 0) {
+               rc = walk_data.rc;
+               goto fail_mempool_walk;
+       }
+
+       return 0;
+
+fail_mempool_walk:
+       rte_mempool_event_callback_unregister(sfc_mempool_event_cb, sa);
+       sas->nic_dma_info.nb_regions = 0;
+
+fail_mempool_event_callback_register:
+       return rc;
+}
+
+/*
+ * Tear down the regioned NIC DMA mapping: stop listening to mempool
+ * events and drop all regions from the driver cache.
+ */
+static void
+sfc_nic_dma_detach_regioned(struct sfc_adapter *sa)
+{
+       rte_mempool_event_callback_unregister(sfc_mempool_event_cb, sa);
+       sfc_sa2shared(sa)->nic_dma_info.nb_regions = 0;
+}
+
+/*
+ * Set up NIC DMA mapping according to the mapping type found in the NIC
+ * configuration.
+ *
+ * Returns 0 on success, ENOTSUP for a mapping type which is neither flat
+ * nor regioned, or the error of the regioned attach.
+ */
+int
+sfc_nic_dma_attach(struct sfc_adapter *sa)
+{
+       const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
+       int rc;
+
+       sfc_log_init(sa, "dma_mapping_type=%u", encp->enc_dma_mapping);
+
+       if (encp->enc_dma_mapping == EFX_NIC_DMA_MAPPING_FLAT) {
+               /* No mapping required */
+               rc = 0;
+       } else if (encp->enc_dma_mapping == EFX_NIC_DMA_MAPPING_REGIONED) {
+               rc = sfc_nic_dma_attach_regioned(sa);
+       } else {
+               rc = ENOTSUP;
+       }
+
+       sfc_log_init(sa, "done: %s", rte_strerror(rc));
+       return rc;
+}
+
+/*
+ * Release NIC DMA mapping resources according to the mapping type found
+ * in the NIC configuration.
+ */
+void
+sfc_nic_dma_detach(struct sfc_adapter *sa)
+{
+       const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
+
+       sfc_log_init(sa, "dma_mapping_type=%u", encp->enc_dma_mapping);
+
+       /* Nothing to do for any mapping type except the regioned one */
+       if (encp->enc_dma_mapping == EFX_NIC_DMA_MAPPING_REGIONED)
+               sfc_nic_dma_detach_regioned(sa);
+
+       sfc_log_init(sa, "done");
+}
+
+/*
+ * Get the DMA address to be used by the NIC to access a memzone.
+ *
+ * First try to map the memzone using the current DMA configuration.
+ * If libefx reports ENOENT, add the memzone to the DMA configuration,
+ * mirror the new region in the driver cache, apply the configuration
+ * and retry the mapping.
+ *
+ * Returns 0 on success (with *dma_addr filled in) or a positive errno.
+ */
+int
+sfc_nic_dma_mz_map(struct sfc_adapter *sa, const struct rte_memzone *mz,
+                  efx_nic_dma_addr_type_t addr_type,
+                  efsys_dma_addr_t *dma_addr)
+{
+       efsys_dma_addr_t nic_base;
+       efsys_dma_addr_t trgt_base;
+       size_t map_len;
+       int rc;
+
+       /*
+        * Check if the memzone can be mapped already without changing the DMA
+        * configuration.
+        * libefx is used instead of the driver cache since it can take the type
+        * of the buffer into account and make a better decision when it comes
+        * to buffers that are mapped by the FW itself.
+        */
+       rc = efx_nic_dma_map(sa->nic, addr_type, mz->iova, mz->len, dma_addr);
+       if (rc == 0)
+               return 0;
+
+       /* The memzone length is size_t, cast it to match the PRIx64 format */
+       if (rc != ENOENT) {
+               sfc_err(sa,
+                       "failed to map memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+                       mz->addr, (uint64_t)mz->iova, (uint64_t)mz->len,
+                       rte_strerror(rc));
+               return rc;
+       }
+
+       rc = efx_nic_dma_config_add(sa->nic, mz->iova, mz->len,
+                                   &nic_base, &trgt_base, &map_len);
+       if (rc != 0) {
+               sfc_err(sa,
+                       "cannot handle memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+                       mz->addr, (uint64_t)mz->iova, (uint64_t)mz->len,
+                       rte_strerror(rc));
+               return EFAULT;
+       }
+
+       rc = sfc_nic_dma_add_region(&sfc_sa2shared(sa)->nic_dma_info,
+                                   nic_base, trgt_base, map_len);
+       if (rc != 0) {
+               sfc_err(sa,
+                       "failed to add DMA region VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+                       mz->addr, (uint64_t)mz->iova, (uint64_t)mz->len,
+                       rte_strerror(rc));
+               return rc;
+       }
+
+       rc = efx_nic_dma_reconfigure(sa->nic);
+       if (rc != 0) {
+               sfc_err(sa, "failed to reconfigure DMA");
+               return rc;
+       }
+
+       /* Retry now that the configuration covers the memzone */
+       rc = efx_nic_dma_map(sa->nic, addr_type, mz->iova, mz->len, dma_addr);
+       if (rc != 0) {
+               sfc_err(sa,
+                       "failed to map memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+                       mz->addr, (uint64_t)mz->iova, (uint64_t)mz->len,
+                       rte_strerror(rc));
+               return rc;
+       }
+
+       return 0;
+}
diff --git a/drivers/net/sfc/sfc_nic_dma.h b/drivers/net/sfc/sfc_nic_dma.h
new file mode 100644 (file)
index 0000000..46ce74e
--- /dev/null
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#ifndef _SFC_NIC_DMA_H
+#define _SFC_NIC_DMA_H
+
+#include <rte_memzone.h>
+
+#include "efx.h"
+
+#include "sfc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int sfc_nic_dma_attach(struct sfc_adapter *sa);
+void sfc_nic_dma_detach(struct sfc_adapter *sa);
+
+int sfc_nic_dma_mz_map(struct sfc_adapter *sa, const struct rte_memzone *mz,
+                      efx_nic_dma_addr_type_t addr_type,
+                      efsys_dma_addr_t *dma_addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _SFC_NIC_DMA_H */
diff --git a/drivers/net/sfc/sfc_nic_dma_dp.h b/drivers/net/sfc/sfc_nic_dma_dp.h
new file mode 100644 (file)
index 0000000..b7e52ec
--- /dev/null
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#ifndef _SFC_NIC_DMA_DP_H
+#define _SFC_NIC_DMA_DP_H
+
+#include <rte_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SFC_NIC_DMA_REGIONS_MAX 2
+
+struct sfc_nic_dma_region {
+       rte_iova_t      nic_base;       /**< NIC address that trgt_base maps to */
+       rte_iova_t      trgt_base;      /**< Lowest target address in the region */
+       rte_iova_t      trgt_end;       /**< Upper bound: addr + len must be <= it */
+};
+
+/**
+ * Driver cache for NIC DMA regions.
+ *
+ * Only the first nb_regions entries of regions[] are consulted by
+ * sfc_nic_dma_map(); the array has room for SFC_NIC_DMA_REGIONS_MAX entries.
+ */
+struct sfc_nic_dma_info {
+       struct sfc_nic_dma_region               regions[SFC_NIC_DMA_REGIONS_MAX];
+       unsigned int                            nb_regions;     /**< Number of valid entries in regions[] */
+};
+
+/**
+ * Translate a target address range into the matching NIC DMA address.
+ *
+ * Cached regions are examined in array order and the first one that
+ * holds the whole buffer (start within the region, length not exceeding
+ * the room left up to the region end) is used for the translation.
+ *
+ * @param nic_dma_info  Driver cache of NIC DMA regions
+ * @param trgt_addr     Start of the buffer in the target address space
+ * @param len           Length of the buffer
+ *
+ * @return NIC address corresponding to @p trgt_addr, or RTE_BAD_IOVA if
+ *         no cached region covers the buffer (the cache may be empty).
+ */
+static inline rte_iova_t
+sfc_nic_dma_map(const struct sfc_nic_dma_info *nic_dma_info,
+               rte_iova_t trgt_addr, size_t len)
+{
+       const struct sfc_nic_dma_region *reg = nic_dma_info->regions;
+       unsigned int remaining;
+
+       for (remaining = nic_dma_info->nb_regions; remaining != 0;
+            remaining--, reg++) {
+               /* Buffer start lies outside of this region */
+               if (trgt_addr < reg->trgt_base || trgt_addr > reg->trgt_end)
+                       continue;
+
+               /*
+                * Compare the length with the room left in the region
+                * instead of computing trgt_addr + len, so that no
+                * overflow check is needed.
+                */
+               if (len > reg->trgt_end - trgt_addr)
+                       continue;
+
+               return reg->nic_base + (trgt_addr - reg->trgt_base);
+       }
+
+       return RTE_BAD_IOVA;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _SFC_NIC_DMA_DP_H */
index 27b02b1..9113937 100644 (file)
@@ -440,7 +440,8 @@ sfc_port_attach(struct sfc_adapter *sa)
 
        mac_nstats = efx_nic_cfg_get(sa->nic)->enc_mac_stats_nstats;
        mac_stats_size = RTE_ALIGN(mac_nstats * sizeof(uint64_t), EFX_BUF_SIZE);
-       rc = sfc_dma_alloc(sa, "mac_stats", 0, mac_stats_size,
+       rc = sfc_dma_alloc(sa, "mac_stats", 0, EFX_NIC_DMA_ADDR_MAC_STATS_BUF,
+                          mac_stats_size,
                           sa->socket_id, &port->mac_stats_dma_mem);
        if (rc != 0)
                goto fail_mac_stats_dma_alloc;
index 17ff2aa..7104284 100644 (file)
@@ -483,6 +483,10 @@ sfc_efx_rx_qcreate(uint16_t port_id, uint16_t queue_id,
        struct sfc_efx_rxq *rxq;
        int rc;
 
+       rc = ENOTSUP;
+       if (info->nic_dma_info->nb_regions > 0)
+               goto fail_nic_dma;
+
        rc = ENOMEM;
        rxq = rte_zmalloc_socket("sfc-efx-rxq", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
@@ -518,6 +522,7 @@ fail_desc_alloc:
        rte_free(rxq);
 
 fail_rxq_alloc:
+fail_nic_dma:
        return rc;
 }
 
@@ -1218,7 +1223,7 @@ sfc_rx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
 
        rxq->buf_size = buf_size;
 
-       rc = sfc_dma_alloc(sa, "rxq", sw_index,
+       rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_NIC_DMA_ADDR_RX_RING,
                           efx_rxq_size(sa->nic, rxq_info->entries),
                           socket_id, &rxq->mem);
        if (rc != 0)
@@ -1248,6 +1253,8 @@ sfc_rx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
        info.vi_window_shift = encp->enc_vi_window_shift;
        info.fcw_offset = sa->fcw_offset;
 
+       info.nic_dma_info = &sas->nic_dma_info;
+
        rc = sa->priv.dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index,
                                     &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
                                     socket_id, &info, &rxq_info->dp);
index d59a1af..0dccf21 100644 (file)
@@ -194,7 +194,7 @@ sfc_tx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
                SFC_TX_DEFAULT_FREE_THRESH;
        txq_info->offloads = offloads;
 
-       rc = sfc_dma_alloc(sa, "txq", sw_index,
+       rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_NIC_DMA_ADDR_TX_RING,
                           efx_txq_size(sa->nic, txq_info->entries),
                           socket_id, &txq->mem);
        if (rc != 0)
@@ -226,6 +226,8 @@ sfc_tx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
        info.tso_max_payload_len = encp->enc_tx_tso_max_payload_length;
        info.tso_max_nb_outgoing_frames = encp->enc_tx_tso_max_nframes;
 
+       info.nic_dma_info = &sas->nic_dma_info;
+
        rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
                                     &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
                                     socket_id, &info, &txq_info->dp);
@@ -1082,6 +1084,10 @@ sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
        struct sfc_txq *ctrl_txq;
        int rc;
 
+       rc = ENOTSUP;
+       if (info->nic_dma_info->nb_regions > 0)
+               goto fail_nic_dma;
+
        rc = ENOMEM;
        txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
@@ -1133,6 +1139,7 @@ fail_pend_desc_alloc:
        rte_free(txq);
 
 fail_txq_alloc:
+fail_nic_dma:
        return rc;
 }