net/cnxk: avoid command copy from Tx queue
author    Pavan Nikhilesh <pbhagavatula@marvell.com>
          Thu, 10 Feb 2022 13:15:26 +0000 (18:45 +0530)
committer Jerin Jacob <jerinj@marvell.com>
          Fri, 11 Feb 2022 10:28:30 +0000 (11:28 +0100)
The Tx command is prepared based on the enabled offloads and stored in
the Tx queue structure during the tx_queue_setup phase.
In the fastpath, the command is copied from the Tx queue to the LMT
line for every packet.
Since the command contents are mostly constants, we can move the
command preparation to the fastpath and avoid accessing Tx queue
memory.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
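
In outline: the fast path used to copy a fully pre-built 32B command
template out of the Tx queue for every packet; after this change only
send_hdr_w0 is loaded and the remaining words are rebuilt from the
compile-time offload flags. A minimal sketch, assuming simplified type
and constant names (the real logic is cn9k_nix_tx_skeleton() and
cn10k_nix_tx_skeleton() in the diff below):

#include <stdint.h>
#include <string.h>

#define SUBDC_EXT 0x6ULL /* placeholder subdescriptor opcodes */
#define SUBDC_SG  0x4ULL

struct eth_txq {
	uint64_t send_hdr_w0; /* constant header word, set at queue setup */
	uint64_t cmd[4];      /* old scheme: fully pre-built command */
};

/* Before: every packet reads a 32B template from Tx queue memory. */
static inline void tx_skeleton_old(const struct eth_txq *q, uint64_t *cmd)
{
	memcpy(cmd, q->cmd, sizeof(q->cmd));
}

/* After: one load of send_hdr_w0; the other words are immediates
 * derived from the compile-time flags, so the template cache lines in
 * the Tx queue are never touched.
 */
static inline void tx_skeleton_new(const struct eth_txq *q, uint64_t *cmd,
				   int need_ext_hdr)
{
	cmd[0] = q->send_hdr_w0;
	cmd[1] = 0;
	if (need_ext_hdr) {
		cmd[2] = SUBDC_EXT << 60;                 /* NIX_SUBDC_EXT */
		cmd[3] = 0;
		cmd[4] = (SUBDC_SG << 60) | (1ULL << 48); /* SG, one seg */
	} else {
		cmd[2] = (SUBDC_SG << 60) | (1ULL << 48);
	}
}
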
16 files changed:
drivers/common/cnxk/roc_io.h
drivers/common/cnxk/roc_io_generic.h
drivers/crypto/cnxk/cn9k_cryptodev_ops.c
drivers/crypto/cnxk/cn9k_ipsec.c
drivers/event/cnxk/cn10k_eventdev.c
drivers/event/cnxk/cn10k_worker.h
drivers/event/cnxk/cn9k_eventdev.c
drivers/event/cnxk/cn9k_worker.h
drivers/event/cnxk/cnxk_eventdev.h
drivers/event/cnxk/cnxk_eventdev_adptr.c
drivers/net/cnxk/cn10k_ethdev.c
drivers/net/cnxk/cn10k_ethdev.h
drivers/net/cnxk/cn10k_tx.h
drivers/net/cnxk/cn9k_ethdev.c
drivers/net/cnxk/cn9k_ethdev.h
drivers/net/cnxk/cn9k_tx.h

index 4f15503c29bcc46b8a1bcbdb40609d6930a938f6..62e98d9d004cbbcfd0c6bf5b112f2a946ad54509 100644 (file)
@@ -164,13 +164,36 @@ roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
        dst128[1] = src128[1];
        /* lmtext receives following value:
         * 1: NIX_SUBDC_EXT needed i.e. tx vlan case
-        * 2: NIX_SUBDC_EXT + NIX_SUBDC_MEM i.e. tstamp case
         */
-       if (lmtext) {
+       if (lmtext)
+               dst128[2] = src128[2];
+}
+
+static __plt_always_inline void
+roc_lmt_mov64(void *out, const void *in)
+{
+       volatile const __uint128_t *src128 = (const __uint128_t *)in;
+       volatile __uint128_t *dst128 = (__uint128_t *)out;
+
+       dst128[0] = src128[0];
+       dst128[1] = src128[1];
+       dst128[2] = src128[2];
+       dst128[3] = src128[3];
+}
+
+static __plt_always_inline void
+roc_lmt_mov_nv(void *out, const void *in, const uint32_t lmtext)
+{
+       const __uint128_t *src128 = (const __uint128_t *)in;
+       __uint128_t *dst128 = (__uint128_t *)out;
+
+       dst128[0] = src128[0];
+       dst128[1] = src128[1];
+       /* lmtext receives following value:
+        * 1: NIX_SUBDC_EXT needed i.e. tx vlan case
+        */
+       if (lmtext)
                dst128[2] = src128[2];
-               if (lmtext > 1)
-                       dst128[3] = src128[3];
-       }
 }
 
 static __plt_always_inline void
index 5f90835c096abacfbcf3b47a1253a8463643c253..42764455cc984ca0519c561e1c9400b265d9a7f6 100644 (file)
@@ -106,6 +106,21 @@ roc_lmt_mov(void *out, const void *in, const uint32_t lmtext)
        memset(out, 0, sizeof(__uint128_t) * (lmtext ? lmtext > 1 ? 4 : 3 : 2));
 }
 
+static __plt_always_inline void
+roc_lmt_mov64(void *out, const void *in)
+{
+       PLT_SET_USED(out);
+       PLT_SET_USED(in);
+}
+
+static __plt_always_inline void
+roc_lmt_mov_nv(void *out, const void *in, const uint32_t lmtext)
+{
+       PLT_SET_USED(in);
+       PLT_SET_USED(lmtext);
+       memset(out, 0, sizeof(__uint128_t) * (lmtext ? lmtext > 1 ? 4 : 3 : 2));
+}
+
 static __plt_always_inline void
 roc_lmt_mov_seg(void *out, const void *in, const uint16_t segdw)
 {
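
Note that roc_lmt_mov() above no longer copies a fourth 16B chunk: the
tstamp MEM descriptor is now built in the fast path, so the only
remaining user of a full 64B move is the CPT submit path, which gets
the dedicated roc_lmt_mov64(). A before/after usage sketch (both calls
appear verbatim in the crypto hunks below):

	/* Before: a 64B copy expressed as lmtext == 2. */
	roc_lmt_mov((void *)lmtline, inst, 2);
	/* After: intent stated directly, no branch on lmtext. */
	roc_lmt_mov64((void *)lmtline, inst);
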
index ac1953b66df4501c72616849ecb047a7638e3077..ddba9d5dd0eb8ec406423c212e66ba399c7f4bec 100644 (file)
@@ -161,7 +161,7 @@ cn9k_cpt_inst_submit(struct cpt_inst_s *inst, uint64_t lmtline,
 
        do {
                /* Copy CPT command to LMTLINE */
-               roc_lmt_mov((void *)lmtline, inst, 2);
+               roc_lmt_mov64((void *)lmtline, inst);
 
                /*
                 * Make sure compiler does not reorder memcpy and ldeor.
index 9f876f75f290a1170762770dfbc1688fa8d4fad1..672b65a5d241355755d7fcfa914c087bb0d55647 100644 (file)
@@ -53,7 +53,7 @@ cn9k_cpt_enq_sa_write(struct cn9k_ipsec_sa *sa, struct cnxk_cpt_qp *qp,
 
        do {
                /* Copy CPT command to LMTLINE */
-               roc_lmt_mov((void *)lmtline, &inst, 2);
+               roc_lmt_mov64((void *)lmtline, &inst);
                lmt_status = roc_lmt_submit_ldeor(io_addr);
        } while (lmt_status == 0);
 
index 7b7ce44c7474874b8675d1219dde33d11ea794ea..97a88feb1313716f9fe0f4ee5e59d84c4ef12cf6 100644 (file)
@@ -50,7 +50,6 @@ cn10k_sso_init_hws_mem(void *arg, uint8_t port_id)
        /* First cache line is reserved for cookie */
        ws = (struct cn10k_sso_hws *)((uint8_t *)ws + RTE_CACHE_LINE_SIZE);
        ws->base = roc_sso_hws_base_get(&dev->sso, port_id);
-       ws->tx_base = ws->base;
        ws->hws_id = port_id;
        ws->swtag_req = 0;
        ws->gw_wdata = cn10k_sso_gw_mode_wdata(dev);
@@ -259,15 +258,13 @@ cn10k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
                        ws_cookie,
                        sizeof(struct cnxk_sso_hws_cookie) +
                                sizeof(struct cn10k_sso_hws) +
-                               (sizeof(uint64_t) * (dev->max_port_id + 1) *
-                                RTE_MAX_QUEUES_PER_PORT),
+                               dev->tx_adptr_data_sz,
                        RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (ws_cookie == NULL)
                        return -ENOMEM;
                ws = RTE_PTR_ADD(ws_cookie, sizeof(struct cnxk_sso_hws_cookie));
                memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
-                      sizeof(uint64_t) * (dev->max_port_id + 1) *
-                              RTE_MAX_QUEUES_PER_PORT);
+                      dev->tx_adptr_data_sz);
                event_dev->data->ports[i] = ws;
        }
 
@@ -721,16 +718,35 @@ cn10k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
                               const struct rte_eth_dev *eth_dev,
                               int32_t tx_queue_id)
 {
+       struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+       struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+       uint64_t tx_offloads;
        int rc;
 
        RTE_SET_USED(id);
        rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
        if (rc < 0)
                return rc;
+
+       /* Can't enable tstamp if all the ports don't have it enabled. */
+       tx_offloads = cnxk_eth_dev->tx_offload_flags;
+       if (dev->tx_adptr_configured) {
+               uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+               uint8_t tstmp_ena =
+                       !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+               if (tstmp_ena && !tstmp_req)
+                       dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+               else if (!tstmp_ena && tstmp_req)
+                       tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+       }
+
+       dev->tx_offloads |= tx_offloads;
        rc = cn10k_sso_updt_tx_adptr_data(event_dev);
        if (rc < 0)
                return rc;
        cn10k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+       dev->tx_adptr_configured = 1;
 
        return 0;
 }
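
The tstamp handling above is effectively a logical AND across all
ports added to the adapter: the offload stays set only while every
port requests it. A condensed sketch with a hypothetical helper (the
driver open-codes this, here and in the cn9k equivalent below):

#include <stdint.h>

static uint64_t merge_tx_offloads(uint64_t dev_flags, uint64_t port_flags,
				  int adptr_configured, uint64_t tstamp_bit)
{
	/* Any disagreement on tstamp clears it on both sides before
	 * the OR-merge of the remaining offload bits.
	 */
	if (adptr_configured && ((dev_flags ^ port_flags) & tstamp_bit)) {
		dev_flags &= ~tstamp_bit;
		port_flags &= ~tstamp_bit;
	}
	return dev_flags | port_flags;
}
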
index 4019c13bd2077f08893cbf064ccab592aceb83f6..ff08b2d974e0e086a2242b1fa6bfda1c94fc27a7 100644 (file)
@@ -455,18 +455,18 @@ NIX_RX_FASTPATH_MODES
        }
 
 static __rte_always_inline struct cn10k_eth_txq *
-cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
-                         const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m, const uint64_t *txq_data)
 {
-       return (struct cn10k_eth_txq *)
-               txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+       return (struct cn10k_eth_txq
+                       *)(txq_data[(txq_data[m->port] >> 48) +
+                                   rte_event_eth_tx_adapter_txq_get(m)] &
+                          (BIT_ULL(48) - 1));
 }
 
 static __rte_always_inline void
-cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
-                uintptr_t lmt_addr, uint8_t sched_type, uintptr_t base,
-                const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
-                const uint32_t flags)
+cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
+                uint16_t lmt_id, uintptr_t lmt_addr, uint8_t sched_type,
+                const uint64_t *txq_data, const uint32_t flags)
 {
        uint8_t lnum = 0, loff = 0, shft = 0;
        struct cn10k_eth_txq *txq;
@@ -476,7 +476,7 @@ cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
        bool sec;
 
        txq = cn10k_sso_hws_xtract_meta(m, txq_data);
-       cn10k_nix_tx_skeleton(txq, cmd, flags);
+       cn10k_nix_tx_skeleton(txq, cmd, flags, 0);
        /* Perform header writes before barrier
         * for TSO
         */
@@ -501,23 +501,23 @@ cn10k_sso_tx_one(struct rte_mbuf *m, uint64_t *cmd, uint16_t lmt_id,
        else
                segdw = cn10k_nix_tx_ext_subs(flags) + 2;
 
+       cn10k_nix_xmit_prepare_tstamp(txq, laddr, m->ol_flags, segdw, flags);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                pa = txq->cpt_io_addr | 3 << 4;
        else
                pa = txq->io_addr | ((segdw - 1) << 4);
 
        if (!sched_type)
-               roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+               roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
 
        roc_lmt_submit_steorl(lmt_id, pa);
 }
 
 static __rte_always_inline void
-cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
-                       uint64_t *cmd, uint16_t lmt_id, uintptr_t lmt_addr,
-                       uint8_t sched_type, uintptr_t base,
-                       const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
-                       const uint32_t flags)
+cn10k_sso_vwqe_split_tx(struct cn10k_sso_hws *ws, struct rte_mbuf **mbufs,
+                       uint16_t nb_mbufs, uint64_t *cmd, uint16_t lmt_id,
+                       uintptr_t lmt_addr, uint8_t sched_type,
+                       const uint64_t *txq_data, const uint32_t flags)
 {
        uint16_t port[4], queue[4];
        uint16_t i, j, pkts, scalar;
@@ -540,14 +540,16 @@ cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
                if (((port[0] ^ port[1]) & (port[2] ^ port[3])) ||
                    ((queue[0] ^ queue[1]) & (queue[2] ^ queue[3]))) {
                        for (j = 0; j < 4; j++)
-                               cn10k_sso_tx_one(mbufs[i + j], cmd, lmt_id,
-                                                lmt_addr, sched_type, base,
-                                                txq_data, flags);
+                               cn10k_sso_tx_one(ws, mbufs[i + j], cmd, lmt_id,
+                                                lmt_addr, sched_type, txq_data,
+                                                flags);
                } else {
-                       txq = (struct cn10k_eth_txq *)
-                               txq_data[port[0]][queue[0]];
-                       cn10k_nix_xmit_pkts_vector(txq, &mbufs[i], 4, cmd,
-                                                  base + SSOW_LF_GWS_TAG,
+                       txq = (struct cn10k_eth_txq
+                                      *)(txq_data[(txq_data[port[0]] >> 48) +
+                                                  queue[0]] &
+                                         (BIT_ULL(48) - 1));
+                       cn10k_nix_xmit_pkts_vector(txq, (uint64_t *)ws,
+                                                  &mbufs[i], 4, cmd,
                                                   flags | NIX_TX_VWQE_F);
                }
        }
@@ -555,15 +557,14 @@ cn10k_sso_vwqe_split_tx(struct rte_mbuf **mbufs, uint16_t nb_mbufs,
        mbufs += i;
 
        for (i = 0; i < scalar; i++) {
-               cn10k_sso_tx_one(mbufs[i], cmd, lmt_id, lmt_addr, sched_type,
-                                base, txq_data, flags);
+               cn10k_sso_tx_one(ws, mbufs[i], cmd, lmt_id, lmt_addr,
+                                sched_type, txq_data, flags);
        }
 }
 
 static __rte_always_inline uint16_t
 cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
-                      uint64_t *cmd,
-                      const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+                      uint64_t *cmd, const uint64_t *txq_data,
                       const uint32_t flags)
 {
        struct cn10k_eth_txq *txq;
@@ -580,17 +581,19 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
                uint64_t meta = *(uint64_t *)ev->vec;
 
                if (meta & BIT(31)) {
-                       txq = (struct cn10k_eth_txq *)
-                               txq_data[meta >> 32][meta >> 48];
-
-                       cn10k_nix_xmit_pkts_vector(
-                               txq, mbufs, meta & 0xFFFF, cmd,
-                               ws->tx_base + SSOW_LF_GWS_TAG,
-                               flags | NIX_TX_VWQE_F);
+                       txq = (struct cn10k_eth_txq
+                                      *)(txq_data[(txq_data[meta >> 32] >>
+                                                   48) +
+                                                  (meta >> 48)] &
+                                         (BIT_ULL(48) - 1));
+
+                       cn10k_nix_xmit_pkts_vector(txq, (uint64_t *)ws, mbufs,
+                                                  meta & 0xFFFF, cmd,
+                                                  flags | NIX_TX_VWQE_F);
                } else {
                        cn10k_sso_vwqe_split_tx(
-                               mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
-                               ev->sched_type, ws->tx_base, txq_data, flags);
+                               ws, mbufs, meta & 0xFFFF, cmd, lmt_id, lmt_addr,
+                               ev->sched_type, txq_data, flags);
                }
                rte_mempool_put(rte_mempool_from_obj(ev->vec), ev->vec);
                return (meta & 0xFFFF);
@@ -598,16 +601,16 @@ cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
 
        m = ev->mbuf;
        ref_cnt = m->refcnt;
-       cn10k_sso_tx_one(m, cmd, lmt_id, lmt_addr, ev->sched_type, ws->tx_base,
-                        txq_data, flags);
+       cn10k_sso_tx_one(ws, m, cmd, lmt_id, lmt_addr, ev->sched_type, txq_data,
+                        flags);
 
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                if (ref_cnt > 1)
                        return 1;
        }
 
-       cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
-                                ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+       cnxk_sso_hws_swtag_flush(ws->base + SSOW_LF_GWS_TAG,
+                                ws->base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
        return 1;
 }
 
@@ -628,9 +631,7 @@ NIX_TX_FASTPATH_MODES
                uint64_t cmd[sz];                                              \
                RTE_SET_USED(nb_events);                                       \
                return cn10k_sso_hws_event_tx(                                 \
-                       ws, &ev[0], cmd,                                       \
-                       (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
-                               ws->tx_adptr_data,                             \
+                       ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data,  \
                        flags);                                                \
        }
 
@@ -642,9 +643,7 @@ NIX_TX_FASTPATH_MODES
                struct cn10k_sso_hws *ws = port;                               \
                RTE_SET_USED(nb_events);                                       \
                return cn10k_sso_hws_event_tx(                                 \
-                       ws, &ev[0], cmd,                                       \
-                       (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
-                               ws->tx_adptr_data,                             \
+                       ws, &ev[0], cmd, (const uint64_t *)ws->tx_adptr_data,  \
                        (flags) | NIX_TX_MULTI_SEG_F);                         \
        }
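
The open-coded lookups above replace the old
txq_data[port][RTE_MAX_QUEUES_PER_PORT] 2-D array with a flat table:
entry [port] carries that port's row offset in bits 63:48, and each
row entry carries the txq pointer in bits 47:0 (which is why
cnxk_sso_updt_tx_queue_data() below rejects pointers with non-zero top
16 bits). A sketch of the two-level lookup, under those assumptions:

#include <stdint.h>

static inline void *txq_lookup(const uint64_t *txq_data, uint16_t port,
			       uint16_t queue)
{
	uint64_t row = txq_data[port] >> 48;      /* per-port row start */

	return (void *)(uintptr_t)(txq_data[row + queue] &
				   ((1ULL << 48) - 1)); /* strip offset */
}
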
 
index 4611936b7fbc2e69b3cf610cb6a10c65f5d18529..f8652d4fbc0a83f8ef5f47ff49cdb33d3b9190cf 100644 (file)
@@ -259,17 +259,14 @@ cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
                                ws_cookie,
                                sizeof(struct cnxk_sso_hws_cookie) +
                                        sizeof(struct cn9k_sso_hws_dual) +
-                                       (sizeof(uint64_t) *
-                                        (dev->max_port_id + 1) *
-                                        RTE_MAX_QUEUES_PER_PORT),
+                                       dev->tx_adptr_data_sz,
                                RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                        if (ws_cookie == NULL)
                                return -ENOMEM;
                        dws = RTE_PTR_ADD(ws_cookie,
                                          sizeof(struct cnxk_sso_hws_cookie));
                        memcpy(&dws->tx_adptr_data, dev->tx_adptr_data,
-                              sizeof(uint64_t) * (dev->max_port_id + 1) *
-                                      RTE_MAX_QUEUES_PER_PORT);
+                              dev->tx_adptr_data_sz);
                        event_dev->data->ports[i] = dws;
                } else {
                        struct cn9k_sso_hws *ws = event_dev->data->ports[i];
@@ -280,17 +277,14 @@ cn9k_sso_updt_tx_adptr_data(const struct rte_eventdev *event_dev)
                                ws_cookie,
                                sizeof(struct cnxk_sso_hws_cookie) +
                                        sizeof(struct cn9k_sso_hws_dual) +
-                                       (sizeof(uint64_t) *
-                                        (dev->max_port_id + 1) *
-                                        RTE_MAX_QUEUES_PER_PORT),
+                                       dev->tx_adptr_data_sz,
                                RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                        if (ws_cookie == NULL)
                                return -ENOMEM;
                        ws = RTE_PTR_ADD(ws_cookie,
                                         sizeof(struct cnxk_sso_hws_cookie));
                        memcpy(&ws->tx_adptr_data, dev->tx_adptr_data,
-                              sizeof(uint64_t) * (dev->max_port_id + 1) *
-                                      RTE_MAX_QUEUES_PER_PORT);
+                              dev->tx_adptr_data_sz);
                        event_dev->data->ports[i] = ws;
                }
        }
@@ -987,17 +981,36 @@ cn9k_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
                              const struct rte_eth_dev *eth_dev,
                              int32_t tx_queue_id)
 {
+       struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
+       struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+       uint64_t tx_offloads;
        int rc;
 
        RTE_SET_USED(id);
        rc = cnxk_sso_tx_adapter_queue_add(event_dev, eth_dev, tx_queue_id);
        if (rc < 0)
                return rc;
+
+       /* Can't enable tstamp if all the ports don't have it enabled. */
+       tx_offloads = cnxk_eth_dev->tx_offload_flags;
+       if (dev->tx_adptr_configured) {
+               uint8_t tstmp_req = !!(tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+               uint8_t tstmp_ena =
+                       !!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F);
+
+               if (tstmp_ena && !tstmp_req)
+                       dev->tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+               else if (!tstmp_ena && tstmp_req)
+                       tx_offloads &= ~(NIX_TX_OFFLOAD_TSTAMP_F);
+       }
+
+       dev->tx_offloads |= tx_offloads;
        cn9k_sso_txq_fc_update(eth_dev, tx_queue_id, true);
        rc = cn9k_sso_updt_tx_adptr_data(event_dev);
        if (rc < 0)
                return rc;
        cn9k_sso_fp_fns_set((struct rte_eventdev *)(uintptr_t)event_dev);
+       dev->tx_adptr_configured = 1;
 
        return 0;
 }
index c99e459c1b63063482aabd7d62e8f7a1e925f2ea..303b04c215dad1a20fc4a9075cd3ef711523de55 100644 (file)
@@ -599,20 +599,13 @@ cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
                ;
 }
 
-static __rte_always_inline const struct cn9k_eth_txq *
-cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
-                        const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+static __rte_always_inline struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m, uint64_t *txq_data)
 {
-       return (const struct cn9k_eth_txq *)
-               txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
-}
-
-static __rte_always_inline void
-cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
-                        uint64_t *cmd, const uint32_t flags)
-{
-       roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
-       cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+       return (struct cn9k_eth_txq
+                       *)(txq_data[(txq_data[m->port] >> 48) +
+                                   rte_event_eth_tx_adapter_txq_get(m)] &
+                          (BIT_ULL(48) - 1));
 }
 
 #if defined(RTE_ARCH_ARM64)
@@ -669,7 +662,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);
 
-       roc_lmt_mov((void *)(nixtx + 16), cmd, cn9k_nix_tx_ext_subs(flags));
+       roc_lmt_mov_nv((void *)(nixtx + 16), cmd, cn9k_nix_tx_ext_subs(flags));
 
        /* Load opcode and cptr already prepared at pkt metadata set */
        pkt_len -= l2_len;
@@ -756,12 +749,11 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
 
 static __rte_always_inline uint16_t
 cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
-                     const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
-                     const uint32_t flags)
+                     uint64_t *txq_data, const uint32_t flags)
 {
        struct rte_mbuf *m = ev->mbuf;
-       const struct cn9k_eth_txq *txq;
        uint16_t ref_cnt = m->refcnt;
+       struct cn9k_eth_txq *txq;
 
        /* Perform header writes before barrier for TSO */
        cn9k_nix_xmit_prepare_tso(m, flags);
@@ -774,7 +766,8 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
            !(flags & NIX_TX_OFFLOAD_SECURITY_F))
                rte_io_wmb();
        txq = cn9k_sso_hws_xtract_meta(m, txq_data);
-       cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+       cn9k_nix_tx_skeleton(txq, cmd, flags, 0);
+       cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
 
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                uint64_t ol_flags = m->ol_flags;
@@ -796,6 +789,8 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
 
        if (flags & NIX_TX_MULTI_SEG_F) {
                const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+               cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, segdw,
+                                            flags);
                if (!CNXK_TT_FROM_EVENT(ev->event)) {
                        cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
                        roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
@@ -808,6 +803,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
                                               segdw);
                }
        } else {
+               cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags);
                if (!CNXK_TT_FROM_EVENT(ev->event)) {
                        cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
                        roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
@@ -853,11 +849,9 @@ NIX_TX_FASTPATH_MODES
                struct cn9k_sso_hws *ws = port;                                \
                uint64_t cmd[sz];                                              \
                RTE_SET_USED(nb_events);                                       \
-               return cn9k_sso_hws_event_tx(                                  \
-                       ws->base, &ev[0], cmd,                                 \
-                       (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
-                               ws->tx_adptr_data,                             \
-                       flags);                                                \
+               return cn9k_sso_hws_event_tx(ws->base, &ev[0], cmd,            \
+                                            (uint64_t *)ws->tx_adptr_data,    \
+                                            flags);                           \
        }
 
 #define SSO_TX_SEG(fn, sz, flags)                                              \
@@ -867,11 +861,9 @@ NIX_TX_FASTPATH_MODES
                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
                struct cn9k_sso_hws *ws = port;                                \
                RTE_SET_USED(nb_events);                                       \
-               return cn9k_sso_hws_event_tx(                                  \
-                       ws->base, &ev[0], cmd,                                 \
-                       (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
-                               ws->tx_adptr_data,                             \
-                       (flags) | NIX_TX_MULTI_SEG_F);                         \
+               return cn9k_sso_hws_event_tx(ws->base, &ev[0], cmd,            \
+                                            (uint64_t *)ws->tx_adptr_data,    \
+                                            (flags) | NIX_TX_MULTI_SEG_F);    \
        }
 
 #define SSO_DUAL_TX(fn, sz, flags)                                             \
@@ -881,11 +873,9 @@ NIX_TX_FASTPATH_MODES
                struct cn9k_sso_hws_dual *ws = port;                           \
                uint64_t cmd[sz];                                              \
                RTE_SET_USED(nb_events);                                       \
-               return cn9k_sso_hws_event_tx(                                  \
-                       ws->base[!ws->vws], &ev[0], cmd,                       \
-                       (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
-                               ws->tx_adptr_data,                             \
-                       flags);                                                \
+               return cn9k_sso_hws_event_tx(ws->base[!ws->vws], &ev[0], cmd,  \
+                                            (uint64_t *)ws->tx_adptr_data,    \
+                                            flags);                           \
        }
 
 #define SSO_DUAL_TX_SEG(fn, sz, flags)                                         \
@@ -895,11 +885,9 @@ NIX_TX_FASTPATH_MODES
                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
                struct cn9k_sso_hws_dual *ws = port;                           \
                RTE_SET_USED(nb_events);                                       \
-               return cn9k_sso_hws_event_tx(                                  \
-                       ws->base[!ws->vws], &ev[0], cmd,                       \
-                       (const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
-                               ws->tx_adptr_data,                             \
-                       (flags) | NIX_TX_MULTI_SEG_F);                         \
+               return cn9k_sso_hws_event_tx(ws->base[!ws->vws], &ev[0], cmd,  \
+                                            (uint64_t *)ws->tx_adptr_data,    \
+                                            (flags) | NIX_TX_MULTI_SEG_F);    \
        }
 
 #endif
index 4652b58a848aa1444ec8af1c3aa408c6c97504e9..b26df58588021b34a86a370891f7327b104b299a 100644 (file)
@@ -99,7 +99,10 @@ struct cnxk_sso_evdev {
        uint16_t rx_adptr_pool_cnt;
        uint64_t *rx_adptr_pools;
        uint64_t *tx_adptr_data;
+       size_t tx_adptr_data_sz;
        uint16_t max_port_id;
+       uint16_t max_queue_id[RTE_MAX_ETHPORTS];
+       uint8_t tx_adptr_configured;
        uint16_t tim_adptr_ring_cnt;
        uint16_t *timer_adptr_rings;
        uint64_t *timer_adptr_sz;
@@ -131,8 +134,8 @@ struct cn10k_sso_hws {
        uint64_t *fc_mem;
        uintptr_t grp_base;
        /* Tx Fastpath data */
-       uint64_t tx_base __rte_cache_aligned;
-       uintptr_t lmt_base;
+       uintptr_t lmt_base __rte_cache_aligned;
+       uint64_t lso_tun_fmt;
        uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
@@ -149,7 +152,8 @@ struct cn9k_sso_hws {
        uint64_t *fc_mem;
        uintptr_t grp_base;
        /* Tx Fastpath data */
-       uint8_t tx_adptr_data[] __rte_cache_aligned;
+       uint64_t lso_tun_fmt __rte_cache_aligned;
+       uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
 struct cn9k_sso_hws_dual {
@@ -165,7 +169,8 @@ struct cn9k_sso_hws_dual {
        uint64_t *fc_mem;
        uintptr_t grp_base;
        /* Tx Fastpath data */
-       uint8_t tx_adptr_data[] __rte_cache_aligned;
+       uint64_t lso_tun_fmt __rte_cache_aligned;
+       uint8_t tx_adptr_data[];
 } __rte_cache_aligned;
 
 struct cnxk_sso_hws_cookie {
index fdcd68ca63fe49fcca789bb330fb3b34da0ef6f0..5ebd3340e7bb9f909c2e34765aa5184d9aef7ea6 100644 (file)
@@ -339,30 +339,179 @@ cnxk_sso_sqb_aura_limit_edit(struct roc_nix_sq *sq, uint16_t nb_sqb_bufs)
                sq->aura_handle, RTE_MIN(nb_sqb_bufs, sq->aura_sqb_bufs));
 }
 
+static void
+cnxk_sso_tx_queue_data_init(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
+                           uint16_t eth_port_id, uint16_t tx_queue_id)
+{
+       uint64_t offset = 0;
+       int i;
+
+       dev->max_queue_id[0] = RTE_MAX(dev->max_queue_id[0], eth_port_id);
+       for (i = 1; i < eth_port_id; i++) {
+               offset += (dev->max_queue_id[i - 1] + 1);
+               txq_data[i] |= offset << 48;
+       }
+       dev->max_port_id = RTE_MAX(dev->max_port_id, eth_port_id);
+       dev->max_queue_id[eth_port_id] =
+               RTE_MAX(dev->max_queue_id[eth_port_id], tx_queue_id);
+}
+
+static void
+cnxk_sso_tx_queue_data_cpy(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
+                          uint64_t *otxq_data, uint16_t eth_port_id)
+{
+       uint64_t offset = 0;
+       int i, j;
+
+       for (i = 1; i < eth_port_id; i++) {
+               offset += (dev->max_queue_id[i - 1] + 1);
+               txq_data[i] |= offset << 48;
+               for (j = 0;
+                    (i < dev->max_port_id) && (j < dev->max_queue_id[i] + 1);
+                    j++)
+                       txq_data[offset + j] =
+                               otxq_data[(otxq_data[i] >> 48) + j];
+       }
+}
+
+static void
+cnxk_sso_tx_queue_data_cpy_max(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
+                              uint64_t *otxq_data, uint16_t eth_port_id,
+                              uint16_t max_port_id, uint16_t max_queue_id)
+{
+       uint64_t offset = 0;
+       int i, j;
+
+       for (i = 1; i < max_port_id + 1; i++) {
+               offset += (dev->max_queue_id[i - 1] + 1);
+               txq_data[i] |= offset << 48;
+               for (j = 0; j < dev->max_queue_id[i] + 1; j++) {
+                       if (i == eth_port_id && j > max_queue_id)
+                               continue;
+                       txq_data[offset + j] =
+                               otxq_data[(otxq_data[i] >> 48) + j];
+               }
+       }
+}
+
+static void
+cnxk_sso_tx_queue_data_rewrite(struct cnxk_sso_evdev *dev, uint64_t *txq_data,
+                              uint16_t eth_port_id, uint16_t tx_queue_id,
+                              uint64_t *otxq_data, uint16_t max_port_id,
+                              uint16_t max_queue_id)
+{
+       int i;
+
+       for (i = 0; i < dev->max_queue_id[0] + 1; i++)
+               txq_data[i] |= (otxq_data[i] & ~((BIT_ULL(16) - 1) << 48));
+
+       if (eth_port_id > max_port_id) {
+               dev->max_queue_id[0] =
+                       RTE_MAX(dev->max_queue_id[0], eth_port_id);
+               dev->max_port_id = RTE_MAX(dev->max_port_id, eth_port_id);
+
+               cnxk_sso_tx_queue_data_cpy(dev, txq_data, otxq_data,
+                                          eth_port_id);
+               dev->max_queue_id[eth_port_id] =
+                       RTE_MAX(dev->max_queue_id[eth_port_id], tx_queue_id);
+       } else if (tx_queue_id > max_queue_id) {
+               dev->max_queue_id[eth_port_id] =
+                       RTE_MAX(dev->max_queue_id[eth_port_id], tx_queue_id);
+               dev->max_port_id = RTE_MAX(max_port_id, eth_port_id);
+               cnxk_sso_tx_queue_data_cpy_max(dev, txq_data, otxq_data,
+                                              eth_port_id, max_port_id,
+                                              max_queue_id);
+       }
+}
+
+static void
+cnxk_sso_tx_queue_data_sz(struct cnxk_sso_evdev *dev, uint16_t eth_port_id,
+                         uint16_t tx_queue_id, uint16_t max_port_id,
+                         uint16_t max_queue_id, uint64_t *r, size_t *sz)
+{
+       uint64_t row = 0;
+       size_t size = 0;
+       int i;
+
+       if (dev->tx_adptr_data == NULL) {
+               size = (eth_port_id + 1);
+               size += (eth_port_id + tx_queue_id);
+               row = 2 * eth_port_id;
+               *r = row;
+               *sz = size;
+               return;
+       }
+
+       if (eth_port_id > max_port_id) {
+               size = (RTE_MAX(eth_port_id, dev->max_queue_id[0]) + 1);
+               for (i = 1; i < eth_port_id; i++)
+                       size += (dev->max_queue_id[i] + 1);
+               row = size;
+               size += (tx_queue_id + 1);
+       } else if (tx_queue_id > max_queue_id) {
+               size = !eth_port_id ?
+                              tx_queue_id + 1 :
+                                    RTE_MAX(max_port_id, dev->max_queue_id[0]) + 1;
+               for (i = 1; i < max_port_id + 1; i++) {
+                       if (i == eth_port_id) {
+                               row = size;
+                               size += tx_queue_id + 1;
+                       } else {
+                               size += dev->max_queue_id[i] + 1;
+                       }
+               }
+       }
+       *r = row;
+       *sz = size;
+}
+
 static int
 cnxk_sso_updt_tx_queue_data(const struct rte_eventdev *event_dev,
                            uint16_t eth_port_id, uint16_t tx_queue_id,
                            void *txq)
 {
        struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+       uint16_t max_queue_id = dev->max_queue_id[eth_port_id];
        uint16_t max_port_id = dev->max_port_id;
-       uint64_t *txq_data = dev->tx_adptr_data;
-
-       if (txq_data == NULL || eth_port_id > max_port_id) {
-               max_port_id = RTE_MAX(max_port_id, eth_port_id);
-               txq_data = rte_realloc_socket(
-                       txq_data,
-                       (sizeof(uint64_t) * (max_port_id + 1) *
-                        RTE_MAX_QUEUES_PER_PORT),
-                       RTE_CACHE_LINE_SIZE, event_dev->data->socket_id);
+       uint64_t *txq_data = NULL;
+       uint64_t row = 0;
+       size_t size = 0;
+
+       if (((uint64_t)txq) & 0xFFFF000000000000)
+               return -EINVAL;
+
+       cnxk_sso_tx_queue_data_sz(dev, eth_port_id, tx_queue_id, max_port_id,
+                                 max_queue_id, &row, &size);
+
+       size *= sizeof(uint64_t);
+
+       if (size) {
+               uint64_t *otxq_data = dev->tx_adptr_data;
+
+               txq_data = malloc(size);
                if (txq_data == NULL)
                        return -ENOMEM;
+               memset(txq_data, 0, size);
+               txq_data[eth_port_id] = ((uint64_t)row) << 48;
+               txq_data[row + tx_queue_id] = (uint64_t)txq;
+
+               if (otxq_data != NULL)
+                       cnxk_sso_tx_queue_data_rewrite(
+                               dev, txq_data, eth_port_id, tx_queue_id,
+                               otxq_data, max_port_id, max_queue_id);
+               else
+                       cnxk_sso_tx_queue_data_init(dev, txq_data, eth_port_id,
+                                                   tx_queue_id);
+               dev->tx_adptr_data_sz = size;
+               free(otxq_data);
+               dev->tx_adptr_data = txq_data;
+       } else {
+               txq_data = dev->tx_adptr_data;
+               row = txq_data[eth_port_id] >> 48;
+               txq_data[row + tx_queue_id] &= ~(BIT_ULL(48) - 1);
+               txq_data[row + tx_queue_id] |= (uint64_t)txq;
        }
 
-       ((uint64_t(*)[RTE_MAX_QUEUES_PER_PORT])
-                txq_data)[eth_port_id][tx_queue_id] = (uint64_t)txq;
-       dev->max_port_id = max_port_id;
-       dev->tx_adptr_data = txq_data;
        return 0;
 }
 
@@ -372,7 +521,6 @@ cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
                              int32_t tx_queue_id)
 {
        struct cnxk_eth_dev *cnxk_eth_dev = eth_dev->data->dev_private;
-       struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
        struct roc_nix_sq *sq;
        int i, ret;
        void *txq;
@@ -388,8 +536,6 @@ cnxk_sso_tx_adapter_queue_add(const struct rte_eventdev *event_dev,
                        event_dev, eth_dev->data->port_id, tx_queue_id, txq);
                if (ret < 0)
                        return ret;
-
-               dev->tx_offloads |= cnxk_eth_dev->tx_offload_flags;
        }
 
        return 0;
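
For the table layout, row offsets are a running sum of
(max_queue_id[i - 1] + 1), so each port only reserves as many slots as
its highest queue id instead of RTE_MAX_QUEUES_PER_PORT. A sketch of
the offset fill from cnxk_sso_tx_queue_data_init() above, with
hypothetical parameters:

#include <stdint.h>

static void fill_row_offsets(uint64_t *txq_data,
			     const uint16_t *max_queue_id, int nb_ports)
{
	uint64_t offset = 0;
	int i;

	for (i = 1; i < nb_ports; i++) {
		offset += max_queue_id[i - 1] + 1;
		txq_data[i] |= offset << 48; /* bits 63:48 = row start */
	}
}
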
index 8378cbffc2f197d8784485b283fbde6f6b386304..9bb08e1824cf63044c6aa2932cc0ce6ff1c0c1cd 100644 (file)
@@ -131,53 +131,31 @@ static void
 nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn10k_eth_txq *txq,
                      uint16_t qid)
 {
-       struct nix_send_ext_s *send_hdr_ext;
        union nix_send_hdr_w0_u send_hdr_w0;
-       struct nix_send_mem_s *send_mem;
-       union nix_send_sg_s sg_w0;
-
-       RTE_SET_USED(dev);
 
        /* Initialize the fields based on basic single segment packet */
-       memset(&txq->cmd, 0, sizeof(txq->cmd));
        send_hdr_w0.u = 0;
-       sg_w0.u = 0;
-
        if (dev->tx_offload_flags & NIX_TX_NEED_EXT_HDR) {
                /* 2(HDR) + 2(EXT_HDR) + 1(SG) + 1(IOVA) = 6/2 - 1 = 2 */
                send_hdr_w0.sizem1 = 2;
-
-               send_hdr_ext = (struct nix_send_ext_s *)&txq->cmd[0];
-               send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
                if (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                        /* Default: one seg packet would have:
                         * 2(HDR) + 2(EXT) + 1(SG) + 1(IOVA) + 2(MEM)
                         * => 8/2 - 1 = 3
                         */
                        send_hdr_w0.sizem1 = 3;
-                       send_hdr_ext->w0.tstmp = 1;
 
                        /* To calculate the offset for send_mem,
                         * send_hdr->w0.sizem1 * 2
                         */
-                       send_mem = (struct nix_send_mem_s *)(txq->cmd + 2);
-                       send_mem->w0.subdc = NIX_SUBDC_MEM;
-                       send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP;
-                       send_mem->addr = dev->tstamp.tx_tstamp_iova;
+                       txq->ts_mem = dev->tstamp.tx_tstamp_iova;
                }
        } else {
                /* 2(HDR) + 1(SG) + 1(IOVA) = 4/2 - 1 = 1 */
                send_hdr_w0.sizem1 = 1;
        }
-
        send_hdr_w0.sq = qid;
-       sg_w0.subdc = NIX_SUBDC_SG;
-       sg_w0.segs = 1;
-       sg_w0.ld_type = NIX_SENDLDTYPE_LDD;
-
        txq->send_hdr_w0 = send_hdr_w0.u;
-       txq->sg_w0 = sg_w0.u;
-
        rte_wmb();
 }
 
index 0982158c6282f6cde50b97cf4f64154862054c77..ec40e53152cb7b44880cadb8d8e75173f81eae71 100644 (file)
@@ -9,7 +9,6 @@
 
 struct cn10k_eth_txq {
        uint64_t send_hdr_w0;
-       uint64_t sg_w0;
        int64_t fc_cache_pkts;
        uint64_t *fc_mem;
        uintptr_t lmt_base;
@@ -20,8 +19,8 @@ struct cn10k_eth_txq {
        uint64_t sa_base;
        uint64_t *cpt_fc;
        uint16_t cpt_desc;
-       uint64_t cmd[4];
        uint64_t lso_tun_fmt;
+       uint64_t ts_mem;
 } __plt_cache_aligned;
 
 struct cn10k_eth_rxq {
index fc1f6ceb8c795a9e34095cf6de90cae65a842807..4ae6bbf517331a2c9910701a01d1bfcdc152874c 100644 (file)
@@ -186,23 +186,26 @@ cn10k_cpt_tx_steor_data(void)
 }
 
 static __rte_always_inline void
-cn10k_nix_tx_skeleton(const struct cn10k_eth_txq *txq, uint64_t *cmd,
-                     const uint16_t flags)
+cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
+                     const uint16_t flags, const uint16_t static_sz)
 {
-       /* Send hdr */
-       cmd[0] = txq->send_hdr_w0;
+       if (static_sz)
+               cmd[0] = txq->send_hdr_w0;
+       else
+               cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
+                        ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;
-       cmd += 2;
 
-       /* Send ext if present */
        if (flags & NIX_TX_NEED_EXT_HDR) {
-               *(__uint128_t *)cmd = *(const __uint128_t *)txq->cmd;
-               cmd += 2;
+               if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
+                       cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
+               else
+                       cmd[2] = NIX_SUBDC_EXT << 60;
+               cmd[3] = 0;
+               cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
+       } else {
+               cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
-
-       /* Send sg */
-       cmd[0] = txq->sg_w0;
-       cmd[1] = 0;
 }
 
 static __rte_always_inline void
@@ -718,41 +721,29 @@ cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
 }
 
 static __rte_always_inline void
-cn10k_nix_xmit_prepare_tstamp(uintptr_t lmt_addr, const uint64_t *cmd,
+cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
 {
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
-               const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
-               struct nix_send_ext_s *send_hdr_ext =
-                       (struct nix_send_ext_s *)lmt_addr + 16;
+               const uint8_t is_ol_tstamp =
+                       !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;
 
                send_mem = (struct nix_send_mem_s *)(lmt + off);
-               send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
-               send_hdr_ext->w0.tstmp = 1;
-               if (flags & NIX_TX_MULTI_SEG_F) {
-                       /* Retrieving the default desc values */
-                       lmt[off] = cmd[2];
-
-                       /* Using compiler barrier to avoid violation of C
-                        * aliasing rules.
-                        */
-                       rte_compiler_barrier();
-               }
-
-               /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
+               /* Packets for which PKT_TX_IEEE1588_TMST is not set, tx tstamp
                 * should not be recorded, hence changing the alg type to
-                * NIX_SENDMEMALG_SET and also changing send mem addr field to
+                * NIX_SENDMEMALG_SUB and also changing send mem addr field to
                 * next 8 bytes as it corrupts the actual Tx tstamp registered
                 * address.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
-               send_mem->w0.alg = NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
+               send_mem->w0.alg =
+                       NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
                send_mem->addr =
-                       (rte_iova_t)(((uint64_t *)cmd[3]) + is_ol_tstamp);
+                       (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
        }
 }
 
@@ -841,8 +832,8 @@ done:
 }
 
 static __rte_always_inline uint16_t
-cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
-                   uint64_t *cmd, uintptr_t base, const uint16_t flags)
+cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
+                   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
 {
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
@@ -863,9 +854,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
-
        /* Get cmd skeleton */
-       cn10k_nix_tx_skeleton(txq, cmd, flags);
+       cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
 
        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;
@@ -909,14 +899,14 @@ again:
 
                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
-               cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
-                                             tx_pkts[i]->ol_flags, 4, flags);
+               cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
+                                             4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }
 
        if (flags & NIX_TX_VWQE_F)
-               roc_sso_hws_head_wait(base);
+               roc_sso_hws_head_wait(ws[0]);
 
        left -= burst;
        tx_pkts += burst;
@@ -967,9 +957,9 @@ again:
 }
 
 static __rte_always_inline uint16_t
-cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
-                        uint16_t pkts, uint64_t *cmd, uintptr_t base,
-                        const uint16_t flags)
+cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
+                        struct rte_mbuf **tx_pkts, uint16_t pkts,
+                        uint64_t *cmd, const uint16_t flags)
 {
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
@@ -987,12 +977,13 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
        uintptr_t laddr;
        bool sec;
 
-       NIX_XMIT_FC_OR_RETURN(txq, pkts);
-
-       cn10k_nix_tx_skeleton(txq, cmd, flags);
-
-       /* Reduce the cached count */
-       txq->fc_cache_pkts -= pkts;
+       if (!(flags & NIX_TX_VWQE_F)) {
+               NIX_XMIT_FC_OR_RETURN(txq, pkts);
+               /* Reduce the cached count */
+               txq->fc_cache_pkts -= pkts;
+       }
+       /* Get cmd skeleton */
+       cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
 
        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;
@@ -1038,13 +1029,11 @@ again:
 
                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
-
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
-               cn10k_nix_xmit_prepare_tstamp(laddr, &txq->cmd[0],
-                                             tx_pkts[i]->ol_flags, segdw,
-                                             flags);
+               cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
+                                             segdw, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
@@ -1053,7 +1042,7 @@ again:
        }
 
        if (flags & NIX_TX_VWQE_F)
-               roc_sso_hws_head_wait(base);
+               roc_sso_hws_head_wait(ws[0]);
 
        left -= burst;
        tx_pkts += burst;
@@ -1474,9 +1463,9 @@ cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
 }
 
 static __rte_always_inline uint16_t
-cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
-                          uint16_t pkts, uint64_t *cmd, uintptr_t base,
-                          const uint16_t flags)
+cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
+                          struct rte_mbuf **tx_pkts, uint16_t pkts,
+                          uint64_t *cmd, const uint16_t flags)
 {
        uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
        uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
@@ -1526,25 +1515,42 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
        }
 
-       senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
+       if (!(flags & NIX_TX_VWQE_F)) {
+               senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
+       } else {
+               uint64_t w0 =
+                       (txq->send_hdr_w0 & 0xFFFFF00000000000) |
+                       ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
+
+               senddesc01_w0 = vdupq_n_u64(w0);
+       }
        senddesc23_w0 = senddesc01_w0;
+
        senddesc01_w1 = vdupq_n_u64(0);
        senddesc23_w1 = senddesc01_w1;
-       sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
+       sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
        sgdesc23_w0 = sgdesc01_w0;
 
-       /* Load command defaults into vector variables. */
        if (flags & NIX_TX_NEED_EXT_HDR) {
-               sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
-               sendext23_w0 = sendext01_w0;
-               sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
-               sendext23_w1 = sendext01_w1;
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
-                       sendmem01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+                       sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
+                                                  BIT_ULL(15));
+                       sendmem01_w0 =
+                               vdupq_n_u64((NIX_SUBDC_MEM << 60) |
+                                           (NIX_SENDMEMALG_SETTSTMP << 56));
                        sendmem23_w0 = sendmem01_w0;
-                       sendmem01_w1 = vld1q_dup_u64(&txq->cmd[3]);
+                       sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
                        sendmem23_w1 = sendmem01_w1;
+               } else {
+                       sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
                }
+               sendext23_w0 = sendext01_w0;
+
+               if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
+                       sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+               else
+                       sendext01_w1 = vdupq_n_u64(0);
+               sendext23_w1 = sendext01_w1;
        }
 
        /* Get LMT base address and LMT ID as lcore id */
@@ -2577,7 +2583,7 @@ again:
                wd.data[0] >>= 16;
 
        if (flags & NIX_TX_VWQE_F)
-               roc_sso_hws_head_wait(base);
+               roc_sso_hws_head_wait(ws[0]);
 
        left -= burst;
 
@@ -2640,12 +2646,11 @@ again:
 
        if (unlikely(scalar)) {
                if (flags & NIX_TX_MULTI_SEG_F)
-                       pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts,
-                                                        scalar, cmd, base,
-                                                        flags);
+                       pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
+                                                        scalar, cmd, flags);
                else
-                       pkts += cn10k_nix_xmit_pkts(tx_queue, tx_pkts, scalar,
-                                                   cmd, base, flags);
+                       pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
+                                                   scalar, cmd, flags);
        }
 
        return pkts;
@@ -2653,16 +2658,16 @@ again:
 
 #else
 static __rte_always_inline uint16_t
-cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
-                          uint16_t pkts, uint64_t *cmd, uintptr_t base,
-                          const uint16_t flags)
+cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
+                          struct rte_mbuf **tx_pkts, uint16_t pkts,
+                          uint64_t *cmd, const uint16_t flags)
 {
+       RTE_SET_USED(ws);
        RTE_SET_USED(tx_queue);
        RTE_SET_USED(tx_pkts);
        RTE_SET_USED(pkts);
        RTE_SET_USED(cmd);
        RTE_SET_USED(flags);
-       RTE_SET_USED(base);
        return 0;
 }
 #endif
@@ -2892,7 +2897,7 @@ NIX_TX_FASTPATH_MODES
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
-               return cn10k_nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, 0,    \
+               return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
                                           flags);                             \
        }
 
@@ -2905,8 +2910,8 @@ NIX_TX_FASTPATH_MODES
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
-               return cn10k_nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,  \
-                                               0,                             \
+               return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
+                                               cmd,                           \
                                                flags | NIX_TX_MULTI_SEG_F);   \
        }
 
@@ -2919,8 +2924,8 @@ NIX_TX_FASTPATH_MODES
                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
-               return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts,     \
-                                                 cmd, 0, (flags));            \
+               return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
+                                                 pkts, cmd, (flags));         \
        }
 
 #define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
@@ -2933,7 +2938,7 @@ NIX_TX_FASTPATH_MODES
                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
                        return 0;                                              \
                return cn10k_nix_xmit_pkts_vector(                             \
-                       tx_queue, tx_pkts, pkts, cmd, 0,                       \
+                       tx_queue, NULL, tx_pkts, pkts, cmd,                    \
                        (flags) | NIX_TX_MULTI_SEG_F);                         \
        }
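
One subtlety in cn10k_nix_xmit_prepare_tstamp() above: the algorithm
field arithmetic relies on the NIX_SENDMEMALG_E encoding, where
NIX_SENDMEMALG_SUB equals NIX_SENDMEMALG_SETTSTMP + 8. A sketch of the
selection, assuming that encoding:

#include <stdint.h>

#define ALG_SETTSTMP 0x1 /* assumed NIX_SENDMEMALG_E values */
#define ALG_SUB      0x9 /* SETTSTMP + 8 */

/* Pick the MEM subdescriptor algorithm and address for one packet:
 * packets without the IEEE1588 mbuf flag degrade to a subtract on the
 * scratch word at ts_mem + 8, leaving the real timestamp untouched.
 */
static inline void pick_tstamp_mem(uint64_t ts_mem, int want_tstamp,
				   uint8_t *alg, uint64_t *addr)
{
	int skip = !want_tstamp;

	*alg = (uint8_t)(ALG_SETTSTMP + (skip << 3));
	*addr = ts_mem + (uint64_t)skip * sizeof(uint64_t);
}
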
 
index d34bc6898f79534363cd1d69aa7be0feb5105ad2..01e385056174a844b331b6a68e6fdab157dee022 100644 (file)
@@ -131,51 +131,31 @@ static void
 nix_form_default_desc(struct cnxk_eth_dev *dev, struct cn9k_eth_txq *txq,
                      uint16_t qid)
 {
-       struct nix_send_ext_s *send_hdr_ext;
-       struct nix_send_hdr_s *send_hdr;
-       struct nix_send_mem_s *send_mem;
-       union nix_send_sg_s *sg;
+       union nix_send_hdr_w0_u send_hdr_w0;
 
        /* Initialize the fields based on basic single segment packet */
-       memset(&txq->cmd, 0, sizeof(txq->cmd));
-
+       send_hdr_w0.u = 0;
        if (dev->tx_offload_flags & NIX_TX_NEED_EXT_HDR) {
-               send_hdr = (struct nix_send_hdr_s *)&txq->cmd[0];
                /* 2(HDR) + 2(EXT_HDR) + 1(SG) + 1(IOVA) = 6/2 - 1 = 2 */
-               send_hdr->w0.sizem1 = 2;
-
-               send_hdr_ext = (struct nix_send_ext_s *)&txq->cmd[2];
-               send_hdr_ext->w0.subdc = NIX_SUBDC_EXT;
+               send_hdr_w0.sizem1 = 2;
                if (dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                        /* Default: one seg packet would have:
                         * 2(HDR) + 2(EXT) + 1(SG) + 1(IOVA) + 2(MEM)
                         * => 8/2 - 1 = 3
                         */
-                       send_hdr->w0.sizem1 = 3;
-                       send_hdr_ext->w0.tstmp = 1;
+                       send_hdr_w0.sizem1 = 3;
 
                        /* To calculate the offset for send_mem,
                         * send_hdr->w0.sizem1 * 2
                         */
-                       send_mem = (struct nix_send_mem_s *)
-                               (txq->cmd + (send_hdr->w0.sizem1 << 1));
-                       send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
-                       send_mem->w0.cn9k.alg = NIX_SENDMEMALG_SETTSTMP;
-                       send_mem->addr = dev->tstamp.tx_tstamp_iova;
+                       txq->ts_mem = dev->tstamp.tx_tstamp_iova;
                }
-               sg = (union nix_send_sg_s *)&txq->cmd[4];
        } else {
-               send_hdr = (struct nix_send_hdr_s *)&txq->cmd[0];
                /* 2(HDR) + 1(SG) + 1(IOVA) = 4/2 - 1 = 1 */
-               send_hdr->w0.sizem1 = 1;
-               sg = (union nix_send_sg_s *)&txq->cmd[2];
+               send_hdr_w0.sizem1 = 1;
        }
-
-       send_hdr->w0.sq = qid;
-       sg->subdc = NIX_SUBDC_SG;
-       sg->segs = 1;
-       sg->ld_type = NIX_SENDLDTYPE_LDD;
-
+       send_hdr_w0.sq = qid;
+       txq->send_hdr_w0 = send_hdr_w0.u;
        rte_wmb();
 }
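
The sizem1 values set above encode the descriptor length in units of two 8-byte words, minus one, exactly as the in-line comments tally them. A standalone sketch of that arithmetic (word counts taken from the comments in the hunk; the helper name is made up):

#include <stdio.h>

static unsigned int
sizem1(unsigned int dwords) /* dwords = 8-byte words in the command */
{
	return dwords / 2 - 1;
}

int main(void)
{
	printf("no ext hdr: %u\n", sizem1(2 + 1 + 1));         /* HDR+SG+IOVA -> 1 */
	printf("ext hdr:    %u\n", sizem1(2 + 2 + 1 + 1));     /* +EXT        -> 2 */
	printf("ext+tstamp: %u\n", sizem1(2 + 2 + 1 + 1 + 2)); /* +MEM        -> 3 */
	return 0;
}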
 
index 2b452fe009b3100d0daee97de7d4f5ea29f120e5..8ab924944c346838902126f00dfa866e0c192c05 100644 (file)
@@ -9,12 +9,13 @@
 #include <cnxk_security_ar.h>
 
 struct cn9k_eth_txq {
-       uint64_t cmd[8];
+       uint64_t send_hdr_w0;
        int64_t fc_cache_pkts;
        uint64_t *fc_mem;
        void *lmt_addr;
        rte_iova_t io_addr;
        uint64_t lso_tun_fmt;
+       uint64_t ts_mem;
        uint16_t sqes_per_sqb_log2;
        int16_t nb_sqb_bufs_adj;
        rte_iova_t cpt_io_addr;
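
The 8-word command template leaves struct cn9k_eth_txq; only the prebuilt SEND_HDR word 0 and the timestamp address remain as per-queue state. A back-of-envelope on the shrink of the fast-path-touched fields (stand-in types, not the full structure):

#include <stdint.h>
#include <stdio.h>

struct txq_cmd_old { uint64_t cmd[8]; };                       /* before */
struct txq_cmd_new { uint64_t send_hdr_w0; uint64_t ts_mem; }; /* after  */

int main(void)
{
	printf("old %zu bytes -> new %zu bytes\n",
	       sizeof(struct txq_cmd_old), sizeof(struct txq_cmd_new));
	return 0;
}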
index 8564dd85ee622190ff7e7cb27de06362a222c497..d23e4b61b4a57791ec6b0b12f16e884db612500f 100644 (file)
@@ -58,6 +58,29 @@ cn9k_nix_tx_ext_subs(const uint16_t flags)
                                  : 0);
 }
 
+static __rte_always_inline void
+cn9k_nix_tx_skeleton(struct cn9k_eth_txq *txq, uint64_t *cmd,
+                    const uint16_t flags, const uint16_t static_sz)
+{
+       if (static_sz)
+               cmd[0] = txq->send_hdr_w0;
+       else
+               cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
+                        ((uint64_t)(cn9k_nix_tx_ext_subs(flags) + 1) << 40);
+       cmd[1] = 0;
+
+       if (flags & NIX_TX_NEED_EXT_HDR) {
+               if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
+                       cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
+               else
+                       cmd[2] = NIX_SUBDC_EXT << 60;
+               cmd[3] = 0;
+               cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
+       } else {
+               cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
+       }
+}
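
cn9k_nix_tx_skeleton() regenerates per burst what was previously copied out of txq->cmd[]: with static_sz the prebuilt SEND_HDR w0 is used as-is, otherwise its queue-constant upper bits are kept and sizem1 is re-derived from the compile-time flags. A standalone sketch of that mask-and-patch step (mask and shift copied from the code above; the sample w0 value is made up):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
patch_sizem1(uint64_t send_hdr_w0, uint64_t ext_subs)
{
	/* Keep the queue-constant upper bits (per the mask in the
	 * driver code) and rebuild sizem1 at bit 40 from the
	 * flag-derived sub-descriptor count. */
	return (send_hdr_w0 & 0xFFFFF00000000000ULL) | ((ext_subs + 1) << 40);
}

int main(void)
{
	uint64_t w0 = 0xABCDE00000000000ULL; /* made-up queue-constant bits */

	printf("%016" PRIx64 "\n", patch_sizem1(w0, 2)); /* sizem1 = 3 */
	return 0;
}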
+
 static __rte_always_inline void
 cn9k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
 {
@@ -136,11 +159,11 @@ cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                w1.u = 0;
        }
 
-       if (!(flags & NIX_TX_MULTI_SEG_F)) {
+       if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
-               send_hdr->w0.aura =
-                       roc_npa_aura_handle_to_aura(m->pool->pool_id);
-       }
+       else
+               send_hdr->w0.total = m->pkt_len;
+       send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
 
        /*
         * L3type:  2 => IPV4
@@ -287,41 +310,39 @@ cn9k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
+       } else {
+               sg->seg1_size = m->data_len;
+               *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);
+
+               /* NOFF is handled later for multi-seg */
        }
 }
 
 static __rte_always_inline void
-cn9k_nix_xmit_prepare_tstamp(uint64_t *cmd, const uint64_t *send_mem_desc,
+cn9k_nix_xmit_prepare_tstamp(struct cn9k_eth_txq *txq, uint64_t *cmd,
                             const uint64_t ol_flags, const uint16_t no_segdw,
                             const uint16_t flags)
 {
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                struct nix_send_mem_s *send_mem;
                uint16_t off = (no_segdw - 1) << 1;
-               const uint8_t is_ol_tstamp = !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
+               const uint8_t is_ol_tstamp =
+                       !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
 
                send_mem = (struct nix_send_mem_s *)(cmd + off);
-               if (flags & NIX_TX_MULTI_SEG_F) {
-                       /* Retrieving the default desc values */
-                       cmd[off] = send_mem_desc[6];
 
-                       /* Using compiler barrier to avoid violation of C
-                        * aliasing rules.
-                        */
-                       rte_compiler_barrier();
-               }
-
-               /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
+               /* Packets for which RTE_MBUF_F_TX_IEEE1588_TMST is not set, tx tstamp
                 * should not be recorded, hence changing the alg type to
-                * NIX_SENDMEMALG_SET and also changing send mem addr field to
+                * NIX_SENDMEMALG_SUB and also changing send mem addr field to
                 * next 8 bytes as it corrupts the actual Tx tstamp registered
                 * address.
                 */
+               send_mem->w0.cn9k.subdc = NIX_SUBDC_MEM;
                send_mem->w0.cn9k.alg =
-                       NIX_SENDMEMALG_SETTSTMP - (is_ol_tstamp);
+                       NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
 
-               send_mem->addr = (rte_iova_t)((uint64_t *)send_mem_desc[7] +
-                                             (is_ol_tstamp));
+               send_mem->addr = (rte_iova_t)(((uint64_t *)txq->ts_mem) +
+                               (is_ol_tstamp));
        }
 }
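
The rewritten helper now also writes the MEM sub-descriptor fields that used to be preloaded from the queue, and keeps the branch-free steering the comment describes: for packets without the IEEE1588 flag, the algorithm code is bumped by 8 and the target address advanced by one u64, so the unconditional SEND_MEM write lands in a scratch word instead of the live timestamp slot. A standalone model of just that steering (the alg constant is a stand-in, not the hardware encoding):

#include <stdint.h>
#include <stdio.h>

#define ALG_SETTSTMP 0x2 /* stand-in value */

int main(void)
{
	uint64_t ts[2] = { 0, 0 }; /* [0] live tstamp, [1] scratch */

	for (int want_tstamp = 0; want_tstamp <= 1; want_tstamp++) {
		uint8_t is_ol_tstamp = !want_tstamp;
		uint8_t alg = ALG_SETTSTMP + (is_ol_tstamp << 3);
		uint64_t *addr = ts + is_ol_tstamp;

		*addr = 0xDEAD; /* models the hardware's SEND_MEM write */
		printf("want=%d alg=0x%x wrote slot %d\n",
		       want_tstamp, alg, (int)(addr - ts));
	}
	return 0;
}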
 
@@ -367,8 +388,6 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
        uint8_t off, i;
 
        send_hdr = (struct nix_send_hdr_s *)cmd;
-       send_hdr->w0.total = m->pkt_len;
-       send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);
 
        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
@@ -376,13 +395,29 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
                off = 0;
 
        sg = (union nix_send_sg_s *)&cmd[2 + off];
-       /* Clear sg->u header before use */
-       sg->u &= 0xFC00000000000000;
+
+       /* Start from second segment, first segment is already there */
+       i = 1;
        sg_u = sg->u;
-       slist = &cmd[3 + off];
+       nb_segs = m->nb_segs - 1;
+       m_next = m->next;
+       slist = &cmd[3 + off + 1];
 
-       i = 0;
-       nb_segs = m->nb_segs;
+       /* Set invert df if buffer is not to be freed by H/W */
+       if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+               sg_u |= (cnxk_nix_prefree_seg(m) << 55);
+               rte_io_wmb();
+       }
+
+       /* Mark mempool object as "put" since it is freed by NIX */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+       if (!(sg_u & (1ULL << 55)))
+               RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
+       rte_io_wmb();
+#endif
+       m = m_next;
+       if (!m)
+               goto done;
 
        /* Fill mbuf segments */
        do {
@@ -417,6 +452,7 @@ cn9k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
                m = m_next;
        } while (nb_segs);
 
+done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
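
With the first segment's size and IOVA now laid down by cn9k_nix_xmit_prepare() (which also sets total and aura unconditionally, as the earlier hunk shows), the multi-seg path starts its fill loop at i = 1 and only walks m->next onward. A minimal model of that loop shape (stand-in types; the per-segment IOVA write is modeled as a length store):

#include <stdint.h>
#include <stdio.h>

struct seg { uint16_t len; struct seg *next; }; /* stand-in for rte_mbuf */

static unsigned int
fill_slist(const struct seg *m, uint64_t *slist)
{
	unsigned int i = 1; /* segment 0 is already in the skeleton */

	for (m = m->next; m != NULL; m = m->next) {
		*slist++ = m->len; /* stands in for the per-segment iova */
		i++;
	}
	return i; /* value that ends up in sg->segs */
}

int main(void)
{
	struct seg c = { 100, NULL }, b = { 200, &c }, a = { 300, &b };
	uint64_t slist[8];

	printf("segs = %u\n", fill_slist(&a, slist)); /* 3 */
	return 0;
}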
@@ -472,7 +508,7 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 
        NIX_XMIT_FC_OR_RETURN(txq, pkts);
 
-       roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
+       cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
 
        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
@@ -490,8 +526,8 @@ cn9k_nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts,
 
        for (i = 0; i < pkts; i++) {
                cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
-               cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
-                                            tx_pkts[i]->ol_flags, 4, flags);
+               cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags, 4,
+                                            flags);
                cn9k_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
        }
 
@@ -514,7 +550,7 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 
        NIX_XMIT_FC_OR_RETURN(txq, pkts);
 
-       roc_lmt_mov(cmd, &txq->cmd[0], cn9k_nix_tx_ext_subs(flags));
+       cn9k_nix_tx_skeleton(txq, cmd, flags, 1);
 
        /* Perform header writes before barrier for TSO */
        if (flags & NIX_TX_OFFLOAD_TSO_F) {
@@ -533,9 +569,8 @@ cn9k_nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
        for (i = 0; i < pkts; i++) {
                cn9k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
                segdw = cn9k_nix_prepare_mseg(tx_pkts[i], cmd, flags);
-               cn9k_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
-                                            tx_pkts[i]->ol_flags, segdw,
-                                            flags);
+               cn9k_nix_xmit_prepare_tstamp(txq, cmd, tx_pkts[i]->ol_flags,
+                                            segdw, flags);
                cn9k_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
        }
 
@@ -862,28 +897,34 @@ cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
        if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
                rte_io_wmb();
 
-       senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
+       senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
        senddesc23_w0 = senddesc01_w0;
+
        senddesc01_w1 = vdupq_n_u64(0);
        senddesc23_w1 = senddesc01_w1;
+       sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
+       sgdesc23_w0 = sgdesc01_w0;
 
-       /* Load command defaults into vector variables. */
        if (flags & NIX_TX_NEED_EXT_HDR) {
-               sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
-               sendext23_w0 = sendext01_w0;
-               sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
-               sendext23_w1 = sendext01_w1;
-               sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
-               sgdesc23_w0 = sgdesc01_w0;
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
-                       sendmem01_w0 = vld1q_dup_u64(&txq->cmd[6]);
+                       sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
+                                                  BIT_ULL(15));
+                       sendmem01_w0 =
+                               vdupq_n_u64((NIX_SUBDC_MEM << 60) |
+                                           (NIX_SENDMEMALG_SETTSTMP << 56));
                        sendmem23_w0 = sendmem01_w0;
-                       sendmem01_w1 = vld1q_dup_u64(&txq->cmd[7]);
+                       sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
                        sendmem23_w1 = sendmem01_w1;
+               } else {
+                       sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
                }
-       } else {
-               sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
-               sgdesc23_w0 = sgdesc01_w0;
+               sendext23_w0 = sendext01_w0;
+
+               if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
+                       sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+               else
+                       sendext01_w1 = vdupq_n_u64(0);
+               sendext23_w1 = sendext01_w1;
        }
 
        for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
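
In the NEON path the constant descriptor words (the SG, EXT and MEM subdc encodings) are now materialized as immediates with vdupq_n_u64() instead of dup-loaded from txq->cmd[]; only send_hdr_w0 and ts_mem are still read from queue memory, and the EXT w1 VLAN pointers are emitted only when NIX_TX_OFFLOAD_VLAN_QINQ_F is set. A standalone aarch64 illustration of the two styles (the subdc constant is a stand-in; compile with an aarch64 toolchain):

#include <arm_neon.h>
#include <inttypes.h>
#include <stdio.h>

#define SUBDC_SG 0x4ULL /* stand-in for NIX_SUBDC_SG */

int main(void)
{
	/* Old style: dup-load a prebuilt word from queue memory. */
	uint64_t queue_word = (SUBDC_SG << 60) | (1ULL << 48);
	uint64x2_t from_mem = vld1q_dup_u64(&queue_word);

	/* New style: build the same word as an immediate expression,
	 * avoiding the fast-path memory access. */
	uint64x2_t from_imm = vdupq_n_u64((SUBDC_SG << 60) | (1ULL << 48));

	printf("%016" PRIx64 " %016" PRIx64 "\n",
	       vgetq_lane_u64(from_mem, 0), vgetq_lane_u64(from_imm, 0));
	return 0;
}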