event/octeontx2: improve datapath memory locality
author: Pavan Nikhilesh <pbhagavatula@marvell.com>
Mon, 29 Jun 2020 01:33:28 +0000 (07:03 +0530)
committer: Jerin Jacob <jerinj@marvell.com>
Tue, 30 Jun 2020 05:41:26 +0000 (07:41 +0200)
When the event device is transmitting a packet on OCTEONTX2, it needs to
access the destination Ethernet device's TXq data.
Currently, we get the TXq data through the rte_eth_devices global array.
Instead, save the TXq address inside the event port's memory.

Cc: stable@dpdk.org
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
drivers/event/octeontx2/otx2_evdev.h
drivers/event/octeontx2/otx2_evdev_adptr.c
drivers/event/octeontx2/otx2_worker.c
drivers/event/octeontx2/otx2_worker.h
drivers/event/octeontx2/otx2_worker_dual.c

index 3b47782..873724d 100644 (file)
@@ -141,6 +141,7 @@ struct otx2_sso_evdev {
        uint64_t adptr_xae_cnt;
        uint16_t rx_adptr_pool_cnt;
        uint64_t *rx_adptr_pools;
+       uint16_t max_port_id;
        uint16_t tim_adptr_ring_cnt;
        uint16_t *timer_adptr_rings;
        uint64_t *timer_adptr_sz;
@@ -185,6 +186,8 @@ struct otx2_ssogws {
        uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
        /* PTP timestamp */
        struct otx2_timesync_info *tstamp;
+       /* Tx Fastpath data */
+       uint8_t tx_adptr_data[] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 struct otx2_ssogws_state {
@@ -204,6 +207,8 @@ struct otx2_ssogws_dual {
        uintptr_t grps_base[OTX2_SSO_MAX_VHGRP];
        /* PTP timestamp */
        struct otx2_timesync_info *tstamp;
+       /* Tx Fastpath data */
+       uint8_t tx_adptr_data[] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 static inline struct otx2_sso_evdev *
index 8bdcfa3..0a5d792 100644 (file)
@@ -438,6 +438,74 @@ sso_sqb_aura_limit_edit(struct rte_mempool *mp, uint16_t nb_sqb_bufs)
        return otx2_mbox_process(npa_lf->mbox);
 }
 
+/*
+ * Cache the Tx queue handle of (eth_port_id, tx_queue_id) inside every
+ * event port's private memory so the Tx fastpath no longer needs to
+ * dereference the rte_eth_devices global array.
+ *
+ * Every port is grown with rte_realloc_socket() so that its trailing
+ * tx_adptr_data[] flexible array member can hold a
+ * [max_port_id + 1][RTE_MAX_QUEUES_PER_PORT] table of uint64_t slots.
+ *
+ * Returns 0 on success, -ENOMEM when reallocation fails.
+ */
+static int
+sso_add_tx_queue_data(const struct rte_eventdev *event_dev,
+                     uint16_t eth_port_id, uint16_t tx_queue_id,
+                     struct otx2_eth_txq *txq)
+{
+       struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
+       int i;
+
+       for (i = 0; i < event_dev->data->nb_ports; i++) {
+               dev->max_port_id = RTE_MAX(dev->max_port_id, eth_port_id);
+               if (dev->dual_ws) {
+                       struct otx2_ssogws_dual *old_dws;
+                       struct otx2_ssogws_dual *dws;
+
+                       old_dws = event_dev->data->ports[i];
+                       dws = rte_realloc_socket(old_dws,
+                                                sizeof(struct otx2_ssogws_dual)
+                                                + (sizeof(uint64_t) *
+                                                   (dev->max_port_id + 1) *
+                                                   RTE_MAX_QUEUES_PER_PORT),
+                                                RTE_CACHE_LINE_SIZE,
+                                                event_dev->data->socket_id);
+                       if (dws == NULL)
+                               return -ENOMEM;
+
+                       ((uint64_t (*)[RTE_MAX_QUEUES_PER_PORT]
+                        )&dws->tx_adptr_data)[eth_port_id][tx_queue_id] =
+                               (uint64_t)txq;
+                       event_dev->data->ports[i] = dws;
+               } else {
+                       struct otx2_ssogws *old_ws;
+                       struct otx2_ssogws *ws;
+
+                       old_ws = event_dev->data->ports[i];
+                       /* Single-workslot ports need only the smaller
+                        * struct otx2_ssogws base size, not the dual size.
+                        */
+                       ws = rte_realloc_socket(old_ws,
+                                               sizeof(struct otx2_ssogws)
+                                               + (sizeof(uint64_t) *
+                                                  (dev->max_port_id + 1) *
+                                                  RTE_MAX_QUEUES_PER_PORT),
+                                               RTE_CACHE_LINE_SIZE,
+                                               event_dev->data->socket_id);
+                       if (ws == NULL)
+                               return -ENOMEM;
+
+                       ((uint64_t (*)[RTE_MAX_QUEUES_PER_PORT]
+                        )&ws->tx_adptr_data)[eth_port_id][tx_queue_id] =
+                               (uint64_t)txq;
+                       event_dev->data->ports[i] = ws;
+               }
+       }
+
+       return 0;
+}
+
 int
 otx2_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
                              const struct rte_eth_dev *eth_dev,
@@ -446,18 +500,27 @@ otx2_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev,
        struct otx2_eth_dev *otx2_eth_dev = eth_dev->data->dev_private;
        struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev);
        struct otx2_eth_txq *txq;
-       int i;
+       int i, ret;
 
        RTE_SET_USED(id);
        if (tx_queue_id < 0) {
                for (i = 0 ; i < eth_dev->data->nb_tx_queues; i++) {
                        txq = eth_dev->data->tx_queues[i];
                        sso_sqb_aura_limit_edit(txq->sqb_pool,
-                                               OTX2_SSO_SQB_LIMIT);
+                                       OTX2_SSO_SQB_LIMIT);
+                       ret = sso_add_tx_queue_data(event_dev,
+                                                   eth_dev->data->port_id, i,
+                                                   txq);
+                       if (ret < 0)
+                               return ret;
                }
        } else {
                txq = eth_dev->data->tx_queues[tx_queue_id];
                sso_sqb_aura_limit_edit(txq->sqb_pool, OTX2_SSO_SQB_LIMIT);
+               ret = sso_add_tx_queue_data(event_dev, eth_dev->data->port_id,
+                                           tx_queue_id, txq);
+               if (ret < 0)
+                       return ret;
        }
 
        dev->tx_offloads |= otx2_eth_dev->tx_offload_flags;
index 88bac39..1d427e4 100644 (file)
@@ -268,7 +268,7 @@ otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[],
 }
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                 \
-uint16_t __rte_hot                                                             \
+uint16_t __rte_hot                                                     \
 otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],   \
                                  uint16_t nb_events)                   \
 {                                                                      \
@@ -276,13 +276,16 @@ otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],      \
        uint64_t cmd[sz];                                               \
                                                                        \
        RTE_SET_USED(nb_events);                                        \
-       return otx2_ssogws_event_tx(ws, ev, cmd, flags);                \
+       return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t        \
+                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
+                                   &ws->tx_adptr_data,                 \
+                                   flags);                             \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                 \
-uint16_t __rte_hot                                                             \
+uint16_t __rte_hot                                                     \
 otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
                                      uint16_t nb_events)               \
 {                                                                      \
@@ -290,8 +293,10 @@ otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\
        uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
                                                                        \
        RTE_SET_USED(nb_events);                                        \
-       return otx2_ssogws_event_tx(ws, ev, cmd, (flags) |              \
-                                   NIX_TX_MULTI_SEG_F);                \
+       return otx2_ssogws_event_tx(ws, ev, cmd, (const uint64_t        \
+                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
+                                   &ws->tx_adptr_data,                 \
+                                   (flags) | NIX_TX_MULTI_SEG_F);      \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
index 5f5aa87..924ff7f 100644 (file)
@@ -260,10 +260,11 @@ otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag)
 }
 
 static __rte_always_inline const struct otx2_eth_txq *
-otx2_ssogws_xtract_meta(struct rte_mbuf *m)
+otx2_ssogws_xtract_meta(struct rte_mbuf *m,
+                       const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
 {
-       return rte_eth_devices[m->port].data->tx_queues[
-                       rte_event_eth_tx_adapter_txq_get(m)];
+       return (const struct otx2_eth_txq *)txq_data[m->port][
+                                       rte_event_eth_tx_adapter_txq_get(m)];
 }
 
 static __rte_always_inline void
@@ -276,20 +277,24 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 
 static __rte_always_inline uint16_t
 otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
-                    uint64_t *cmd, const uint32_t flags)
+                    uint64_t *cmd, const uint64_t
+                    txq_data[][RTE_MAX_QUEUES_PER_PORT],
+                    const uint32_t flags)
 {
        struct rte_mbuf *m = ev[0].mbuf;
-       const struct otx2_eth_txq *txq = otx2_ssogws_xtract_meta(m);
-
-       rte_prefetch_non_temporal(txq);
+       const struct otx2_eth_txq *txq;
 
        if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
-           (m->ol_flags & PKT_TX_SEC_OFFLOAD))
+           (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
+               txq = otx2_ssogws_xtract_meta(m, txq_data);
                return otx2_sec_event_tx(ws, ev, m, txq, flags);
+       }
 
+       rte_prefetch_non_temporal(&txq_data[m->port][0]);
        /* Perform header writes before barrier for TSO */
        otx2_nix_xmit_prepare_tso(m, flags);
        otx2_ssogws_order(ws, !ev->sched_type);
+       txq = otx2_ssogws_xtract_meta(m, txq_data);
        otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
 
        if (flags & NIX_TX_MULTI_SEG_F) {
index 3d55d92..946488e 100644 (file)
@@ -308,7 +308,7 @@ SSO_RX_ADPTR_ENQ_FASTPATH_FUNC
 #undef R
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                 \
-uint16_t __rte_hot                                                             \
+uint16_t __rte_hot                                                     \
 otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,                     \
                                       struct rte_event ev[],           \
                                       uint16_t nb_events)              \
@@ -319,13 +319,16 @@ otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port,                        \
        uint64_t cmd[sz];                                               \
                                                                        \
        RTE_SET_USED(nb_events);                                        \
-       return otx2_ssogws_event_tx(vws, ev, cmd, flags);               \
+       return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t       \
+                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
+                                   ws->tx_adptr_data,                  \
+                                   flags);                             \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T
 
 #define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)                 \
-uint16_t __rte_hot                                                             \
+uint16_t __rte_hot                                                     \
 otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,                 \
                                           struct rte_event ev[],       \
                                           uint16_t nb_events)          \
@@ -336,8 +339,10 @@ otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port,                     \
        uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];                 \
                                                                        \
        RTE_SET_USED(nb_events);                                        \
-       return otx2_ssogws_event_tx(vws, ev, cmd, (flags) |             \
-                                   NIX_TX_MULTI_SEG_F);                \
+       return otx2_ssogws_event_tx(vws, ev, cmd, (const uint64_t       \
+                                   (*)[RTE_MAX_QUEUES_PER_PORT])       \
+                                   ws->tx_adptr_data,                  \
+                                   (flags) | NIX_TX_MULTI_SEG_F);      \
 }
 SSO_TX_ADPTR_ENQ_FASTPATH_FUNC
 #undef T