event/octeontx2: improve single flow performance
[dpdk.git] / drivers / event / octeontx2 / otx2_worker.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2019 Marvell International Ltd.
3  */
4
5 #ifndef __OTX2_WORKER_H__
6 #define __OTX2_WORKER_H__
7
8 #include <rte_common.h>
9 #include <rte_branch_prediction.h>
10
11 #include <otx2_common.h>
12 #include "otx2_evdev.h"
13 #include "otx2_evdev_crypto_adptr_dp.h"
14 #include "otx2_ethdev_sec_tx.h"
15
16 /* SSO Operations */
17
18 static __rte_always_inline uint16_t
19 otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev,
20                      const uint32_t flags, const void * const lookup_mem)
21 {
22         union otx2_sso_event event;
23         uint64_t tstamp_ptr;
24         uint64_t get_work1;
25         uint64_t mbuf;
26
27         otx2_write64(BIT_ULL(16) | /* wait for work. */
28                      1, /* Use Mask set 0. */
29                      ws->getwrk_op);
30
31         if (flags & NIX_RX_OFFLOAD_PTYPE_F)
32                 rte_prefetch_non_temporal(lookup_mem);
33 #ifdef RTE_ARCH_ARM64
34         asm volatile(
35                         "               ldr %[tag], [%[tag_loc]]        \n"
36                         "               ldr %[wqp], [%[wqp_loc]]        \n"
37                         "               tbz %[tag], 63, done%=          \n"
38                         "               sevl                            \n"
39                         "rty%=:         wfe                             \n"
40                         "               ldr %[tag], [%[tag_loc]]        \n"
41                         "               ldr %[wqp], [%[wqp_loc]]        \n"
42                         "               tbnz %[tag], 63, rty%=          \n"
43                         "done%=:        dmb ld                          \n"
44                         "               prfm pldl1keep, [%[wqp], #8]    \n"
45                         "               sub %[mbuf], %[wqp], #0x80      \n"
46                         "               prfm pldl1keep, [%[mbuf]]       \n"
47                         : [tag] "=&r" (event.get_work0),
48                           [wqp] "=&r" (get_work1),
49                           [mbuf] "=&r" (mbuf)
50                         : [tag_loc] "r" (ws->tag_op),
51                           [wqp_loc] "r" (ws->wqp_op)
52                         );
53 #else
54         event.get_work0 = otx2_read64(ws->tag_op);
55         while ((BIT_ULL(63)) & event.get_work0)
56                 event.get_work0 = otx2_read64(ws->tag_op);
57
58         get_work1 = otx2_read64(ws->wqp_op);
59         rte_prefetch0((const void *)get_work1);
60         mbuf = (uint64_t)((char *)get_work1 - sizeof(struct rte_mbuf));
61         rte_prefetch0((const void *)mbuf);
62 #endif
63
64         event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
65                 (event.get_work0 & (0x3FFull << 36)) << 4 |
66                 (event.get_work0 & 0xffffffff);
67         ws->cur_tt = event.sched_type;
68         ws->cur_grp = event.queue_id;
69
70         if (event.sched_type != SSO_TT_EMPTY) {
71                 if ((flags & NIX_RX_OFFLOAD_SECURITY_F) &&
72                     (event.event_type == RTE_EVENT_TYPE_CRYPTODEV)) {
73                         get_work1 = otx2_handle_crypto_event(get_work1);
74                 } else if (event.event_type == RTE_EVENT_TYPE_ETHDEV) {
75                         otx2_wqe_to_mbuf(get_work1, mbuf, event.sub_event_type,
76                                          (uint32_t) event.get_work0, flags,
77                                          lookup_mem);
78                         /* Extracting tstamp, if PTP enabled*/
79                         tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)
80                                                      get_work1) +
81                                                      OTX2_SSO_WQE_SG_PTR);
82                         otx2_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf,
83                                                 ws->tstamp, flags,
84                                                 (uint64_t *)tstamp_ptr);
85                         get_work1 = mbuf;
86                 }
87         }
88
89         ev->event = event.get_work0;
90         ev->u64 = get_work1;
91
92         return !!get_work1;
93 }
94
95 /* Used in cleaning up workslot. */
96 static __rte_always_inline uint16_t
97 otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev,
98                            const uint32_t flags)
99 {
100         union otx2_sso_event event;
101         uint64_t tstamp_ptr;
102         uint64_t get_work1;
103         uint64_t mbuf;
104
105 #ifdef RTE_ARCH_ARM64
106         asm volatile(
107                         "               ldr %[tag], [%[tag_loc]]        \n"
108                         "               ldr %[wqp], [%[wqp_loc]]        \n"
109                         "               tbz %[tag], 63, done%=          \n"
110                         "               sevl                            \n"
111                         "rty%=:         wfe                             \n"
112                         "               ldr %[tag], [%[tag_loc]]        \n"
113                         "               ldr %[wqp], [%[wqp_loc]]        \n"
114                         "               tbnz %[tag], 63, rty%=          \n"
115                         "done%=:        dmb ld                          \n"
116                         "               prfm pldl1keep, [%[wqp], #8]    \n"
117                         "               sub %[mbuf], %[wqp], #0x80      \n"
118                         "               prfm pldl1keep, [%[mbuf]]       \n"
119                         : [tag] "=&r" (event.get_work0),
120                           [wqp] "=&r" (get_work1),
121                           [mbuf] "=&r" (mbuf)
122                         : [tag_loc] "r" (ws->tag_op),
123                           [wqp_loc] "r" (ws->wqp_op)
124                         );
125 #else
126         event.get_work0 = otx2_read64(ws->tag_op);
127         while ((BIT_ULL(63)) & event.get_work0)
128                 event.get_work0 = otx2_read64(ws->tag_op);
129
130         get_work1 = otx2_read64(ws->wqp_op);
131         rte_prefetch_non_temporal((const void *)get_work1);
132         mbuf = (uint64_t)((char *)get_work1 - sizeof(struct rte_mbuf));
133         rte_prefetch_non_temporal((const void *)mbuf);
134 #endif
135
136         event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
137                 (event.get_work0 & (0x3FFull << 36)) << 4 |
138                 (event.get_work0 & 0xffffffff);
139         ws->cur_tt = event.sched_type;
140         ws->cur_grp = event.queue_id;
141
142         if (event.sched_type != SSO_TT_EMPTY &&
143             event.event_type == RTE_EVENT_TYPE_ETHDEV) {
144                 otx2_wqe_to_mbuf(get_work1, mbuf, event.sub_event_type,
145                                  (uint32_t) event.get_work0, flags, NULL);
146                 /* Extracting tstamp, if PTP enabled*/
147                 tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)get_work1)
148                                              + OTX2_SSO_WQE_SG_PTR);
149                 otx2_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, ws->tstamp,
150                                         flags, (uint64_t *)tstamp_ptr);
151                 get_work1 = mbuf;
152         }
153
154         ev->event = event.get_work0;
155         ev->u64 = get_work1;
156
157         return !!get_work1;
158 }
159
160 static __rte_always_inline void
161 otx2_ssogws_add_work(struct otx2_ssogws *ws, const uint64_t event_ptr,
162                      const uint32_t tag, const uint8_t new_tt,
163                      const uint16_t grp)
164 {
165         uint64_t add_work0;
166
167         add_work0 = tag | ((uint64_t)(new_tt) << 32);
168         otx2_store_pair(add_work0, event_ptr, ws->grps_base[grp]);
169 }
170
171 static __rte_always_inline void
172 otx2_ssogws_swtag_desched(struct otx2_ssogws *ws, uint32_t tag, uint8_t new_tt,
173                           uint16_t grp)
174 {
175         uint64_t val;
176
177         val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34);
178         otx2_write64(val, ws->swtag_desched_op);
179 }
180
181 static __rte_always_inline void
182 otx2_ssogws_swtag_norm(struct otx2_ssogws *ws, uint32_t tag, uint8_t new_tt)
183 {
184         uint64_t val;
185
186         val = tag | ((uint64_t)(new_tt & 0x3) << 32);
187         otx2_write64(val, ws->swtag_norm_op);
188 }
189
190 static __rte_always_inline void
191 otx2_ssogws_swtag_untag(struct otx2_ssogws *ws)
192 {
193         otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
194                      SSOW_LF_GWS_OP_SWTAG_UNTAG);
195         ws->cur_tt = SSO_SYNC_UNTAGGED;
196 }
197
198 static __rte_always_inline void
199 otx2_ssogws_swtag_flush(struct otx2_ssogws *ws)
200 {
201         otx2_write64(0, ws->swtag_flush_op);
202         ws->cur_tt = SSO_SYNC_EMPTY;
203 }
204
205 static __rte_always_inline void
206 otx2_ssogws_desched(struct otx2_ssogws *ws)
207 {
208         otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) +
209                      SSOW_LF_GWS_OP_DESCHED);
210 }
211
212 static __rte_always_inline void
213 otx2_ssogws_swtag_wait(struct otx2_ssogws *ws)
214 {
215 #ifdef RTE_ARCH_ARM64
216         uint64_t swtp;
217
218         asm volatile("          ldr %[swtb], [%[swtp_loc]]      \n"
219                      "          tbz %[swtb], 62, done%=         \n"
220                      "          sevl                            \n"
221                      "rty%=:    wfe                             \n"
222                      "          ldr %[swtb], [%[swtp_loc]]      \n"
223                      "          tbnz %[swtb], 62, rty%=         \n"
224                      "done%=:                                   \n"
225                      : [swtb] "=&r" (swtp)
226                      : [swtp_loc] "r" (ws->tag_op));
227 #else
228         /* Wait for the SWTAG/SWTAG_FULL operation */
229         while (otx2_read64(ws->tag_op) & BIT_ULL(62))
230                 ;
231 #endif
232 }
233
234 static __rte_always_inline void
235 otx2_ssogws_head_wait(struct otx2_ssogws *ws)
236 {
237 #ifdef RTE_ARCH_ARM64
238         uint64_t tag;
239
240         asm volatile (
241                         "       ldr %[tag], [%[tag_op]]         \n"
242                         "       tbnz %[tag], 35, done%=         \n"
243                         "       sevl                            \n"
244                         "rty%=: wfe                             \n"
245                         "       ldr %[tag], [%[tag_op]]         \n"
246                         "       tbz %[tag], 35, rty%=           \n"
247                         "done%=:                                \n"
248                         : [tag] "=&r" (tag)
249                         : [tag_op] "r" (ws->tag_op)
250                         );
251 #else
252         /* Wait for the HEAD to be set */
253         while (!(otx2_read64(ws->tag_op) & BIT_ULL(35)))
254                 ;
255 #endif
256 }
257
258 static __rte_always_inline const struct otx2_eth_txq *
259 otx2_ssogws_xtract_meta(struct rte_mbuf *m,
260                         const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
261 {
262         return (const struct otx2_eth_txq *)txq_data[m->port][
263                                         rte_event_eth_tx_adapter_txq_get(m)];
264 }
265
266 static __rte_always_inline void
267 otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
268                         uint64_t *cmd, const uint32_t flags)
269 {
270         otx2_lmt_mov(cmd, txq->cmd, otx2_nix_tx_ext_subs(flags));
271         otx2_nix_xmit_prepare(m, cmd, flags);
272 }
273
274 static __rte_always_inline uint16_t
275 otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[],
276                      uint64_t *cmd, const uint64_t
277                      txq_data[][RTE_MAX_QUEUES_PER_PORT],
278                      const uint32_t flags)
279 {
280         struct rte_mbuf *m = ev[0].mbuf;
281         const struct otx2_eth_txq *txq;
282
283         if ((flags & NIX_TX_OFFLOAD_SECURITY_F) &&
284             (m->ol_flags & PKT_TX_SEC_OFFLOAD)) {
285                 txq = otx2_ssogws_xtract_meta(m, txq_data);
286                 return otx2_sec_event_tx(ws, ev, m, txq, flags);
287         }
288
289         /* Perform header writes before barrier for TSO */
290         otx2_nix_xmit_prepare_tso(m, flags);
291         rte_io_wmb();
292         txq = otx2_ssogws_xtract_meta(m, txq_data);
293         otx2_ssogws_prepare_pkt(txq, m, cmd, flags);
294
295         if (flags & NIX_TX_MULTI_SEG_F) {
296                 const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags);
297                 otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
298                                              m->ol_flags, segdw, flags);
299                 if (!ev->sched_type) {
300                         otx2_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
301                         otx2_ssogws_head_wait(ws);
302                         if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
303                                 otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr,
304                                                        txq->io_addr, segdw);
305                 } else {
306                         otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
307                                                segdw);
308                 }
309         } else {
310                 /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */
311                 otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
312                                              m->ol_flags, 4, flags);
313
314                 if (!ev->sched_type) {
315                         otx2_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
316                         otx2_ssogws_head_wait(ws);
317                         if (otx2_nix_xmit_submit_lmt(txq->io_addr) == 0)
318                                 otx2_nix_xmit_one(cmd, txq->lmt_addr,
319                                                   txq->io_addr, flags);
320                 } else {
321                         otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
322                                           flags);
323                 }
324         }
325
326         otx2_write64(0, ws->swtag_flush_op);
327
328         return 1;
329 }
330
331 #endif