net/octeontx2: fix PTP performance
[dpdk.git] / drivers / event / octeontx2 / otx2_worker_dual.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2019 Marvell International Ltd.
3  */
4
5 #ifndef __OTX2_WORKER_DUAL_H__
6 #define __OTX2_WORKER_DUAL_H__
7
8 #include <rte_branch_prediction.h>
9 #include <rte_common.h>
10
11 #include <otx2_common.h>
12 #include "otx2_evdev.h"
13
14 /* SSO Operations */
15 static __rte_always_inline uint16_t
16 otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws,
17                           struct otx2_ssogws_state *ws_pair,
18                           struct rte_event *ev, const uint32_t flags,
19                           const void * const lookup_mem,
20                           struct otx2_timesync_info * const tstamp)
21 {
22         const uint64_t set_gw = BIT_ULL(16) | 1;
23         union otx2_sso_event event;
24         uint64_t tstamp_ptr;
25         uint64_t get_work1;
26         uint64_t mbuf;
27
28         if (flags & NIX_RX_OFFLOAD_PTYPE_F)
29                 rte_prefetch_non_temporal(lookup_mem);
30 #ifdef RTE_ARCH_ARM64
31         asm volatile(
32                         "        ldr %[tag], [%[tag_loc]]    \n"
33                         "        ldr %[wqp], [%[wqp_loc]]    \n"
34                         "        tbz %[tag], 63, done%=      \n"
35                         "        sevl                        \n"
36                         "rty%=:  wfe                         \n"
37                         "        ldr %[tag], [%[tag_loc]]    \n"
38                         "        ldr %[wqp], [%[wqp_loc]]    \n"
39                         "        tbnz %[tag], 63, rty%=      \n"
40                         "done%=: str %[gw], [%[pong]]        \n"
41                         "        dmb ld                      \n"
42                         "        prfm pldl1keep, [%[wqp], #8]\n"
43                         "        sub %[mbuf], %[wqp], #0x80  \n"
44                         "        prfm pldl1keep, [%[mbuf]]   \n"
45                         : [tag] "=&r" (event.get_work0),
46                           [wqp] "=&r" (get_work1),
47                           [mbuf] "=&r" (mbuf)
48                         : [tag_loc] "r" (ws->tag_op),
49                           [wqp_loc] "r" (ws->wqp_op),
50                           [gw] "r" (set_gw),
51                           [pong] "r" (ws_pair->getwrk_op)
52                         );
53 #else
54         event.get_work0 = otx2_read64(ws->tag_op);
55         while ((BIT_ULL(63)) & event.get_work0)
56                 event.get_work0 = otx2_read64(ws->tag_op);
57         get_work1 = otx2_read64(ws->wqp_op);
58         otx2_write64(set_gw, ws_pair->getwrk_op);
59
60         rte_prefetch0((const void *)get_work1);
61         mbuf = (uint64_t)((char *)get_work1 - sizeof(struct rte_mbuf));
62         rte_prefetch0((const void *)mbuf);
63 #endif
64         event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 |
65                 (event.get_work0 & (0x3FFull << 36)) << 4 |
66                 (event.get_work0 & 0xffffffff);
67         ws->cur_tt = event.sched_type;
68         ws->cur_grp = event.queue_id;
69
70         if (event.sched_type != SSO_TT_EMPTY &&
71             event.event_type == RTE_EVENT_TYPE_ETHDEV) {
72                 otx2_wqe_to_mbuf(get_work1, mbuf, event.sub_event_type,
73                                  (uint32_t) event.get_work0, flags, lookup_mem);
74                 /* Extracting tstamp, if PTP enabled. CGX will prepend the
75                  * timestamp at starting of packet data and it can be derieved
76                  * from WQE 9 dword which corresponds to SG iova.
77                  * rte_pktmbuf_mtod_offset can be used for this purpose but it
78                  * brings down the performance as it reads mbuf->buf_addr which
79                  * is not part of cache in general fast path.
80                  */
81                 tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)get_work1)
82                                              + OTX2_SSO_WQE_SG_PTR);
83                 otx2_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, flags,
84                                         (uint64_t *)tstamp_ptr);
85                 get_work1 = mbuf;
86         }
87
88         ev->event = event.get_work0;
89         ev->u64 = get_work1;
90
91         return !!get_work1;
92 }
93
94 static __rte_always_inline void
95 otx2_ssogws_dual_add_work(struct otx2_ssogws_dual *ws, const uint64_t event_ptr,
96                           const uint32_t tag, const uint8_t new_tt,
97                           const uint16_t grp)
98 {
99         uint64_t add_work0;
100
101         add_work0 = tag | ((uint64_t)(new_tt) << 32);
102         otx2_store_pair(add_work0, event_ptr, ws->grps_base[grp]);
103 }
104
105 #endif