drivers/event/dsw/dsw_evdev.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2018 Ericsson AB
   3  */
   4
   5 #ifndef _DSW_EVDEV_H_
   6 #define _DSW_EVDEV_H_
   7
   8 #include <eventdev_pmd.h>
   9
  10 #include <rte_event_ring.h>
  11 #include <rte_eventdev.h>
  12
/* Name of this PMD, produced by stringifying event_dsw with RTE_STR(). */
#define DSW_PMD_NAME RTE_STR(event_dsw)

/* Compile-time port limits. DSW_MAX_PORTS dimensions the device's port
 * array, each queue's serving port list and each port's set of output
 * buffers. DSW_MAX_PORT_OUT_BUFFER is the number of events each such
 * output buffer can hold.
 * NOTE(review): the dequeue/enqueue depth limits are not used in this
 * header - presumably they are checked at port setup; confirm.
 */
#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)

/* Dimensions the device's queue array and the per-queue counter arrays
 * kept by every port.
 */
#define DSW_MAX_QUEUES (16)

/* Dimensions the port input rings (via DSW_IN_RING_SIZE) and the
 * per-port paused-event and input buffers.
 */
#define DSW_MAX_EVENTS (16384)
  23
/* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
 * flow ids of several large flows hash to the same DSW-level flow.
 * Such collisions will limit parallelism and thus the number of cores
 * that may be utilized. However, configuring a large number of DSW
 * flows might potentially, depending on traffic and actual
 * application flow id value range, result in each such DSW-level flow
 * being very small. The effect of migrating such flows will be small,
 * in terms of the amount of processing load redistributed. This will
 * in turn reduce the load balancing speed, since the flow migration
 * rate has an upper limit. Code changes are required to allow > 32k
 * DSW-level flows.
 */
/* Number of bits in a DSW-level flow identifier. */
#define DSW_MAX_FLOWS_BITS (13)
/* Number of DSW-level flows: 2^DSW_MAX_FLOWS_BITS. */
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
/* Bit mask with the DSW_MAX_FLOWS_BITS least significant bits set. */
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
  40
/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns a flow id to
 * events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" from a
 * particular port, the likelihood of events being scheduled to this
 * port is reduced, and thus a kind of statistical load balancing is
 * achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)
  51
/* 'Background tasks' poll the control rings for migration-related
 * messages, or flush the output buffer (so buffered events don't
 * linger too long). This value shouldn't be too low, since the system
 * then won't benefit from the 'batching' effects of the output
 * buffer, and it shouldn't be too high, since that will make
 * buffered events linger too long in case the port goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
  60
/* Avoid making small 'loans' from the central in-flight event credit
 * pool, to improve efficiency.
 *
 * The per-port limits are expressed in terms of the minimum loan size:
 * the maximum is two loans' worth of credits, the minimum one.
 * NOTE(review): presumably a port returns credits above the maximum
 * and borrows when it runs short - confirm against the credit
 * handling code.
 */
#define DSW_MIN_CREDIT_LOAN (64)
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
  67
/* The rings are dimensioned so that all in-flight events can reside
 * on any one of the port rings, to avoid the trouble of having to
 * care about the case where there's no room on the destination port's
 * input ring. Hence the ring size equals DSW_MAX_EVENTS.
 */
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
  74
  75 #define DSW_MAX_LOAD (INT16_MAX)
  76 #define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
  77 #define DSW_LOAD_TO_PERCENT(x) ((100*x)/DSW_MAX_LOAD)
  78
/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid having too many flows, from too many
 * source ports, to be migrated too quickly to a lightly loaded port -
 * in particular since this might cause the system to oscillate.
 *
 * The interval is a quarter of DSW_MIGRATION_INTERVAL, and thus in the
 * same unit.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
/* NOTE(review): presumably the weight given to the previous load
 * estimate when a new measurement is folded in - confirm against the
 * load update code.
 */
#define DSW_OLD_LOAD_WEIGHT (1)
  88
/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace at
 * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent on the processing
 * of an event burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
/* Load thresholds, given as percentages converted to the load scale.
 * NOTE(review): going by the names, a port presumably needs at least
 * 70% load to emigrate flows, a target port may not exceed 95% load,
 * and 3% is the minimum load difference worth rebalancing - confirm
 * against the migration code.
 */
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))
  99
/* Dimensions the per-port array of recorded ('seen') events. */
#define DSW_MAX_EVENTS_RECORDED (128)

/* Dimensions the per-port emigration target arrays and the queue/flow
 * list carried by a control message.
 */
#define DSW_MAX_FLOWS_PER_MIGRATION (8)

/* Only one outstanding migration per port is allowed */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)

/* Enough room for pause request/confirm and unpause request/confirm
 * for all possible senders, i.e., four messages from each of the
 * other ports.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
 111
/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
 * dequeue(), arrange events so that events with the same flow id on
 * the same queue form a back-to-back "burst", and so that such
 * bursts of different flow ids, but on the same queue, also come
 * consecutively. All this in an attempt to improve data and
 * instruction cache usage for the application, at the cost of a
 * scheduler overhead increase.
 */
 120
 121 /* #define DSW_SORT_DEQUEUED */
 122
/* Identifies a DSW-level flow: a queue id paired with a flow hash. */
struct dsw_queue_flow {
        uint8_t queue_id;
        /* NOTE(review): presumably limited to DSW_MAX_FLOWS_BITS
         * significant bits - confirm against the hashing code.
         */
        uint16_t flow_hash;
};
 127
 128 enum dsw_migration_state {
 129         DSW_MIGRATION_STATE_IDLE,
 130         DSW_MIGRATION_STATE_PAUSING,
 131         DSW_MIGRATION_STATE_FORWARDING,
 132         DSW_MIGRATION_STATE_UNPAUSING
 133 };
 134
/* Per-port state. The ports are kept in an array in struct dsw_evdev,
 * and every instance is cache-line aligned.
 */
struct dsw_port {
        /* Index of this port in the dsw_evdev port array. */
        uint16_t id;

        /* Keeping a pointer here to avoid container_of() calls, which
         * are expensive since they are very frequent and will result
         * in an integer multiplication (since the port id is an index
         * into the dsw_evdev port array).
         */
        struct dsw_evdev *dsw;

        uint16_t dequeue_depth;
        uint16_t enqueue_depth;

        /* NOTE(review): presumably credits this port currently holds
         * from the device-wide pool (see credits_on_loan in struct
         * dsw_evdev) - confirm.
         */
        int32_t inflight_credits;

        int32_t new_event_threshold;

        uint16_t pending_releases;

        uint16_t next_parallel_flow_id;

        uint16_t ops_since_bg_task;

        /* most recent 'background' processing */
        uint64_t last_bg;

        /* For port load measurement. */
        uint64_t next_load_update;
        uint64_t load_update_interval;
        uint64_t measurement_start;
        uint64_t busy_start;
        uint64_t busy_cycles;
        uint64_t total_busy_cycles;

        /* For the ctl interface and flow migration mechanism. */
        uint64_t next_emigration;
        uint64_t migration_interval;
        enum dsw_migration_state migration_state;

        uint64_t emigration_start;
        uint64_t emigrations;
        uint64_t emigration_latency;

        /* Up to DSW_MAX_FLOWS_PER_MIGRATION target port ids and
         * queue/flow pairs for an emigration.
         * NOTE(review): presumably emigration_targets_len is the number
         * of valid entries in both arrays - confirm.
         */
        uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
        struct dsw_queue_flow
                emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
        uint8_t emigration_targets_len;
        uint8_t cfm_cnt;

        uint64_t immigrations;

        uint16_t paused_flows_len;
        struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

        /* In a very contrived worst case all inflight events can be
         * laying around paused here.
         */
        uint16_t paused_events_len;
        struct rte_event paused_events[DSW_MAX_EVENTS];

        /* Record of up to DSW_MAX_EVENTS_RECORDED queue/flow pairs.
         * NOTE(review): the length/index pair suggests a circular
         * buffer of recently seen events - confirm.
         */
        uint16_t seen_events_len;
        uint16_t seen_events_idx;
        struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

        /* NOTE(review): the fields below look like statistics counters,
         * in total and per queue - confirm against the xstats code.
         */
        uint64_t enqueue_calls;
        uint64_t new_enqueued;
        uint64_t forward_enqueued;
        uint64_t release_enqueued;
        uint64_t queue_enqueued[DSW_MAX_QUEUES];

        uint64_t dequeue_calls;
        uint64_t dequeued;
        uint64_t queue_dequeued[DSW_MAX_QUEUES];

        /* One output buffer, with room for DSW_MAX_PORT_OUT_BUFFER
         * events, and one length counter per port.
         */
        uint16_t out_buffer_len[DSW_MAX_PORTS];
        struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

        uint16_t in_buffer_len;
        uint16_t in_buffer_start;
        /* This buffer may contain events that were read up from the
         * in_ring during the flow migration process.
         */
        struct rte_event in_buffer[DSW_MAX_EVENTS];

        /* The members from here on each start on a cache line of
         * their own.
         */
        struct rte_event_ring *in_ring __rte_cache_aligned;

        struct rte_ring *ctl_in_ring __rte_cache_aligned;

        /* Estimate of current port load. */
        int16_t load __rte_cache_aligned;
        /* Estimate of flows currently migrating to this port. */
        int32_t immigration_load __rte_cache_aligned;
} __rte_cache_aligned;
 228
/* Per-queue state. */
struct dsw_queue {
        /* NOTE(review): presumably one of the RTE_SCHED_TYPE_* values -
         * confirm against the queue setup code.
         */
        uint8_t schedule_type;
        /* List of up to DSW_MAX_PORTS port ids, with its length kept in
         * num_serving_ports.
         */
        uint8_t serving_ports[DSW_MAX_PORTS];
        uint16_t num_serving_ports;

        /* One entry per DSW-level flow, starting on a cache line of its
         * own. NOTE(review): presumably the id of the port the flow is
         * currently assigned to - confirm.
         */
        uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
};
 236
/* Device-level state, holding all ports and queues. This is the type
 * dsw_pmd_priv() returns for an eventdev.
 */
struct dsw_evdev {
        struct rte_eventdev_data *data;

        struct dsw_port ports[DSW_MAX_PORTS];
        uint16_t num_ports;
        struct dsw_queue queues[DSW_MAX_QUEUES];
        uint8_t num_queues;
        int32_t max_inflight;

        /* Kept on a cache line of its own. NOTE(review): presumably
         * the number of credits currently handed out from the central
         * in-flight event credit pool to the ports - confirm.
         */
        int32_t credits_on_loan __rte_cache_aligned;
};
 248
/* Control message types: pause request, unpause request and confirm.
 * NOTE(review): presumably the values carried in the type field of
 * struct dsw_ctl_msg - confirm.
 */
#define DSW_CTL_PAUS_REQ (0)
#define DSW_CTL_UNPAUS_REQ (1)
#define DSW_CTL_CFM (2)
 252
/* Control message, carrying a list of up to
 * DSW_MAX_FLOWS_PER_MIGRATION queue/flow pairs. The struct is 4-byte
 * aligned.
 */
struct dsw_ctl_msg {
        /* NOTE(review): presumably one of the DSW_CTL_* values. */
        uint8_t type;
        uint8_t originating_port_id;
        /* NOTE(review): presumably the number of valid entries in
         * qfs - confirm.
         */
        uint8_t qfs_len;
        struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
} __rte_aligned(4);
 259
/* Event enqueue entry points, defined outside this header. The port
 * is passed as an opaque pointer.
 * NOTE(review): presumably it points to a struct dsw_port, and the
 * return value is the number of events accepted - confirm against the
 * implementation.
 */
uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
uint16_t dsw_event_enqueue_burst(void *port,
                                 const struct rte_event events[],
                                 uint16_t events_len);
uint16_t dsw_event_enqueue_new_burst(void *port,
                                     const struct rte_event events[],
                                     uint16_t events_len);
uint16_t dsw_event_enqueue_forward_burst(void *port,
                                         const struct rte_event events[],
                                         uint16_t events_len);

/* Event dequeue entry points, defined outside this header.
 * NOTE(review): presumably the return value is the number of events
 * written to the caller's buffer - confirm against the implementation.
 */
uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
                                 uint16_t num, uint64_t wait);
/* Port maintenance entry point, defined outside this header. */
void dsw_event_maintain(void *port, int op);
 275
/* Extended statistics entry points, defined outside this header.
 * NOTE(review): the signatures look like they mirror the eventdev
 * xstats driver callbacks - confirm against eventdev_pmd.h for the
 * parameter and return value semantics.
 */
int dsw_xstats_get_names(const struct rte_eventdev *dev,
                         enum rte_event_dev_xstats_mode mode,
                         uint8_t queue_port_id,
                         struct rte_event_dev_xstats_name *xstats_names,
                         unsigned int *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
                   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
                   const unsigned int ids[], uint64_t values[], unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
                                const char *name, unsigned int *id);
 286
 287 static inline struct dsw_evdev *
 288 dsw_pmd_priv(const struct rte_eventdev *eventdev)
 289 {
 290         return eventdev->data->dev_private;
 291 }
 292
 293 #define DSW_LOG_DP(level, fmt, args...)                                 \
 294         RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt,          \
 295                    DSW_PMD_NAME,                                        \
 296                    __func__, __LINE__, ## args)
 297
 298 #define DSW_LOG_DP_PORT(level, port_id, fmt, args...)           \
 299         DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)
 300
 301 #endif