/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2017 Intel Corporation
 */

#include <rte_ring.h>
#include <rte_hash_crc.h>
#include <rte_event_ring.h>

#include "sw_evdev.h"
#include "iq_chunk.h"
#include "event_ring.h"
#define SW_IQS_MASK (SW_IQS_MAX-1)

/* Retrieve the highest priority IQ, or SW_IQS_MAX if no pkts are available.
 * Doing the count-trailing-zeros twice is faster than caching the value due
 * to data dependencies.
 */
#define PKT_MASK_TO_IQ(pkts) \
	(__builtin_ctz(pkts | (1 << SW_IQS_MAX)))

#if SW_IQS_MAX != 4
#error Misconfigured PRIO_TO_IQ caused by SW_IQS_MAX value change
#endif
#define PRIO_TO_IQ(prio) (prio >> 6)
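
/* Note: the event priority field is 8 bits wide, so the shift by 6 folds it
 * into the four IQs: priority 0..63 lands in IQ 0 (served first by
 * PKT_MASK_TO_IQ above), 64..127 in IQ 1, 128..191 in IQ 2 and 192..255 in
 * IQ 3. For example, PRIO_TO_IQ(RTE_EVENT_DEV_PRIORITY_NORMAL) == 2.
 */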
#define MAX_PER_IQ_DEQUEUE 48
#define FLOWID_MASK (SW_QID_NUM_FIDS-1)
/* use cheap bit mixing, we only need to lose a few bits */
#define SW_HASH_FLOWID(f) (((f) ^ (f >> 10)) & FLOWID_MASK)
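
/* The 20-bit rte_event flow_id is wider than the per-QID FID table, so the
 * XOR of the upper bits onto the lower bits spreads flows that differ only
 * above FLOWID_MASK across different FID entries before the mask is applied.
 */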
static inline uint32_t
sw_schedule_atomic_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
		uint32_t iq_num, unsigned int count)
{
	struct rte_event qes[MAX_PER_IQ_DEQUEUE]; /* count <= MAX */
	struct rte_event blocked_qes[MAX_PER_IQ_DEQUEUE];
	uint32_t nb_blocked = 0;
	uint32_t i;

	if (count > MAX_PER_IQ_DEQUEUE)
		count = MAX_PER_IQ_DEQUEUE;

	/* This is the QID ID. The QID ID is static, hence it can be
	 * used to identify the stage of processing in history lists etc
	 */
	uint32_t qid_id = qid->id;

	iq_dequeue_burst(sw, &qid->iq[iq_num], qes, count);
	for (i = 0; i < count; i++) {
		const struct rte_event *qe = &qes[i];
		const uint16_t flow_id = SW_HASH_FLOWID(qes[i].flow_id);
		struct sw_fid_t *fid = &qid->fids[flow_id];
		int cq = fid->cq;

		if (cq < 0) {
			uint32_t cq_idx;
			if (qid->cq_next_tx >= qid->cq_num_mapped_cqs)
				qid->cq_next_tx = 0;
			cq_idx = qid->cq_next_tx++;

			cq = qid->cq_map[cq_idx];

			int cq_free_cnt = sw->cq_ring_space[cq];
			for (cq_idx = 0; cq_idx < qid->cq_num_mapped_cqs;
					cq_idx++) {
				int test_cq = qid->cq_map[cq_idx];
				int test_cq_free = sw->cq_ring_space[test_cq];
				if (test_cq_free > cq_free_cnt) {
					cq = test_cq;
					cq_free_cnt = test_cq_free;
				}
			}

			fid->cq = cq; /* this pins early */
		}
		if (sw->cq_ring_space[cq] == 0 ||
				sw->ports[cq].inflights == SW_PORT_HIST_LIST) {
			blocked_qes[nb_blocked++] = *qe;
			continue;
		}

		struct sw_port *p = &sw->ports[cq];

		/* at this point we can queue up the packet on the cq_buf */
		fid->pcount++;
		p->cq_buf[p->cq_buf_count++] = *qe;
		p->inflights++;
		sw->cq_ring_space[cq]--;

		int head = (p->hist_head++ & (SW_PORT_HIST_LIST-1));
		p->hist_list[head].fid = flow_id;
		p->hist_list[head].qid = qid_id;
		p->stats.tx_pkts++;
		qid->stats.tx_pkts++;

		/* if we just filled in the last slot, flush the buffer */
		if (sw->cq_ring_space[cq] == 0) {
			struct rte_event_ring *worker = p->cq_worker_ring;
			rte_event_ring_enqueue_burst(worker, p->cq_buf,
					p->cq_buf_count,
					&sw->cq_ring_space[cq]);
			p->cq_buf_count = 0;
		}
	}

	iq_put_back(sw, &qid->iq[iq_num], blocked_qes, nb_blocked);

	return count - nb_blocked;
}
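
/* Parallel/ordered scheduling: events are not pinned to a flow, so each event
 * is handed to the next mapped CQ in round-robin order. When keep_order is
 * set (RTE_SCHED_TYPE_ORDERED) a reorder-buffer entry is reserved per event
 * so that sw_schedule_reorder() can restore the original sequence on egress.
 */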
static inline uint32_t
sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
		uint32_t iq_num, unsigned int count, int keep_order)
{
	uint32_t i;
	uint32_t cq_idx = qid->cq_next_tx;

	/* This is the QID ID. The QID ID is static, hence it can be
	 * used to identify the stage of processing in history lists etc
	 */
	uint32_t qid_id = qid->id;

	if (count > MAX_PER_IQ_DEQUEUE)
		count = MAX_PER_IQ_DEQUEUE;

	if (keep_order)
		/* only schedule as many as we have reorder buffer entries */
		count = RTE_MIN(count,
				rob_ring_count(qid->reorder_buffer_freelist));
	for (i = 0; i < count; i++) {
		const struct rte_event *qe = iq_peek(&qid->iq[iq_num]);
		uint32_t cq_check_count = 0;
		uint32_t cq;

		/*
		 * for parallel, just send to next available CQ in round-robin
		 * fashion. So scan for an available CQ. If all CQs are full
		 * just return and move on to next QID
		 */
		do {
			if (++cq_check_count > qid->cq_num_mapped_cqs)
				goto exit;
			if (cq_idx >= qid->cq_num_mapped_cqs)
				cq_idx = 0;
			cq = qid->cq_map[cq_idx++];

		} while (sw->ports[cq].inflights == SW_PORT_HIST_LIST ||
				rte_event_ring_free_count(
					sw->ports[cq].cq_worker_ring) == 0);

		struct sw_port *p = &sw->ports[cq];
		if (sw->cq_ring_space[cq] == 0 ||
				p->inflights == SW_PORT_HIST_LIST)
			break;

		sw->cq_ring_space[cq]--;
		qid->stats.tx_pkts++;

		const int head = (p->hist_head & (SW_PORT_HIST_LIST-1));
		p->hist_list[head].fid = SW_HASH_FLOWID(qe->flow_id);
		p->hist_list[head].qid = qid_id;

		if (keep_order)
			rob_ring_dequeue(qid->reorder_buffer_freelist,
					(void *)&p->hist_list[head].rob_entry);

		sw->ports[cq].cq_buf[sw->ports[cq].cq_buf_count++] = *qe;
		iq_pop(sw, &qid->iq[iq_num]);

		rte_compiler_barrier();
		p->inflights++;
		p->stats.tx_pkts++;
		p->hist_head++;
	}
exit:
	qid->cq_next_tx = cq_idx;
	return i;
}
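
/* Directed (single-link) queues map to exactly one CQ, so scheduling reduces
 * to a burst copy from the IQ straight into that port's cq_buf, bounded by
 * the cached CQ ring space.
 */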
static uint32_t
sw_schedule_dir_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
		uint32_t iq_num, unsigned int count __rte_unused)
{
	uint32_t cq_id = qid->cq_map[0];
	struct sw_port *port = &sw->ports[cq_id];

	/* get max burst enq size for cq_ring */
	uint32_t count_free = sw->cq_ring_space[cq_id];
	if (count_free == 0)
		return 0;

	/* burst dequeue from the QID IQ ring */
	struct sw_iq *iq = &qid->iq[iq_num];
	uint32_t ret = iq_dequeue_burst(sw, iq,
			&port->cq_buf[port->cq_buf_count], count_free);
	port->cq_buf_count += ret;

	/* Update QID, Port and Total TX stats */
	qid->stats.tx_pkts += ret;
	port->stats.tx_pkts += ret;

	/* Subtract credits from cached value */
	sw->cq_ring_space[cq_id] -= ret;

	return ret;
}
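
/* Walk the QIDs in priority order and, for each, service the highest
 * priority IQ that currently holds events (PKT_MASK_TO_IQ on iq_pkt_mask),
 * dispatching to the scheduling routine matching the queue's sched type.
 */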
static uint32_t
sw_schedule_qid_to_cq(struct sw_evdev *sw)
{
	uint32_t pkts = 0;
	uint32_t qid_idx;

	sw->sched_cq_qid_called++;

	for (qid_idx = 0; qid_idx < sw->qid_count; qid_idx++) {
		struct sw_qid *qid = sw->qids_prioritized[qid_idx];

		int type = qid->type;
		int iq_num = PKT_MASK_TO_IQ(qid->iq_pkt_mask);

		/* zero mapped CQs indicates directed */
		if (iq_num >= SW_IQS_MAX || qid->cq_num_mapped_cqs == 0)
			continue;

		uint32_t pkts_done = 0;
		uint32_t count = iq_count(&qid->iq[iq_num]);

		if (count >= sw->sched_min_burst) {
			if (type == SW_SCHED_TYPE_DIRECT)
				pkts_done += sw_schedule_dir_to_cq(sw, qid,
						iq_num, count);
			else if (type == RTE_SCHED_TYPE_ATOMIC)
				pkts_done += sw_schedule_atomic_to_cq(sw, qid,
						iq_num, count);
			else
				pkts_done += sw_schedule_parallel_to_cq(sw, qid,
						iq_num, count,
						type == RTE_SCHED_TYPE_ORDERED);
		}

		/* Check if the IQ that was polled is now empty, and unset it
		 * in the IQ mask if it's empty.
		 */
		int all_done = (pkts_done == count);

		qid->iq_pkt_mask &= ~(all_done << (iq_num));
		pkts += pkts_done;
	}

	return pkts;
}
/* This function will perform re-ordering of packets, and injecting into
 * the appropriate QID IQ. As LB and DIR QIDs are in the same array, but *NOT*
 * contiguous in that array, this function accepts a "range" of QIDs to scan.
 */
static uint32_t
sw_schedule_reorder(struct sw_evdev *sw, int qid_start, int qid_end)
{
	/* Perform egress reordering */
	struct rte_event *qe;
	uint32_t pkts_iter = 0;

	for (; qid_start < qid_end; qid_start++) {
		struct sw_qid *qid = &sw->qids[qid_start];
		unsigned int i, num_entries_in_use;

		if (qid->type != RTE_SCHED_TYPE_ORDERED)
			continue;

		num_entries_in_use = rob_ring_free_count(
					qid->reorder_buffer_freelist);

		if (num_entries_in_use < sw->sched_min_burst)
			num_entries_in_use = 0;
		for (i = 0; i < num_entries_in_use; i++) {
			struct reorder_buffer_entry *entry;
			int j;

			entry = &qid->reorder_buffer[qid->reorder_buffer_index];

			if (!entry->ready)
				break;

			for (j = 0; j < entry->num_fragments; j++) {
				uint16_t dest_qid;
				uint16_t dest_iq;

				int idx = entry->fragment_index + j;
				qe = &entry->fragments[idx];

				dest_qid = qe->queue_id;
				dest_iq = PRIO_TO_IQ(qe->priority);

				if (dest_qid >= sw->qid_count) {
					sw->stats.rx_dropped++;
					continue;
				}

				pkts_iter++;

				struct sw_qid *q = &sw->qids[dest_qid];
				struct sw_iq *iq = &q->iq[dest_iq];

				/* we checked for space above, so enqueue must
				 * succeed
				 */
				iq_enqueue(sw, iq, qe);
				q->iq_pkt_mask |= (1 << (dest_iq));
				q->iq_pkt_count[dest_iq]++;
			}
			entry->ready = (j != entry->num_fragments);
			entry->num_fragments -= j;
			entry->fragment_index += j;

			if (!entry->ready) {
				entry->fragment_index = 0;

				rob_ring_enqueue(
						qid->reorder_buffer_freelist,
						entry);

				qid->reorder_buffer_index++;
				qid->reorder_buffer_index %= qid->window_size;
			}
		}
	}

	return pkts_iter;
}
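
/* Each port keeps a small "pre-pull" shadow buffer (pp_buf) of events taken
 * from its rx_worker_ring; the pull routines below consume from that buffer
 * and refill it here when it runs dry.
 */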
static __rte_always_inline void
sw_refill_pp_buf(struct sw_evdev *sw, struct sw_port *port)
{
	struct rte_event_ring *worker = port->rx_worker_ring;
	port->pp_buf_start = 0;
	port->pp_buf_count = rte_event_ring_dequeue_burst(worker, port->pp_buf,
			sw->sched_deq_burst_size, NULL);
}
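
/* Pull events from a load-balanced port's pre-pull buffer into the scheduler.
 * Completions (forward/release ops) retire the oldest history-list entry,
 * un-pinning the atomic flow and, on ordered queues, marking the reorder
 * buffer entry ready; valid new/forwarded events are placed into the
 * destination QID's IQ. allow_reorder selects the ordered-capable path.
 */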
static __rte_always_inline uint32_t
__pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder)
{
	static struct reorder_buffer_entry dummy_rob;
	uint32_t pkts_iter = 0;
	struct sw_port *port = &sw->ports[port_id];

	/* If shadow ring has 0 pkts, pull from worker ring */
	if (!sw->refill_once_per_iter && port->pp_buf_count == 0)
		sw_refill_pp_buf(sw, port);

	while (port->pp_buf_count) {
		const struct rte_event *qe = &port->pp_buf[port->pp_buf_start];
		struct sw_hist_list_entry *hist_entry = NULL;
		uint8_t flags = qe->op;
		const uint16_t eop = !(flags & QE_FLAG_NOT_EOP);
		int needs_reorder = 0;
		/* if no-reordering, having PARTIAL == NEW */
		if (!allow_reorder && !eop)
			flags = QE_FLAG_VALID;

		/*
		 * if we don't have space for this packet in an IQ,
		 * then move on to next queue. Technically, for a
		 * packet that needs reordering, we don't need to check
		 * here, but it simplifies things not to special-case
		 */
		uint32_t iq_num = PRIO_TO_IQ(qe->priority);
		struct sw_qid *qid = &sw->qids[qe->queue_id];
		/* now process based on flags. Note that for directed
		 * queues, the enqueue_flush masks off all but the
		 * valid flag. This makes FWD and PARTIAL enqueues just
		 * NEW type, and makes DROPS no-op calls.
		 */
		if ((flags & QE_FLAG_COMPLETE) && port->inflights > 0) {
			const uint32_t hist_tail = port->hist_tail &
					(SW_PORT_HIST_LIST - 1);

			hist_entry = &port->hist_list[hist_tail];
			const uint32_t hist_qid = hist_entry->qid;
			const uint32_t hist_fid = hist_entry->fid;

			struct sw_fid_t *fid =
				&sw->qids[hist_qid].fids[hist_fid];
			fid->pcount -= eop;
			if (fid->pcount == 0)
				fid->cq = -1;

			if (allow_reorder) {
				/* set reorder ready if an ordered QID */
				uintptr_t rob_ptr =
					(uintptr_t)hist_entry->rob_entry;
				const uintptr_t valid = (rob_ptr != 0);
				needs_reorder = valid;
				rob_ptr |=
					((valid - 1) & (uintptr_t)&dummy_rob);
				struct reorder_buffer_entry *tmp_rob_ptr =
					(struct reorder_buffer_entry *)rob_ptr;
				tmp_rob_ptr->ready = eop * needs_reorder;
			}

			port->inflights -= eop;
			port->hist_tail += eop;
		}
		if (flags & QE_FLAG_VALID) {
			port->stats.rx_pkts++;

			if (allow_reorder && needs_reorder) {
				struct reorder_buffer_entry *rob_entry =
						hist_entry->rob_entry;

				hist_entry->rob_entry = NULL;
				/* Although fragmentation not currently
				 * supported by eventdev API, we support it
				 * here. Open: How do we alert the user that
				 * they've exceeded max frags?
				 */
				int num_frag = rob_entry->num_fragments;
				if (num_frag == SW_FRAGMENTS_MAX)
					sw->stats.rx_dropped++;
				else {
					int idx = rob_entry->num_fragments++;
					rob_entry->fragments[idx] = *qe;
				}
				goto end_qe;
			}

			/* Use the iq_num from above to push the QE
			 * into the qid at the right priority
			 */
			qid->iq_pkt_mask |= (1 << (iq_num));
			iq_enqueue(sw, &qid->iq[iq_num], qe);
			qid->iq_pkt_count[iq_num]++;
			qid->stats.rx_pkts++;
			pkts_iter++;
		}

end_qe:
		port->pp_buf_start++;
		port->pp_buf_count--;
	} /* while (avail_qes) */

	return pkts_iter;
}
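
/* Thin wrappers so the compiler can specialize __pull_port_lb() for the
 * ordered and non-ordered cases via the constant allow_reorder argument.
 */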
static uint32_t
sw_schedule_pull_port_lb(struct sw_evdev *sw, uint32_t port_id)
{
	return __pull_port_lb(sw, port_id, 1);
}

static uint32_t
sw_schedule_pull_port_no_reorder(struct sw_evdev *sw, uint32_t port_id)
{
	return __pull_port_lb(sw, port_id, 0);
}
static uint32_t
sw_schedule_pull_port_dir(struct sw_evdev *sw, uint32_t port_id)
{
	uint32_t pkts_iter = 0;
	struct sw_port *port = &sw->ports[port_id];

	/* If shadow ring has 0 pkts, pull from worker ring */
	if (!sw->refill_once_per_iter && port->pp_buf_count == 0)
		sw_refill_pp_buf(sw, port);

	while (port->pp_buf_count) {
		const struct rte_event *qe = &port->pp_buf[port->pp_buf_start];
		uint8_t flags = qe->op;

		if ((flags & QE_FLAG_VALID) == 0)
			goto end_qe;

		uint32_t iq_num = PRIO_TO_IQ(qe->priority);
		struct sw_qid *qid = &sw->qids[qe->queue_id];
		struct sw_iq *iq = &qid->iq[iq_num];

		port->stats.rx_pkts++;

		/* Use the iq_num from above to push the QE
		 * into the qid at the right priority
		 */
		qid->iq_pkt_mask |= (1 << (iq_num));
		iq_enqueue(sw, iq, qe);
		qid->iq_pkt_count[iq_num]++;
		qid->stats.rx_pkts++;
		pkts_iter++;

end_qe:
		port->pp_buf_start++;
		port->pp_buf_count--;
	} /* while port->pp_buf_count */

	return pkts_iter;
}
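
/* Top-level scheduling entry point for the sw PMD, typically run repeatedly
 * from a service core. Each call pulls events from the producer (rx) rings,
 * performs egress reordering, moves events from QID IQs to port CQs, and
 * finally flushes the batched cq_buf contents out to the worker rings.
 * sched_quanta bounds the amount of work done per invocation.
 */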
void
sw_event_schedule(struct rte_eventdev *dev)
{
	struct sw_evdev *sw = sw_pmd_priv(dev);
	uint32_t in_pkts, out_pkts;
	uint32_t out_pkts_total = 0, in_pkts_total = 0;
	int32_t sched_quanta = sw->sched_quanta;
	uint32_t i;

	sw->sched_called++;
	if (unlikely(!sw->started))
		return;

	do {
		uint32_t in_pkts_this_iteration = 0;

		/* Pull from rx_ring for ports */
		do {
			in_pkts = 0;
			for (i = 0; i < sw->port_count; i++) {
				/* ack the unlinks in progress as done */
				if (sw->ports[i].unlinks_in_progress)
					sw->ports[i].unlinks_in_progress = 0;

				if (sw->ports[i].is_directed)
					in_pkts += sw_schedule_pull_port_dir(sw, i);
				else if (sw->ports[i].num_ordered_qids > 0)
					in_pkts += sw_schedule_pull_port_lb(sw, i);
				else
					in_pkts += sw_schedule_pull_port_no_reorder(sw, i);
			}

			/* QID scan for re-ordered */
			in_pkts += sw_schedule_reorder(sw, 0, sw->qid_count);
			in_pkts_this_iteration += in_pkts;
		} while (in_pkts > 4 &&
				(int)in_pkts_this_iteration < sched_quanta);
		out_pkts = sw_schedule_qid_to_cq(sw);
		out_pkts_total += out_pkts;
		in_pkts_total += in_pkts_this_iteration;

		if (in_pkts == 0 && out_pkts == 0)
			break;
	} while ((int)out_pkts_total < sched_quanta);

	sw->stats.tx_pkts += out_pkts_total;
	sw->stats.rx_pkts += in_pkts_total;

	sw->sched_no_iq_enqueues += (in_pkts_total == 0);
	sw->sched_no_cq_enqueues += (out_pkts_total == 0);

	uint64_t work_done = (in_pkts_total + out_pkts_total) != 0;
	sw->sched_progress_last_iter = work_done;

	uint64_t cqs_scheds_last_iter = 0;
	/* push all the internal buffered QEs in port->cq_ring to the
	 * worker cores: aka, do the ring transfers batched.
	 */
	int no_enq = 1;
	for (i = 0; i < sw->port_count; i++) {
		struct sw_port *port = &sw->ports[i];
		struct rte_event_ring *worker = port->cq_worker_ring;

		/* If shadow ring has 0 pkts, pull from worker ring */
		if (sw->refill_once_per_iter && port->pp_buf_count == 0)
			sw_refill_pp_buf(sw, port);

		if (port->cq_buf_count >= sw->sched_min_burst) {
			rte_event_ring_enqueue_burst(worker,
					port->cq_buf,
					port->cq_buf_count,
					&sw->cq_ring_space[i]);
			port->cq_buf_count = 0;
			no_enq = 0;
			cqs_scheds_last_iter |= (1ULL << i);
		} else {
			sw->cq_ring_space[i] =
					rte_event_ring_free_count(worker) -
					port->cq_buf_count;
		}
	}
	if (no_enq) {
		if (unlikely(sw->sched_flush_count > SCHED_NO_ENQ_CYCLE_FLUSH))
			sw->sched_min_burst = 1;
		else
			sw->sched_flush_count++;
	} else {
		if (sw->sched_flush_count)
			sw->sched_flush_count--;
		else
			sw->sched_min_burst = sw->sched_min_burst_size;
	}
	/* Provide stats on which eventdev ports were scheduled to in this
	 * iteration. If more than 64 ports are active, always report that
	 * all eventdev ports have been scheduled events.
	 */
	sw->sched_last_iter_bitmask = cqs_scheds_last_iter;
	if (unlikely(sw->port_count >= 64))
		sw->sched_last_iter_bitmask = UINT64_MAX;
}