/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018-2020 NXP
 */

#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include "rte_ethdev.h"
#include "rte_malloc.h"
#include "rte_memzone.h"

#include "base/enetc_hw.h"
#include "enetc.h"
#include "enetc_logs.h"
#define ENETC_CACHE_LINE_RXBDS	(RTE_CACHE_LINE_SIZE / \
				 sizeof(union enetc_rx_bd))
#define ENETC_RXBD_BUNDLE 16 /* Number of buffers to allocate at once */
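
/*
 * Reclaim Tx descriptors already processed by hardware: walk the software BD
 * ring from next_to_clean up to the hardware consumer index and return the
 * attached mbufs in bulk.
 */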
static int
enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
{
	int tx_frm_cnt = 0;
	struct enetc_swbd *tx_swbd, *tx_swbd_base;
	int i, hwci, bd_count;
	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];

	/* we don't need barriers here, we just want a relatively current value
	 * from HW.
	 */
	hwci = (int)(rte_read32_relaxed(tx_ring->tcisr) &
		     ENETC_TBCISR_IDX_MASK);

	tx_swbd_base = tx_ring->q_swbd;
	bd_count = tx_ring->bd_count;
	i = tx_ring->next_to_clean;
	tx_swbd = &tx_swbd_base[i];

	/* we're only reading the CI index once here, which means HW may update
	 * it while we're doing clean-up. We could read the register in a loop
	 * but for now we assume it's OK to leave a few Tx frames for the next
	 * call. The issue with reading the register in a loop is that we would
	 * stall here trying to catch up with HW, which keeps sending traffic
	 * as long as it has traffic to send, so in effect we could end up
	 * waiting here for the Tx ring to be drained by HW instead of doing
	 * Rx work in the meantime.
	 */
	while (i != hwci) {
		/* Calling rte_pktmbuf_free() per mbuf wastes a lot of cycles;
		 * stage the mbufs in a local array and free them in bulk.
		 */
		if (tx_frm_cnt == ENETC_RXBD_BUNDLE) {
			rte_pktmbuf_free_bulk(m, tx_frm_cnt);
			tx_frm_cnt = 0;
		}

		m[tx_frm_cnt] = tx_swbd->buffer_addr;
		tx_swbd->buffer_addr = NULL;

		i++;
		tx_swbd++;
		if (unlikely(i == bd_count)) {
			i = 0;
			tx_swbd = tx_swbd_base;
		}

		tx_frm_cnt++;
	}

	if (tx_frm_cnt)
		rte_pktmbuf_free_bulk(m, tx_frm_cnt);

	tx_ring->next_to_clean = i;

	return 0;
}
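
/*
 * Transmit burst entry point: consume up to nb_pkts mbufs, fill one Tx BD per
 * frame and advance the Tx producer index register once for the whole batch.
 */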
uint16_t
enetc_xmit_pkts(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct enetc_swbd *tx_swbd;
	int i, start, bds_to_use;
	struct enetc_tx_bd *txbd;
	struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;

	i = tx_ring->next_to_use;

	bds_to_use = enetc_bd_unused(tx_ring);
	if (bds_to_use < nb_pkts)
		nb_pkts = bds_to_use;
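
	/* One BD per frame: record the mbuf in the software ring for later
	 * clean-up, then fill the hardware BD with the frame length, the
	 * buffer IOVA and the F flag.
	 */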
	start = 0;
	while (nb_pkts--) {
		tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
		txbd = ENETC_TXBD(*tx_ring, i);
		tx_swbd = &tx_ring->q_swbd[i];
		txbd->frm_len = tx_pkts[start]->pkt_len;
		txbd->buf_len = txbd->frm_len;
		txbd->flags = rte_cpu_to_le_16(ENETC_TXBD_FLAGS_F);
		txbd->addr = (uint64_t)(uintptr_t)
		rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_iova +
				 tx_swbd->buffer_addr->data_off);
		i++;
		start++;
		if (unlikely(i == tx_ring->bd_count))
			i = 0;
	}

	/* we're only cleaning up the Tx ring here, on the assumption that
	 * software is slower than hardware and hardware completed sending
	 * older frames out by now.
	 * We're also cleaning up the ring before kicking off Tx for the new
	 * batch to minimize chances of contention on the Tx ring.
	 */
	enetc_clean_tx_ring(tx_ring);

	tx_ring->next_to_use = i;
	enetc_wr_reg(tx_ring->tcir, i);

	return start;
}
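
/*
 * Re-arm up to buff_cnt Rx BDs with fresh mbufs from the queue's mempool and
 * publish the new producer index to hardware.
 */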
int
enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
{
	struct enetc_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;
	int i, j, k = ENETC_RXBD_BUNDLE;
	struct rte_mbuf *m[ENETC_RXBD_BUNDLE];
	struct rte_mempool *mb_pool;

	i = rx_ring->next_to_use;
	mb_pool = rx_ring->mb_pool;
	rx_swbd = &rx_ring->q_swbd[i];
	rxbd = ENETC_RXBD(*rx_ring, i);
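
	/* Attach one mbuf per BD; allocations are done in bundles so their
	 * cost is amortized over ENETC_RXBD_BUNDLE descriptors.
	 */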
	for (j = 0; j < buff_cnt; j++) {
		/* bulk alloc for the next up to ENETC_RXBD_BUNDLE BDs */
		if (k == ENETC_RXBD_BUNDLE) {
			int m_cnt = RTE_MIN(buff_cnt - j, ENETC_RXBD_BUNDLE);

			if (rte_pktmbuf_alloc_bulk(mb_pool, m, m_cnt))
				return -1;

			k = 0;
		}

		rx_swbd->buffer_addr = m[k];
		rxbd->w.addr = (uint64_t)(uintptr_t)
			       rx_swbd->buffer_addr->buf_iova +
			       rx_swbd->buffer_addr->data_off;
		/* clear 'R' as well */
		rxbd->r.lstatus = 0;

		k++;
		rx_swbd++;
		rxbd++;
		i++;
		if (unlikely(i == rx_ring->bd_count)) {
			i = 0;
			rxbd = ENETC_RXBD(*rx_ring, 0);
			rx_swbd = &rx_ring->q_swbd[i];
		}
	}

	if (likely(j)) {
		rx_ring->next_to_alloc = i;
		rx_ring->next_to_use = i;
		enetc_wr_reg(rx_ring->rcir, i);
	}

	return j;
}
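
/*
 * Slow-path translation of the hardware parse summary, used when the fast
 * path in enetc_dev_rx_parse() does not recognize the value (typically the
 * parse-error cases): set packet_type and the checksum ol_flags accordingly.
 */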
static inline void enetc_slow_parsing(struct rte_mbuf *m,
				      uint64_t parse_results)
{
	m->ol_flags &= ~(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);

	switch (parse_results) {
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4;
		m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6;
		m->ol_flags |= PKT_RX_IP_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_TCP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_TCP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_UDP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_UDP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_SCTP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_SCTP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV4_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_ICMP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	case ENETC_PARSE_ERROR | ENETC_PKT_TYPE_IPV6_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_ICMP;
		m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD;
		return;
	/* More switch cases can be added */
	default:
		m->packet_type = RTE_PTYPE_UNKNOWN;
		m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN |
			       PKT_RX_L4_CKSUM_UNKNOWN;
	}
}
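
/*
 * Translate the hardware parse summary into an rte_mbuf packet_type and
 * checksum ol_flags; anything not handled here is passed on to
 * enetc_slow_parsing().
 */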
static inline void __rte_hot
enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
{
	ENETC_PMD_DP_DEBUG("parse summary = 0x%x ", parse_results);
	m->ol_flags |= PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD;

	switch (parse_results) {
	case ENETC_PKT_TYPE_ETHER:
		m->packet_type = RTE_PTYPE_L2_ETHER;
		return;
	case ENETC_PKT_TYPE_IPV4:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4;
		return;
	case ENETC_PKT_TYPE_IPV6:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6;
		return;
	case ENETC_PKT_TYPE_IPV4_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_TCP;
		return;
	case ENETC_PKT_TYPE_IPV6_TCP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_TCP;
		return;
	case ENETC_PKT_TYPE_IPV4_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_UDP;
		return;
	case ENETC_PKT_TYPE_IPV6_UDP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_UDP;
		return;
	case ENETC_PKT_TYPE_IPV4_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_SCTP;
		return;
	case ENETC_PKT_TYPE_IPV6_SCTP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_SCTP;
		return;
	case ENETC_PKT_TYPE_IPV4_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
				 RTE_PTYPE_L4_ICMP;
		return;
	case ENETC_PKT_TYPE_IPV6_ICMP:
		m->packet_type = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6 |
				 RTE_PTYPE_L4_ICMP;
		return;
	/* More switch cases can be added */
	default:
		enetc_slow_parsing(m, parse_results);
	}
}
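
/*
 * Harvest up to work_limit completed Rx BDs into rx_pkts, then hand the
 * freed descriptors back to hardware via enetc_refill_rx_ring().
 */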
static int
enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
		    struct rte_mbuf **rx_pkts,
		    int work_limit)
{
	int rx_frm_cnt = 0;
	int cleaned_cnt, i, bd_count;
	struct enetc_swbd *rx_swbd;
	union enetc_rx_bd *rxbd;

	/* next descriptor to process */
	i = rx_ring->next_to_clean;
	rxbd = ENETC_RXBD(*rx_ring, i);

	bd_count = rx_ring->bd_count;

	/* LS1028A does not have platform cache, so any software access
	 * following a hardware write will go directly to DDR. The latency of
	 * such a read is in excess of 100 core cycles, so try to prefetch
	 * further in advance to mitigate this.
	 * How much is worth prefetching really depends on traffic conditions.
	 * With congested Rx this could go up to 4 cache lines or so. But if
	 * software keeps up with hardware and follows behind Rx PI by a cache
	 * line or less then it's harmful in terms of performance to cache
	 * more. We would only be prefetching BDs that have yet to be written
	 * by ENETC, which will have to be evicted again anyway.
	 */
	rte_prefetch0(ENETC_RXBD(*rx_ring,
				 (i + ENETC_CACHE_LINE_RXBDS) % bd_count));
	rte_prefetch0(ENETC_RXBD(*rx_ring,
				 (i + ENETC_CACHE_LINE_RXBDS * 2) % bd_count));

	cleaned_cnt = enetc_bd_unused(rx_ring);
	rx_swbd = &rx_ring->q_swbd[i];
	while (likely(rx_frm_cnt < work_limit)) {
		uint32_t bd_status;

		bd_status = rte_le_to_cpu_32(rxbd->r.lstatus);
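		/* The refill path cleared lstatus; hardware writes it back
		 * when the BD is done, so a zero value means this descriptor
		 * has not completed yet and we stop here.
		 */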
		if (!bd_status)
			break;

		rx_swbd->buffer_addr->pkt_len = rxbd->r.buf_len -
						rx_ring->crc_len;
		rx_swbd->buffer_addr->data_len = rxbd->r.buf_len -
						 rx_ring->crc_len;
		rx_swbd->buffer_addr->hash.rss = rxbd->r.rss_hash;
		rx_swbd->buffer_addr->ol_flags = 0;
		enetc_dev_rx_parse(rx_swbd->buffer_addr,
				   rxbd->r.parse_summary);
		rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
		cleaned_cnt++;
		rx_swbd++;
		i++;
		if (unlikely(i == rx_ring->bd_count)) {
			i = 0;
			rx_swbd = &rx_ring->q_swbd[i];
		}
		rxbd = ENETC_RXBD(*rx_ring, i);
		rte_prefetch0(ENETC_RXBD(*rx_ring,
					 (i + ENETC_CACHE_LINE_RXBDS) %
					 bd_count));
		rte_prefetch0(ENETC_RXBD(*rx_ring,
					 (i + ENETC_CACHE_LINE_RXBDS * 2) %
					 bd_count));

		rx_frm_cnt++;
	}

	rx_ring->next_to_clean = i;
	enetc_refill_rx_ring(rx_ring, cleaned_cnt);

	return rx_frm_cnt;
}
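
/*
 * Receive burst entry point (rte_eth_rx_burst callback); thin wrapper around
 * enetc_clean_rx_ring().
 */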
uint16_t
enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;

	return enetc_clean_rx_ring(rx_ring, rx_pkts, nb_pkts);
}