dpdk.git: drivers/net/bnxt/bnxt_rxtx_vec_sse.c (commit 69ffbe4cc90244e219712114d5adfbcb7cbcb3f9)
// SPDX-License-Identifier: BSD-3-Clause
/* Copyright(c) 2019 Broadcom All rights reserved. */

#include <inttypes.h>
#include <stdbool.h>

#include <rte_bitmap.h>
#include <rte_byteorder.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#if defined(RTE_ARCH_X86)
#include <tmmintrin.h>
#else
#error "bnxt vector pmd: unsupported target."
#endif

#include "bnxt.h"
#include "bnxt_cpr.h"
#include "bnxt_ring.h"
#include "bnxt_rxtx_vec_common.h"

#include "bnxt_txq.h"
#include "bnxt_txr.h"

/*
 * RX Ring handling
 */

static inline void
bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
{
        struct rx_prod_pkt_bd *rxbds = &rxr->rx_desc_ring[rxq->rxrearm_start];
        struct rte_mbuf **rx_bufs = &rxr->rx_buf_ring[rxq->rxrearm_start];
        struct rte_mbuf *mb0, *mb1;
        int nb, i;

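        /*
         * hdr_room places RTE_PKTMBUF_HEADROOM in the upper 64-bit lane,
         * which lines up with the mbuf buf_iova field loaded below.
         * addrmask selects that same lane: it is used both to isolate the
         * buffer address and to clear the stale address in the descriptor.
         */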
        const __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, 0);
        const __m128i addrmask = _mm_set_epi64x(UINT64_MAX, 0);

        /*
         * Number of mbufs to allocate must be a multiple of two. The
         * allocation must not go past the end of the ring.
         */
        nb = RTE_MIN(rxq->rxrearm_nb & ~0x1,
                     rxq->nb_rx_desc - rxq->rxrearm_start);

        /* Allocate new mbufs into the software ring */
        if (rte_mempool_get_bulk(rxq->mb_pool, (void *)rx_bufs, nb) < 0) {
                rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += nb;

                return;
        }

        /* Initialize the mbufs using vector operations, two mbufs per loop iteration. */
        for (i = 0; i < nb; i += 2, rx_bufs += 2) {
                __m128i buf_addr0, buf_addr1;
                __m128i rxbd0, rxbd1;

                mb0 = rx_bufs[0];
                mb1 = rx_bufs[1];

                /* Load address fields from both mbufs */
                buf_addr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
                buf_addr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);

                /* Load both rx descriptors (preserving some existing fields) */
                rxbd0 = _mm_loadu_si128((__m128i *)(rxbds + 0));
                rxbd1 = _mm_loadu_si128((__m128i *)(rxbds + 1));

                /* Add default offset to buffer address. */
                buf_addr0 = _mm_add_epi64(buf_addr0, hdr_room);
                buf_addr1 = _mm_add_epi64(buf_addr1, hdr_room);

                /* Clear all fields except address. */
                buf_addr0 = _mm_and_si128(buf_addr0, addrmask);
                buf_addr1 = _mm_and_si128(buf_addr1, addrmask);

                /* Clear address field in descriptor. */
                rxbd0 = _mm_andnot_si128(addrmask, rxbd0);
                rxbd1 = _mm_andnot_si128(addrmask, rxbd1);

                /* Set address field in descriptor. */
                rxbd0 = _mm_add_epi64(rxbd0, buf_addr0);
                rxbd1 = _mm_add_epi64(rxbd1, buf_addr1);

                /* Store descriptors to memory. */
                _mm_store_si128((__m128i *)(rxbds++), rxbd0);
                _mm_store_si128((__m128i *)(rxbds++), rxbd1);
        }

        rxq->rxrearm_start += nb;
        bnxt_db_write(&rxr->rx_db, rxq->rxrearm_start - 1);
        if (rxq->rxrearm_start >= rxq->nb_rx_desc)
                rxq->rxrearm_start = 0;

        rxq->rxrearm_nb -= nb;
}

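/*
 * Combine flag bits from the two halves of the Rx completion into an
 * index into bnxt_ptype_table, and return the looked-up packet type in
 * the low 32 bits of an XMM register so it can be blended into the mbuf
 * descriptor fields.
 */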
static __m128i
bnxt_parse_pkt_type(__m128i mm_rxcmp, __m128i mm_rxcmp1)
{
        uint32_t flags_type, flags2;
        uint8_t index;

        flags_type = _mm_extract_epi16(mm_rxcmp, 0);
        flags2 = _mm_extract_epi32(mm_rxcmp1, 0);

        /*
         * Index format:
         *     bit 0: RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
         *     bit 1: RX_CMPL_FLAGS2_IP_TYPE
         *     bit 2: RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN
         *     bits 3-6: RX_PKT_CMPL_FLAGS_ITYPE
         */
        index = ((flags_type & RX_PKT_CMPL_FLAGS_ITYPE_MASK) >> 9) |
                ((flags2 & (RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
                           RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC)) >> 2) |
                ((flags2 & RX_PKT_CMPL_FLAGS2_IP_TYPE) >> 7);

        return _mm_set_epi32(0, 0, 0, bnxt_ptype_table[index]);
}

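/*
 * Translate the checksum status reported in the Rx completion into
 * PKT_RX_*_CKSUM_* offload flags in the mbuf: inner IP and L4 checksums,
 * plus the outer L4 checksum for tunnel packets.
 */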
static void
bnxt_parse_csum(struct rte_mbuf *mbuf, struct rx_pkt_cmpl_hi *rxcmp1)
{
        uint32_t flags;

        flags = flags2_0xf(rxcmp1);
        /* IP Checksum */
        if (likely(IS_IP_NONTUNNEL_PKT(flags))) {
                if (unlikely(RX_CMP_IP_CS_ERROR(rxcmp1)))
                        mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
                else
                        mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
        } else if (IS_IP_TUNNEL_PKT(flags)) {
                if (unlikely(RX_CMP_IP_OUTER_CS_ERROR(rxcmp1) ||
                             RX_CMP_IP_CS_ERROR(rxcmp1)))
                        mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
                else
                        mbuf->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
        } else if (unlikely(RX_CMP_IP_CS_UNKNOWN(rxcmp1))) {
                mbuf->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
        }

        /* L4 Checksum */
        if (likely(IS_L4_NONTUNNEL_PKT(flags))) {
                if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
                        mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
                else
                        mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
        } else if (IS_L4_TUNNEL_PKT(flags)) {
                if (unlikely(RX_CMP_L4_INNER_CS_ERR2(rxcmp1)))
                        mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
                else
                        mbuf->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
                if (unlikely(RX_CMP_L4_OUTER_CS_ERR2(rxcmp1))) {
                        mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
                } else if (unlikely(IS_L4_TUNNEL_PKT_ONLY_INNER_L4_CS
                                    (flags))) {
                        mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_UNKNOWN;
                } else {
                        mbuf->ol_flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
                }
        } else if (unlikely(RX_CMP_L4_CS_UNKNOWN(rxcmp1))) {
                mbuf->ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
        }
}

uint16_t
bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                   uint16_t nb_pkts)
{
        struct bnxt_rx_queue *rxq = rx_queue;
        struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
        struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
        uint32_t raw_cons = cpr->cp_raw_cons;
        uint32_t cons;
        int nb_rx_pkts = 0;
        struct rx_pkt_cmpl *rxcmp;
        const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
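        /*
         * shuf_msk rearranges the first 16 bytes of the Rx completion into
         * the mbuf rx_descriptor_fields1 layout: packet length, data length
         * and RSS hash are moved into place, while lanes set to 0xFF are
         * zeroed by the shuffle (packet type and vlan_tci are filled in
         * separately).
         */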
        const __m128i shuf_msk =
                _mm_set_epi8(15, 14, 13, 12,          /* rss */
                             0xFF, 0xFF,              /* vlan_tci (zeroes) */
                             3, 2,                    /* data_len */
                             0xFF, 0xFF, 3, 2,        /* pkt_len */
                             0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
        int i;

        /* If the Rx queue has been stopped, return immediately. */
        if (unlikely(!rxq->rx_started))
                return 0;

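        /*
         * Refill Rx descriptors once the number of consumed, not yet
         * rearmed entries reaches the free threshold.
         */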
        if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
                bnxt_rxq_rearm(rxq, rxr);

        /* Return no more than RTE_BNXT_MAX_RX_BURST per call. */
        nb_pkts = RTE_MIN(nb_pkts, RTE_BNXT_MAX_RX_BURST);

        /*
         * Make nb_pkts an integer multiple of RTE_BNXT_DESCS_PER_LOOP.
         * If nb_pkts is less than RTE_BNXT_DESCS_PER_LOOP, return without
         * receiving any packets.
         */
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_BNXT_DESCS_PER_LOOP);
        if (!nb_pkts)
                return 0;

        /* Handle RX burst request */
        for (i = 0; i < nb_pkts; i++) {
                struct rx_pkt_cmpl_hi *rxcmp1;
                struct rte_mbuf *mbuf;
                __m128i mm_rxcmp, mm_rxcmp1, pkt_mb, ptype;

                cons = RING_CMP(cpr->cp_ring_struct, raw_cons);

                rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
                rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cons + 1];

                if (!CMP_VALID(rxcmp1, raw_cons + 1, cpr->cp_ring_struct))
                        break;

                mm_rxcmp = _mm_load_si128((__m128i *)rxcmp);
                mm_rxcmp1 = _mm_load_si128((__m128i *)rxcmp1);

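                /*
                 * Each packet consumes two completion ring entries; the
                 * opaque field of the first entry is the index of the
                 * packet's buffer in the Rx buffer ring.
                 */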
                raw_cons += 2;
                cons = rxcmp->opaque;

                mbuf = rxr->rx_buf_ring[cons];
                rte_prefetch0(mbuf);
                rxr->rx_buf_ring[cons] = NULL;

                /* Set constant fields from mbuf initializer. */
                _mm_store_si128((__m128i *)&mbuf->rearm_data, mbuf_init);

                /* Set mbuf pkt_len, data_len, and rss_hash fields. */
                pkt_mb = _mm_shuffle_epi8(mm_rxcmp, shuf_msk);
                ptype = bnxt_parse_pkt_type(mm_rxcmp, mm_rxcmp1);
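                /*
                 * Blend mask 0x3 takes the two low 16-bit words (the
                 * 32-bit packet_type field) from ptype and all remaining
                 * fields from pkt_mb.
                 */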
                pkt_mb = _mm_blend_epi16(pkt_mb, ptype, 0x3);

                _mm_storeu_si128((void *)&mbuf->rx_descriptor_fields1, pkt_mb);

                rte_compiler_barrier();

                if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID)
                        mbuf->ol_flags |= PKT_RX_RSS_HASH;

                if (rxcmp1->flags2 &
                    RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN) {
                        mbuf->vlan_tci = rxcmp1->metadata &
                                (RX_PKT_CMPL_METADATA_VID_MASK |
                                RX_PKT_CMPL_METADATA_DE |
                                RX_PKT_CMPL_METADATA_PRI_MASK);
                        mbuf->ol_flags |=
                                PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
                }

                bnxt_parse_csum(mbuf, rxcmp1);
                rx_pkts[nb_rx_pkts++] = mbuf;
        }

        if (nb_rx_pkts) {
                rxr->rx_prod =
                        RING_ADV(rxr->rx_ring_struct, rxr->rx_prod, nb_rx_pkts);

                rxq->rxrearm_nb += nb_rx_pkts;
                cpr->cp_raw_cons = raw_cons;
                cpr->valid =
                        !!(cpr->cp_raw_cons & cpr->cp_ring_struct->ring_size);
                bnxt_db_cq(cpr);
        }

        return nb_rx_pkts;
}

static void
bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
{
        struct bnxt_tx_ring_info *txr = txq->tx_ring;
        struct rte_mbuf **free = txq->free;
        uint16_t cons = txr->tx_cons;
        unsigned int blk = 0;

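        /*
         * Free the transmitted mbufs, batching consecutive mbufs from the
         * same mempool so that each batch is returned with a single bulk
         * put.
         */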
        while (nr_pkts--) {
                struct bnxt_sw_tx_bd *tx_buf;
                struct rte_mbuf *mbuf;

                tx_buf = &txr->tx_buf_ring[cons];
                cons = RING_NEXT(txr->tx_ring_struct, cons);
                mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
                if (unlikely(mbuf == NULL))
                        continue;
                tx_buf->mbuf = NULL;

                if (blk && mbuf->pool != free[0]->pool) {
                        rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
                        blk = 0;
                }
                free[blk++] = mbuf;
        }
        if (blk)
                rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);

        txr->tx_cons = cons;
}

static void
bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
{
        struct bnxt_cp_ring_info *cpr = txq->cp_ring;
        uint32_t raw_cons = cpr->cp_raw_cons;
        uint32_t cons;
        uint32_t nb_tx_pkts = 0;
        struct tx_cmpl *txcmp;
        struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
        struct bnxt_ring *cp_ring_struct = cpr->cp_ring_struct;
        uint32_t ring_mask = cp_ring_struct->ring_mask;

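        /*
         * The opaque field of a TX_L2 completion carries the burst size
         * that was written into the last BD of that burst by
         * bnxt_xmit_fixed_burst_vec(), so summing it counts completed
         * packets.
         */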
        do {
                cons = RING_CMPL(ring_mask, raw_cons);
                txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];

                if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
                        break;

                if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2))
                        nb_tx_pkts += txcmp->opaque;
                else
                        RTE_LOG_DP(ERR, PMD,
                                   "Unhandled CMP type %02x\n",
                                   CMP_TYPE(txcmp));
                raw_cons = NEXT_RAW_CMP(raw_cons);
        } while (nb_tx_pkts < ring_mask);

        cpr->valid = !!(raw_cons & cp_ring_struct->ring_size);
        if (nb_tx_pkts) {
                bnxt_tx_cmp_vec(txq, nb_tx_pkts);
                cpr->cp_raw_cons = raw_cons;
                bnxt_db_cq(cpr);
        }
}

static uint16_t
bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                          uint16_t nb_pkts)
{
        struct bnxt_tx_queue *txq = tx_queue;
        struct bnxt_tx_ring_info *txr = txq->tx_ring;
        uint16_t prod = txr->tx_prod;
        struct rte_mbuf *tx_mbuf;
        struct tx_bd_long *txbd = NULL;
        struct bnxt_sw_tx_bd *tx_buf;
        uint16_t to_send;

        nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));

        if (unlikely(nb_pkts == 0))
                return 0;

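        /*
         * Post each packet as a single-BD descriptor with completions
         * suppressed; a completion is requested only for the last
         * descriptor of the burst, which carries the burst size in its
         * opaque field.
         */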
        /* Handle TX burst request */
        to_send = nb_pkts;
        while (to_send) {
                tx_mbuf = *tx_pkts++;
                rte_prefetch0(tx_mbuf);

                tx_buf = &txr->tx_buf_ring[prod];
                tx_buf->mbuf = tx_mbuf;
                tx_buf->nr_bds = 1;

                txbd = &txr->tx_desc_ring[prod];
                txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
                txbd->len = tx_mbuf->data_len;
                txbd->flags_type = bnxt_xmit_flags_len(tx_mbuf->data_len,
                                                       TX_BD_FLAGS_NOCMPL);
                prod = RING_NEXT(txr->tx_ring_struct, prod);
                to_send--;
        }

        /* Request a completion for the last packet in the burst. */
        if (txbd) {
                txbd->opaque = nb_pkts;
                txbd->flags_type &= ~TX_BD_LONG_FLAGS_NO_CMPL;
        }

        rte_compiler_barrier();
        bnxt_db_write(&txr->tx_db, prod);

        txr->tx_prod = prod;

        return nb_pkts;
}

uint16_t
bnxt_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                   uint16_t nb_pkts)
{
        int nb_sent = 0;
        struct bnxt_tx_queue *txq = tx_queue;

        /*
         * If the Tx queue has been stopped, return; transmission resumes
         * once the queue is restarted.
         */
        if (unlikely(!txq->tx_started)) {
                PMD_DRV_LOG(DEBUG, "Tx queue stopped; returning\n");
                return 0;
        }

        /* Handle TX completions */
        if (bnxt_tx_bds_in_hw(txq) >= txq->tx_free_thresh)
                bnxt_handle_tx_cp_vec(txq);

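        /*
         * Transmit in chunks of at most RTE_BNXT_MAX_TX_BURST packets,
         * stopping early if a chunk could not be sent in full.
         */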
        while (nb_pkts) {
                uint16_t ret, num;

                num = RTE_MIN(nb_pkts, RTE_BNXT_MAX_TX_BURST);
                ret = bnxt_xmit_fixed_burst_vec(tx_queue,
                                                &tx_pkts[nb_sent],
                                                num);
                nb_sent += ret;
                nb_pkts -= ret;
                if (ret < num)
                        break;
        }

        return nb_sent;
}

int __rte_cold
bnxt_rxq_vec_setup(struct bnxt_rx_queue *rxq)
{
        return bnxt_rxq_vec_setup_common(rxq);
}