net/i40e: fix Rx packet statistics
[dpdk.git] / drivers / net / mlx5 / mlx5_rxtx_vec_altivec.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2017 6WIND S.A.
3  * Copyright 2017 Mellanox Technologies, Ltd
4  */
5
6 #ifndef RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_
7 #define RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_
8
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdlib.h>
12
13 #include <rte_altivec.h>
14
15 #include <rte_mbuf.h>
16 #include <rte_mempool.h>
17 #include <rte_prefetch.h>
18
19 #include <mlx5_prm.h>
20
21 #include "mlx5_defs.h"
22 #include "mlx5.h"
23 #include "mlx5_utils.h"
24 #include "mlx5_rxtx.h"
25 #include "mlx5_rxtx_vec.h"
26 #include "mlx5_autoconf.h"
27
28 #ifndef __INTEL_COMPILER
29 #pragma GCC diagnostic ignored "-Wcast-qual"
30 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
31 #endif
32
/**
 * Copy mbuf pointers out of the RX SW ring into a packet array.
 *
 * Entries are read from @p elts and written to @p pkts; the SW ring itself is
 * left untouched.
 *
 * @param elts
 *   Pointer to the first SW ring entry to copy from.
 * @param pkts
 *   Pointer to the array receiving the mbuf pointers.
 * @param n
 *   Number of mbuf pointers to copy.
 */
static inline void
rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
	unsigned int pos;
	/* n rounded down to an even count: pointers are moved in pairs. */
	const unsigned int p = n & ~1u;

	/*
	 * A constant-size memcpy() has no alignment requirement on either
	 * side, whereas storing through a casted vector pointer would claim
	 * 16-byte alignment for pkts[], which is only pointer-aligned.
	 */
	for (pos = 0; pos < p; pos += 2)
		memcpy(&pkts[pos], &elts[pos], 2 * sizeof(*elts));
	/* Odd count: one pointer is left over after the paired copies. */
	if (n & 1)
		pkts[pos] = elts[pos];
}
59
60 /**
61  * Decompress a compressed completion and fill in mbufs in RX SW ring with data
62  * extracted from the title completion descriptor.
63  *
64  * @param rxq
65  *   Pointer to RX queue structure.
66  * @param cq
67  *   Pointer to completion array having a compressed completion at first.
68  * @param elts
69  *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
70  *   the title completion descriptor to be copied to the rest of mbufs.
71  *
72  * @return
73  *   Number of mini-CQEs successfully decompressed.
74  */
75 static inline uint16_t
76 rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
77                     struct rte_mbuf **elts)
78 {
79         volatile struct mlx5_mini_cqe8 *mcq = (void *)&(cq + 1)->pkt_info;
80         struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
81         const vector unsigned char zero = (vector unsigned char){0};
82         /* Mask to shuffle from extracted mini CQE to mbuf. */
83         const vector unsigned char shuf_mask1 = (vector unsigned char){
84                         -1, -1, -1, -1,   /* skip packet_type */
85                          7,  6, -1, -1,   /* bswap16, pkt_len */
86                          7,  6,           /* bswap16, data_len */
87                         -1, -1,           /* skip vlan_tci */
88                          3,  2,  1,  0};  /* bswap32, rss */
89         const vector unsigned char shuf_mask2 = (vector unsigned char){
90                         -1, -1, -1, -1,   /* skip packet_type */
91                         15, 14, -1, -1,   /* bswap16, pkt_len */
92                         15, 14,           /* data_len, bswap16 */
93                         -1, -1,           /* skip vlan_tci */
94                         11, 10,  9,  8};  /* bswap32, rss */
95         /* Restore the compressed count. Must be 16 bits. */
96         const uint16_t mcqe_n = t_pkt->data_len +
97                 (rxq->crc_present * RTE_ETHER_CRC_LEN);
98         const vector unsigned char rearm =
99                 (vector unsigned char)vec_vsx_ld(0,
100                 (signed int const *)&t_pkt->rearm_data);
101         const vector unsigned char rxdf =
102                 (vector unsigned char)vec_vsx_ld(0,
103                 (signed int const *)&t_pkt->rx_descriptor_fields1);
104         const vector unsigned char crc_adj =
105                 (vector unsigned char)(vector unsigned short){
106                         0, 0, rxq->crc_present * RTE_ETHER_CRC_LEN, 0,
107                         rxq->crc_present * RTE_ETHER_CRC_LEN, 0, 0, 0};
108         const vector unsigned short rxdf_sel_mask =
109                 (vector unsigned short){
110                         0xffff, 0xffff, 0, 0, 0, 0xffff, 0, 0};
111         vector unsigned char ol_flags = (vector unsigned char){0};
112         vector unsigned char ol_flags_mask = (vector unsigned char){0};
113         unsigned int pos;
114         unsigned int i;
115         unsigned int inv = 0;
116
117 #ifdef MLX5_PMD_SOFT_COUNTERS
118         const vector unsigned char ones = vec_splat_u8(-1);
119         uint32_t rcvd_byte = 0;
120         /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
121         const vector unsigned char len_shuf_mask = (vector unsigned char){
122                  3,  2, 11, 10,
123                  7,  6, 15, 14,
124                 -1, -1, -1, -1,
125                 -1, -1, -1, -1};
126 #endif
127
128         /*
129          * A. load mCQEs into a 128bit register.
130          * B. store rearm data to mbuf.
131          * C. combine data from mCQEs with rx_descriptor_fields1.
132          * D. store rx_descriptor_fields1.
133          * E. store flow tag (rte_flow mark).
134          */
135         for (pos = 0; pos < mcqe_n; ) {
136                 vector unsigned char mcqe1, mcqe2;
137                 vector unsigned char rxdf1, rxdf2;
138 #ifdef MLX5_PMD_SOFT_COUNTERS
139                 const vector unsigned short mcqe_sel_mask =
140                         (vector unsigned short){0, 0, 0xffff, 0xffff,
141                         0, 0, 0xfff, 0xffff};
142                 const vector unsigned char lower_half = {
143                         0, 1, 4, 5, 8, 9, 12, 13, 16,
144                         17, 20, 21, 24, 25, 28, 29};
145                 const vector unsigned char upper_half = {
146                         2, 3, 6, 7, 10, 11, 14, 15,
147                         18, 19, 22, 23, 26, 27, 30, 31};
148                 vector unsigned short left, right;
149                 vector unsigned char byte_cnt, invalid_mask;
150                 vector unsigned long lshift;
151                 __attribute__((altivec(vector__)))
152                         __attribute__((altivec(bool__)))
153                         unsigned long long shmask;
154                 const vector unsigned long shmax = {64, 64};
155 #endif
156
157                 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
158                         if (likely(pos + i < mcqe_n))
159                                 rte_prefetch0((void *)(cq + pos + i));
160                 /* A.1 load mCQEs into a 128bit register. */
161                 mcqe1 = (vector unsigned char)vec_vsx_ld(0,
162                         (signed int const *)&mcq[pos % 8]);
163                 mcqe2 = (vector unsigned char)vec_vsx_ld(0,
164                         (signed int const *)&mcq[pos % 8 + 2]);
165
166                 /* B.1 store rearm data to mbuf. */
167                 *(vector unsigned char *)
168                         &elts[pos]->rearm_data = rearm;
169                 *(vector unsigned char *)
170                         &elts[pos + 1]->rearm_data = rearm;
171
172                 /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
173                 rxdf1 = vec_perm(mcqe1, zero, shuf_mask1);
174                 rxdf2 = vec_perm(mcqe1, zero, shuf_mask2);
175                 rxdf1 = (vector unsigned char)
176                         ((vector unsigned short)rxdf1 -
177                         (vector unsigned short)crc_adj);
178                 rxdf2 = (vector unsigned char)
179                         ((vector unsigned short)rxdf2 -
180                         (vector unsigned short)crc_adj);
181                 rxdf1 = (vector unsigned char)
182                         vec_sel((vector unsigned short)rxdf1,
183                         (vector unsigned short)rxdf, rxdf_sel_mask);
184                 rxdf2 = (vector unsigned char)
185                         vec_sel((vector unsigned short)rxdf2,
186                         (vector unsigned short)rxdf, rxdf_sel_mask);
187
188                 /* D.1 store rx_descriptor_fields1. */
189                 *(vector unsigned char *)
190                         &elts[pos]->rx_descriptor_fields1 = rxdf1;
191                 *(vector unsigned char *)
192                         &elts[pos + 1]->rx_descriptor_fields1 = rxdf2;
193
194                 /* B.1 store rearm data to mbuf. */
195                 *(vector unsigned char *)
196                         &elts[pos + 2]->rearm_data = rearm;
197                 *(vector unsigned char *)
198                         &elts[pos + 3]->rearm_data = rearm;
199
200                 /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
201                 rxdf1 = vec_perm(mcqe2, zero, shuf_mask1);
202                 rxdf2 = vec_perm(mcqe2, zero, shuf_mask2);
203                 rxdf1 = (vector unsigned char)
204                         ((vector unsigned short)rxdf1 -
205                         (vector unsigned short)crc_adj);
206                 rxdf2 = (vector unsigned char)
207                         ((vector unsigned short)rxdf2 -
208                         (vector unsigned short)crc_adj);
209                 rxdf1 = (vector unsigned char)
210                         vec_sel((vector unsigned short)rxdf1,
211                         (vector unsigned short)rxdf, rxdf_sel_mask);
212                 rxdf2 = (vector unsigned char)
213                         vec_sel((vector unsigned short)rxdf2,
214                         (vector unsigned short)rxdf, rxdf_sel_mask);
215
216                 /* D.1 store rx_descriptor_fields1. */
217                 *(vector unsigned char *)
218                         &elts[pos + 2]->rx_descriptor_fields1 = rxdf1;
219                 *(vector unsigned char *)
220                         &elts[pos + 3]->rx_descriptor_fields1 = rxdf2;
221
222 #ifdef MLX5_PMD_SOFT_COUNTERS
223                 invalid_mask = (vector unsigned char)(vector unsigned long){
224                         (mcqe_n - pos) * sizeof(uint16_t) * 8, 0};
225
226                 lshift =
227                         vec_splat((vector unsigned long)invalid_mask, 0);
228                 shmask = vec_cmpgt(shmax, lshift);
229                 invalid_mask = (vector unsigned char)
230                         vec_sl((vector unsigned long)ones, lshift);
231                 invalid_mask = (vector unsigned char)
232                         vec_sel((vector unsigned long)shmask,
233                         (vector unsigned long)invalid_mask, shmask);
234
235                 byte_cnt = (vector unsigned char)
236                         vec_sel((vector unsigned short)
237                         vec_sro((vector unsigned short)mcqe1,
238                         (vector unsigned char){32}),
239                         (vector unsigned short)mcqe2, mcqe_sel_mask);
240                 byte_cnt = vec_perm(byte_cnt, zero, len_shuf_mask);
241                 byte_cnt = (vector unsigned char)
242                         vec_andc((vector unsigned long)byte_cnt,
243                         (vector unsigned long)invalid_mask);
244                 left = vec_perm((vector unsigned short)byte_cnt,
245                         (vector unsigned short)zero, lower_half);
246                 right = vec_perm((vector unsigned short)byte_cnt,
247                         (vector unsigned short)zero, upper_half);
248                 byte_cnt = (vector unsigned char)vec_add(left, right);
249                 left = vec_perm((vector unsigned short)byte_cnt,
250                         (vector unsigned short)zero, lower_half);
251                 right = vec_perm((vector unsigned short)byte_cnt,
252                         (vector unsigned short)zero, upper_half);
253                 byte_cnt = (vector unsigned char)vec_add(left, right);
254                 rcvd_byte += ((vector unsigned long)byte_cnt)[0];
255 #endif
256
257                 if (rxq->mark) {
258                         if (rxq->mcqe_format !=
259                             MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
260                                 const uint32_t flow_tag = t_pkt->hash.fdir.hi;
261
262                                 /* E.1 store flow tag (rte_flow mark). */
263                                 elts[pos]->hash.fdir.hi = flow_tag;
264                                 elts[pos + 1]->hash.fdir.hi = flow_tag;
265                                 elts[pos + 2]->hash.fdir.hi = flow_tag;
266                                 elts[pos + 3]->hash.fdir.hi = flow_tag;
267                         } else {
268                                 const vector unsigned char flow_mark_adj =
269                                         (vector unsigned char)
270                                         (vector unsigned int){
271                                         -1, -1, -1, -1};
272                                 const vector unsigned char flow_mark_shuf =
273                                         (vector unsigned char){
274                                         -1, -1, -1, -1,
275                                         -1, -1, -1, -1,
276                                         12,  8,  9, -1,
277                                          4,  0,  1,  -1};
278                                 const vector unsigned char ft_mask =
279                                         (vector unsigned char)
280                                         (vector unsigned int){
281                                         0xffffff00, 0xffffff00,
282                                         0xffffff00, 0xffffff00};
283                                 const vector unsigned char fdir_flags =
284                                         (vector unsigned char)
285                                         (vector unsigned int){
286                                         PKT_RX_FDIR, PKT_RX_FDIR,
287                                         PKT_RX_FDIR, PKT_RX_FDIR};
288                                 const vector unsigned char fdir_all_flags =
289                                         (vector unsigned char)
290                                         (vector unsigned int){
291                                         PKT_RX_FDIR | PKT_RX_FDIR_ID,
292                                         PKT_RX_FDIR | PKT_RX_FDIR_ID,
293                                         PKT_RX_FDIR | PKT_RX_FDIR_ID,
294                                         PKT_RX_FDIR | PKT_RX_FDIR_ID};
295                                 vector unsigned char fdir_id_flags =
296                                         (vector unsigned char)
297                                         (vector unsigned int){
298                                         PKT_RX_FDIR_ID, PKT_RX_FDIR_ID,
299                                         PKT_RX_FDIR_ID, PKT_RX_FDIR_ID};
300                                 /* Extract flow_tag field. */
301                                 vector unsigned char ftag0 = vec_perm(mcqe1,
302                                                         zero, flow_mark_shuf);
303                                 vector unsigned char ftag1 = vec_perm(mcqe2,
304                                                         zero, flow_mark_shuf);
305                                 vector unsigned char ftag =
306                                         (vector unsigned char)
307                                         vec_mergel((vector unsigned int)ftag0,
308                                         (vector unsigned int)ftag1);
309                                 vector unsigned char invalid_mask =
310                                         (vector unsigned char)
311                                         vec_cmpeq((vector unsigned int)ftag,
312                                         (vector unsigned int)zero);
313
314                                 ol_flags_mask = (vector unsigned char)
315                                         vec_or((vector unsigned long)
316                                         ol_flags_mask,
317                                         (vector unsigned long)fdir_all_flags);
318
319                                 /* Set PKT_RX_FDIR if flow tag is non-zero. */
320                                 invalid_mask = (vector unsigned char)
321                                         vec_cmpeq((vector unsigned int)ftag,
322                                         (vector unsigned int)zero);
323                                 ol_flags = (vector unsigned char)
324                                         vec_or((vector unsigned long)ol_flags,
325                                         (vector unsigned long)
326                                         vec_andc((vector unsigned long)
327                                         fdir_flags,
328                                         (vector unsigned long)invalid_mask));
329                                 ol_flags_mask = (vector unsigned char)
330                                         vec_or((vector unsigned long)
331                                         ol_flags_mask,
332                                         (vector unsigned long)fdir_flags);
333
334                                 /* Mask out invalid entries. */
335                                 fdir_id_flags = (vector unsigned char)
336                                         vec_andc((vector unsigned long)
337                                         fdir_id_flags,
338                                         (vector unsigned long)invalid_mask);
339
340                                 /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
341                                 ol_flags = (vector unsigned char)
342                                         vec_or((vector unsigned long)ol_flags,
343                                         (vector unsigned long)
344                                         vec_andc((vector unsigned long)
345                                         fdir_id_flags,
346                                         (vector unsigned long)
347                                         vec_cmpeq((vector unsigned int)ftag,
348                                         (vector unsigned int)ft_mask)));
349
350                                 ftag = (vector unsigned char)
351                                         ((vector unsigned int)ftag +
352                                         (vector unsigned int)flow_mark_adj);
353                                 elts[pos]->hash.fdir.hi =
354                                         ((vector unsigned int)ftag)[0];
355                                 elts[pos + 1]->hash.fdir.hi =
356                                         ((vector unsigned int)ftag)[1];
357                                 elts[pos + 2]->hash.fdir.hi =
358                                         ((vector unsigned int)ftag)[2];
359                                 elts[pos + 3]->hash.fdir.hi =
360                                         ((vector unsigned int)ftag)[3];
361                         }
362                 }
363                 if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
364                         if (rxq->mcqe_format ==
365                             MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
366                                 const uint8_t pkt_info =
367                                         (cq->pkt_info & 0x3) << 6;
368                                 const uint8_t pkt_hdr0 =
369                                         mcq[pos % 8].hdr_type;
370                                 const uint8_t pkt_hdr1 =
371                                         mcq[pos % 8 + 1].hdr_type;
372                                 const uint8_t pkt_hdr2 =
373                                         mcq[pos % 8 + 2].hdr_type;
374                                 const uint8_t pkt_hdr3 =
375                                         mcq[pos % 8 + 3].hdr_type;
376                                 const vector unsigned char vlan_mask =
377                                         (vector unsigned char)
378                                         (vector unsigned int) {
379                                         (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
380                                         (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
381                                         (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
382                                         (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED)};
383                                 const vector unsigned char cv_mask =
384                                         (vector unsigned char)
385                                         (vector unsigned int) {
386                                         MLX5_CQE_VLAN_STRIPPED,
387                                         MLX5_CQE_VLAN_STRIPPED,
388                                         MLX5_CQE_VLAN_STRIPPED,
389                                         MLX5_CQE_VLAN_STRIPPED};
390                                 vector unsigned char pkt_cv =
391                                         (vector unsigned char)
392                                         (vector unsigned int) {
393                                         pkt_hdr0 & 0x1, pkt_hdr1 & 0x1,
394                                         pkt_hdr2 & 0x1, pkt_hdr3 & 0x1};
395
396                                 ol_flags_mask = (vector unsigned char)
397                                         vec_or((vector unsigned long)
398                                         ol_flags_mask,
399                                         (vector unsigned long)vlan_mask);
400                                 ol_flags = (vector unsigned char)
401                                         vec_or((vector unsigned long)ol_flags,
402                                         (vector unsigned long)
403                                         vec_and((vector unsigned long)vlan_mask,
404                                         (vector unsigned long)
405                                         vec_cmpeq((vector unsigned int)pkt_cv,
406                                         (vector unsigned int)cv_mask)));
407                                 elts[pos]->packet_type =
408                                         mlx5_ptype_table[(pkt_hdr0 >> 2) |
409                                                          pkt_info];
410                                 elts[pos + 1]->packet_type =
411                                         mlx5_ptype_table[(pkt_hdr1 >> 2) |
412                                                          pkt_info];
413                                 elts[pos + 2]->packet_type =
414                                         mlx5_ptype_table[(pkt_hdr2 >> 2) |
415                                                          pkt_info];
416                                 elts[pos + 3]->packet_type =
417                                         mlx5_ptype_table[(pkt_hdr3 >> 2) |
418                                                          pkt_info];
419                                 if (rxq->tunnel) {
420                                         elts[pos]->packet_type |=
421                                                 !!(((pkt_hdr0 >> 2) |
422                                                 pkt_info) & (1 << 6));
423                                         elts[pos + 1]->packet_type |=
424                                                 !!(((pkt_hdr1 >> 2) |
425                                                 pkt_info) & (1 << 6));
426                                         elts[pos + 2]->packet_type |=
427                                                 !!(((pkt_hdr2 >> 2) |
428                                                 pkt_info) & (1 << 6));
429                                         elts[pos + 3]->packet_type |=
430                                                 !!(((pkt_hdr3 >> 2) |
431                                                 pkt_info) & (1 << 6));
432                                 }
433                         }
434                         const vector unsigned char hash_mask =
435                                 (vector unsigned char)(vector unsigned int) {
436                                         PKT_RX_RSS_HASH,
437                                         PKT_RX_RSS_HASH,
438                                         PKT_RX_RSS_HASH,
439                                         PKT_RX_RSS_HASH};
440                         const vector unsigned char rearm_flags =
441                                 (vector unsigned char)(vector unsigned int) {
442                                 (uint32_t)t_pkt->ol_flags,
443                                 (uint32_t)t_pkt->ol_flags,
444                                 (uint32_t)t_pkt->ol_flags,
445                                 (uint32_t)t_pkt->ol_flags};
446
447                         ol_flags_mask = (vector unsigned char)
448                                 vec_or((vector unsigned long)ol_flags_mask,
449                                 (vector unsigned long)hash_mask);
450                         ol_flags = (vector unsigned char)
451                                 vec_or((vector unsigned long)ol_flags,
452                                 (vector unsigned long)
453                                 vec_andc((vector unsigned long)rearm_flags,
454                                 (vector unsigned long)ol_flags_mask));
455
456                         elts[pos]->ol_flags =
457                                 ((vector unsigned int)ol_flags)[0];
458                         elts[pos + 1]->ol_flags =
459                                 ((vector unsigned int)ol_flags)[1];
460                         elts[pos + 2]->ol_flags =
461                                 ((vector unsigned int)ol_flags)[2];
462                         elts[pos + 3]->ol_flags =
463                                 ((vector unsigned int)ol_flags)[3];
464                         elts[pos]->hash.rss = 0;
465                         elts[pos + 1]->hash.rss = 0;
466                         elts[pos + 2]->hash.rss = 0;
467                         elts[pos + 3]->hash.rss = 0;
468                 }
469                 if (rxq->dynf_meta) {
470                         int32_t offs = rxq->flow_meta_offset;
471                         const uint32_t meta =
472                                 *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
473
474                         /* Check if title packet has valid metadata. */
475                         if (meta) {
476                                 MLX5_ASSERT(t_pkt->ol_flags &
477                                             rxq->flow_meta_mask);
478                                 *RTE_MBUF_DYNFIELD(elts[pos], offs,
479                                                         uint32_t *) = meta;
480                                 *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
481                                                         uint32_t *) = meta;
482                                 *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
483                                                         uint32_t *) = meta;
484                                 *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
485                                                         uint32_t *) = meta;
486                         }
487                 }
488
489                 pos += MLX5_VPMD_DESCS_PER_LOOP;
490                 /* Move to next CQE and invalidate consumed CQEs. */
491                 if (!(pos & 0x7) && pos < mcqe_n) {
492                         if (pos + 8 < mcqe_n)
493                                 rte_prefetch0((void *)(cq + pos + 8));
494                         mcq = (void *)&(cq + pos)->pkt_info;
495                         for (i = 0; i < 8; ++i)
496                                 cq[inv++].op_own = MLX5_CQE_INVALIDATE;
497                 }
498         }
499
500         /* Invalidate the rest of CQEs. */
501         for (; inv < mcqe_n; ++inv)
502                 cq[inv].op_own = MLX5_CQE_INVALIDATE;
503
504 #ifdef MLX5_PMD_SOFT_COUNTERS
505         rxq->stats.ipackets += mcqe_n;
506         rxq->stats.ibytes += rcvd_byte;
507 #endif
508
509         return mcqe_n;
510 }
511
512 /**
513  * Calculate packet type and offload flag for mbuf and store it.
514  *
515  * @param rxq
516  *   Pointer to RX queue structure.
517  * @param cqes[4]
518  *   Array of four 16bytes completions extracted from the original completion
519  *   descriptor.
520  * @param op_err
521  *   Opcode vector having responder error status. Each field is 4B.
522  * @param pkts
523  *   Pointer to array of packets to be filled.
524  */
525 static inline void
526 rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
527                 vector unsigned char cqes[4], vector unsigned char op_err,
528                 struct rte_mbuf **pkts)
529 {
530         vector unsigned char pinfo0, pinfo1;
531         vector unsigned char pinfo, ptype;
532         vector unsigned char ol_flags = (vector unsigned char)
533                 (vector unsigned int){
534                         rxq->rss_hash * PKT_RX_RSS_HASH |
535                                 rxq->hw_timestamp * rxq->timestamp_rx_flag,
536                         rxq->rss_hash * PKT_RX_RSS_HASH |
537                                 rxq->hw_timestamp * rxq->timestamp_rx_flag,
538                         rxq->rss_hash * PKT_RX_RSS_HASH |
539                                 rxq->hw_timestamp * rxq->timestamp_rx_flag,
540                         rxq->rss_hash * PKT_RX_RSS_HASH |
541                                 rxq->hw_timestamp * rxq->timestamp_rx_flag};
542         vector unsigned char cv_flags;
543         const vector unsigned char zero = (vector unsigned char){0};
544         const vector unsigned char ptype_mask =
545                 (vector unsigned char)(vector unsigned int){
546                 0x0000fd06, 0x0000fd06, 0x0000fd06, 0x0000fd06};
547         const vector unsigned char ptype_ol_mask =
548                 (vector unsigned char)(vector unsigned int){
549                 0x00000106, 0x00000106, 0x00000106, 0x00000106};
550         const vector unsigned char pinfo_mask =
551                 (vector unsigned char)(vector unsigned int){
552                 0x00000003, 0x00000003, 0x00000003, 0x00000003};
553         const vector unsigned char cv_flag_sel = (vector unsigned char){
554                 0, (uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
555                 (uint8_t)(PKT_RX_IP_CKSUM_GOOD >> 1), 0,
556                 (uint8_t)(PKT_RX_L4_CKSUM_GOOD >> 1), 0,
557                 (uint8_t)((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
558                 0, 0, 0, 0, 0, 0, 0, 0, 0};
559         const vector unsigned char cv_mask =
560                 (vector unsigned char)(vector unsigned int){
561                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
562                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
563                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
564                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
565                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
566                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
567                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
568                 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED};
569         const vector unsigned char mbuf_init =
570                 (vector unsigned char)vec_vsx_ld
571                         (0, (vector unsigned char *)&rxq->mbuf_initializer);
572         const vector unsigned short rearm_sel_mask =
573                 (vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
574         vector unsigned char rearm0, rearm1, rearm2, rearm3;
575         uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
576
577         /* Extract pkt_info field. */
578         pinfo0 = (vector unsigned char)
579                 vec_mergeh((vector unsigned int)cqes[0],
580                 (vector unsigned int)cqes[1]);
581         pinfo1 = (vector unsigned char)
582                 vec_mergeh((vector unsigned int)cqes[2],
583                 (vector unsigned int)cqes[3]);
584         pinfo = (vector unsigned char)
585                 vec_mergeh((vector unsigned long)pinfo0,
586                 (vector unsigned long)pinfo1);
587
588         /* Extract hdr_type_etc field. */
589         pinfo0 = (vector unsigned char)
590                 vec_mergel((vector unsigned int)cqes[0],
591                 (vector unsigned int)cqes[1]);
592         pinfo1 = (vector unsigned char)
593                 vec_mergel((vector unsigned int)cqes[2],
594                 (vector unsigned int)cqes[3]);
595         ptype = (vector unsigned char)
596                 vec_mergeh((vector unsigned long)pinfo0,
597                 (vector unsigned long)pinfo1);
598
599         if (rxq->mark) {
600                 const vector unsigned char pinfo_ft_mask =
601                         (vector unsigned char)(vector unsigned int){
602                         0xffffff00, 0xffffff00, 0xffffff00, 0xffffff00};
603                 const vector unsigned char fdir_flags =
604                         (vector unsigned char)(vector unsigned int){
605                         PKT_RX_FDIR, PKT_RX_FDIR,
606                         PKT_RX_FDIR, PKT_RX_FDIR};
607                 vector unsigned char fdir_id_flags =
608                         (vector unsigned char)(vector unsigned int){
609                         PKT_RX_FDIR_ID, PKT_RX_FDIR_ID,
610                         PKT_RX_FDIR_ID, PKT_RX_FDIR_ID};
611                 vector unsigned char flow_tag, invalid_mask;
612
613                 flow_tag = (vector unsigned char)
614                         vec_and((vector unsigned long)pinfo,
615                         (vector unsigned long)pinfo_ft_mask);
616
617                 /* Check if flow tag is non-zero then set PKT_RX_FDIR. */
618                 invalid_mask = (vector unsigned char)
619                         vec_cmpeq((vector unsigned int)flow_tag,
620                         (vector unsigned int)zero);
621                 ol_flags = (vector unsigned char)
622                         vec_or((vector unsigned long)ol_flags,
623                         (vector unsigned long)
624                         vec_andc((vector unsigned long)fdir_flags,
625                         (vector unsigned long)invalid_mask));
626
627                 /* Mask out invalid entries. */
628                 fdir_id_flags = (vector unsigned char)
629                         vec_andc((vector unsigned long)fdir_id_flags,
630                         (vector unsigned long)invalid_mask);
631
632                 /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
633                 ol_flags = (vector unsigned char)
634                         vec_or((vector unsigned long)ol_flags,
635                         (vector unsigned long)
636                         vec_andc((vector unsigned long)fdir_id_flags,
637                         (vector unsigned long)
638                         vec_cmpeq((vector unsigned int)flow_tag,
639                         (vector unsigned int)pinfo_ft_mask)));
640         }
641         /*
642          * Merge the two fields to generate the following:
643          * bit[1]     = l3_ok
644          * bit[2]     = l4_ok
645          * bit[8]     = cv
646          * bit[11:10] = l3_hdr_type
647          * bit[14:12] = l4_hdr_type
648          * bit[15]    = ip_frag
649          * bit[16]    = tunneled
650          * bit[17]    = outer_l3_type
651          */
652         ptype = (vector unsigned char)
653                 vec_and((vector unsigned long)ptype,
654                 (vector unsigned long)ptype_mask);
655         pinfo = (vector unsigned char)
656                 vec_and((vector unsigned long)pinfo,
657                 (vector unsigned long)pinfo_mask);
658         pinfo = (vector unsigned char)
659                 vec_sl((vector unsigned int)pinfo,
660                 (vector unsigned int){16, 16, 16, 16});
661
662         /* Make pinfo hold the merged fields for ol_flags calculation. */
663         pinfo = (vector unsigned char)
664                 vec_or((vector unsigned long)ptype,
665                 (vector unsigned long)pinfo);
666         ptype = (vector unsigned char)
667                 vec_sr((vector unsigned int)pinfo,
668                 (vector unsigned int){10, 10, 10, 10});
669         ptype = (vector unsigned char)
670                 vec_packs((vector unsigned int)ptype,
671                 (vector unsigned int)zero);
672
673         /* Errored packets will have RTE_PTYPE_ALL_MASK. */
674         op_err = (vector unsigned char)
675                 vec_sr((vector unsigned short)op_err,
676                 (vector unsigned short){8, 8, 8, 8, 8, 8, 8, 8});
677         ptype = (vector unsigned char)
678                 vec_or((vector unsigned long)ptype,
679                 (vector unsigned long)op_err);
680
681         pt_idx0 = (uint8_t)((vector unsigned char)ptype)[0];
682         pt_idx1 = (uint8_t)((vector unsigned char)ptype)[2];
683         pt_idx2 = (uint8_t)((vector unsigned char)ptype)[4];
684         pt_idx3 = (uint8_t)((vector unsigned char)ptype)[6];
685
686         pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
687                 !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
688         pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
689                 !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
690         pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
691                 !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
692         pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
693                 !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
694
695         /* Fill flags for checksum and VLAN. */
696         pinfo = (vector unsigned char)
697                 vec_and((vector unsigned long)pinfo,
698                 (vector unsigned long)ptype_ol_mask);
699         pinfo = vec_perm(cv_flag_sel, zero, pinfo);
700
701         /* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
702         cv_flags = (vector unsigned char)
703                 vec_sl((vector unsigned int)pinfo,
704                 (vector unsigned int){9, 9, 9, 9});
705         cv_flags = (vector unsigned char)
706                 vec_or((vector unsigned long)pinfo,
707                 (vector unsigned long)cv_flags);
708
709         /* Move back flags to start from byte[0]. */
710         cv_flags = (vector unsigned char)
711                 vec_sr((vector unsigned int)cv_flags,
712                 (vector unsigned int){8, 8, 8, 8});
713
714         /* Mask out garbage bits. */
715         cv_flags = (vector unsigned char)
716                 vec_and((vector unsigned long)cv_flags,
717                 (vector unsigned long)cv_mask);
718
719         /* Merge to ol_flags. */
720         ol_flags = (vector unsigned char)
721                 vec_or((vector unsigned long)ol_flags,
722                 (vector unsigned long)cv_flags);
723
724         /* Merge mbuf_init and ol_flags. */
725         rearm0 = (vector unsigned char)
726                 vec_sel((vector unsigned short)mbuf_init,
727                 (vector unsigned short)
728                 vec_slo((vector unsigned short)ol_flags,
729                 (vector unsigned char){64}), rearm_sel_mask);
730         rearm1 = (vector unsigned char)
731                 vec_sel((vector unsigned short)mbuf_init,
732                 (vector unsigned short)
733                 vec_slo((vector unsigned short)ol_flags,
734                 (vector unsigned char){32}), rearm_sel_mask);
735         rearm2 = (vector unsigned char)
736                 vec_sel((vector unsigned short)mbuf_init,
737                 (vector unsigned short)ol_flags, rearm_sel_mask);
738         rearm3 = (vector unsigned char)
739                 vec_sel((vector unsigned short)mbuf_init,
740                 (vector unsigned short)
741                 vec_sro((vector unsigned short)ol_flags,
742                 (vector unsigned char){32}), rearm_sel_mask);
743
744         /* Write 8B rearm_data and 8B ol_flags. */
745         vec_vsx_st(rearm0, 0,
746                 (vector unsigned char *)&pkts[0]->rearm_data);
747         vec_vsx_st(rearm1, 0,
748                 (vector unsigned char *)&pkts[1]->rearm_data);
749         vec_vsx_st(rearm2, 0,
750                 (vector unsigned char *)&pkts[2]->rearm_data);
751         vec_vsx_st(rearm3, 0,
752                 (vector unsigned char *)&pkts[3]->rearm_data);
753 }
754
755 /**
756  * Process a non-compressed completion and fill in mbufs in RX SW ring
757  * with data extracted from the title completion descriptor.
758  *
759  * @param rxq
760  *   Pointer to RX queue structure.
761  * @param cq
762  *   Pointer to completion array having a non-compressed completion at first.
763  * @param elts
764  *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
765  *   the title completion descriptor to be copied to the rest of mbufs.
766  * @param[out] pkts
767  *   Array to store received packets.
768  * @param pkts_n
769  *   Maximum number of packets in array.
770  * @param[out] err
771  *   Pointer to a flag. Set non-zero value if pkts array has at least one error
772  *   packet to handle.
773  * @param[out] comp
774  *   Pointer to an index. Set it to the first compressed completion if any.
775  *
776  * @return
777  *   Number of CQEs successfully processed.
778  */
779 static inline uint16_t
780 rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
781                  struct rte_mbuf **elts, struct rte_mbuf **pkts,
782                  uint16_t pkts_n, uint64_t *err, uint64_t *comp)
783 {
784         const uint16_t q_n = 1 << rxq->cqe_n;
785         const uint16_t q_mask = q_n - 1;
786         unsigned int pos;
787         uint64_t n = 0;
788         uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
789         uint16_t nocmp_n = 0;
790         unsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));
791         const vector unsigned char zero = (vector unsigned char){0};
792         const vector unsigned char ones = vec_splat_u8(-1);
793         const vector unsigned char owner_check =
794                 (vector unsigned char)(vector unsigned long){
795                 0x0100000001000000LL, 0x0100000001000000LL};
796         const vector unsigned char opcode_check =
797                 (vector unsigned char)(vector unsigned long){
798                 0xf0000000f0000000LL, 0xf0000000f0000000LL};
799         const vector unsigned char format_check =
800                 (vector unsigned char)(vector unsigned long){
801                 0x0c0000000c000000LL, 0x0c0000000c000000LL};
802         const vector unsigned char resp_err_check =
803                 (vector unsigned char)(vector unsigned long){
804                 0xe0000000e0000000LL, 0xe0000000e0000000LL};
805 #ifdef MLX5_PMD_SOFT_COUNTERS
806         uint32_t rcvd_byte = 0;
807         /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
808         const vector unsigned char len_shuf_mask = (vector unsigned char){
809                  1,  0,  5,  4,
810                  9,  8, 13, 12,
811                 -1, -1, -1, -1,
812                 -1, -1, -1, -1};
813 #endif
814         /* Mask to shuffle from extracted CQE to mbuf. */
815         const vector unsigned char shuf_mask = (vector unsigned char){
816                  5,  4,           /* bswap16, pkt_len */
817                 -1, -1,           /* zero out 2nd half of pkt_len */
818                  5,  4,           /* bswap16, data_len */
819                 11, 10,           /* bswap16, vlan+tci */
820                 15, 14, 13, 12,   /* bswap32, rss */
821                  1,  2,  3, -1};  /* fdir.hi */
822         /* Mask to blend from the last Qword to the first DQword. */
824         const vector unsigned char blend_mask = (vector unsigned char){
825                 -1,  0,  0,  0,
826                  0,  0,  0,  0,
827                 -1, -1, -1, -1,
828                 -1, -1, -1, -1};
829         const vector unsigned char crc_adj =
830                 (vector unsigned char)(vector unsigned short){
831                 rxq->crc_present * RTE_ETHER_CRC_LEN, 0,
832                 rxq->crc_present * RTE_ETHER_CRC_LEN, 0, 0, 0, 0, 0};
833         const vector unsigned char flow_mark_adj =
834                 (vector unsigned char)(vector unsigned int){
835                 0, 0, 0, rxq->mark * (-1)};
836         const vector unsigned short cqe_sel_mask1 =
837                 (vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
838         const vector unsigned short cqe_sel_mask2 =
839                 (vector unsigned short){0, 0, 0xffff, 0, 0, 0, 0, 0};
840
841         /*
842          * A. load first Qword (8bytes) in one loop.
843          * B. copy 4 mbuf pointers from elts ring to returning pkts.
844          * C. load remaining CQE data and extract necessary fields.
845          *    Final 16bytes cqes[] extracted from original 64bytes CQE has the
846          *    following structure:
847          *        struct {
848          *          uint8_t  pkt_info;
849          *          uint8_t  flow_tag[3];
850          *          uint16_t byte_cnt;
851          *          uint8_t  rsvd4;
852          *          uint8_t  op_own;
853          *          uint16_t hdr_type_etc;
854          *          uint16_t vlan_info;
855          *          uint32_t rx_has_res;
856          *        } c;
857          * D. fill in mbuf.
858          * E. get valid CQEs.
859          * F. find compressed CQE.
860          */
861         for (pos = 0;
862              pos < pkts_n;
863              pos += MLX5_VPMD_DESCS_PER_LOOP) {
864                 vector unsigned char cqes[MLX5_VPMD_DESCS_PER_LOOP];
865                 vector unsigned char cqe_tmp1, cqe_tmp2;
866                 vector unsigned char pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
867                 vector unsigned char op_own, op_own_tmp1, op_own_tmp2;
868                 vector unsigned char opcode, owner_mask, invalid_mask;
869                 vector unsigned char comp_mask;
870                 vector unsigned char mask;
871 #ifdef MLX5_PMD_SOFT_COUNTERS
872                 const vector unsigned char lower_half = {
873                         0, 1, 4, 5, 8, 9, 12, 13,
874                         16, 17, 20, 21, 24, 25, 28, 29};
875                 const vector unsigned char upper_half = {
876                         2, 3, 6, 7, 10, 11, 14, 15,
877                         18, 19, 22, 23, 26, 27, 30, 31};
878                 const vector unsigned long shmax = {64, 64};
879                 vector unsigned char byte_cnt;
880                 vector unsigned short left, right;
881                 vector unsigned long lshift;
882                 vector __attribute__((altivec(bool__)))
883                         unsigned long shmask;
884 #endif
885                 vector unsigned char mbp1, mbp2;
886                 vector unsigned char p =
887                         (vector unsigned char)(vector unsigned short){
888                                 0, 1, 2, 3, 0, 0, 0, 0};
889                 unsigned int p1, p2, p3;
890
891                 /* Prefetch next 4 CQEs. */
892                 if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
893                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP]);
894                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 1]);
895                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 2]);
896                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 3]);
897                 }
898
899                 /* A.0 do not cross the end of CQ. */
900                 mask = (vector unsigned char)(vector unsigned long){
901                         (pkts_n - pos) * sizeof(uint16_t) * 8, 0};
902
903                 {
904                         vector unsigned long lshift;
905                         vector __attribute__((altivec(bool__)))
906                                 unsigned long shmask;
907                         const vector unsigned long shmax = {64, 64};
908
909                         lshift = vec_splat((vector unsigned long)mask, 0);
910                         shmask = vec_cmpgt(shmax, lshift);
911                         mask = (vector unsigned char)
912                                 vec_sl((vector unsigned long)ones, lshift);
913                         mask = (vector unsigned char)
914                                 vec_sel((vector unsigned long)shmask,
915                                 (vector unsigned long)mask, shmask);
916                 }
917
918                 p = (vector unsigned char)
919                         vec_andc((vector unsigned long)p,
920                         (vector unsigned long)mask);
921
922                 /* A.1 load cqes. */
923                 p3 = (unsigned int)((vector unsigned short)p)[3];
924                 cqes[3] = (vector unsigned char)(vector unsigned long){
925                         *(__rte_aligned(8) unsigned long *)
926                         &cq[pos + p3].sop_drop_qpn, 0LL};
927                 rte_compiler_barrier();
928
929                 p2 = (unsigned int)((vector unsigned short)p)[2];
930                 cqes[2] = (vector unsigned char)(vector unsigned long){
931                         *(__rte_aligned(8) unsigned long *)
932                         &cq[pos + p2].sop_drop_qpn, 0LL};
933                 rte_compiler_barrier();
934
935                 /* B.1 load mbuf pointers. */
936                 mbp1 = (vector unsigned char)vec_vsx_ld(0,
937                         (signed int const *)&elts[pos]);
938                 mbp2 = (vector unsigned char)vec_vsx_ld(0,
939                         (signed int const *)&elts[pos + 2]);
940
941                 /* A.1 load a block having op_own. */
942                 p1 = (unsigned int)((vector unsigned short)p)[1];
943                 cqes[1] = (vector unsigned char)(vector unsigned long){
944                         *(__rte_aligned(8) unsigned long *)
945                         &cq[pos + p1].sop_drop_qpn, 0LL};
946                 rte_compiler_barrier();
947
948                 cqes[0] = (vector unsigned char)(vector unsigned long){
949                         *(__rte_aligned(8) unsigned long *)
950                         &cq[pos].sop_drop_qpn, 0LL};
951                 rte_compiler_barrier();
952
953                 /* B.2 copy mbuf pointers. */
954                 *(vector unsigned char *)&pkts[pos] = mbp1;
955                 *(vector unsigned char *)&pkts[pos + 2] = mbp2;
956                 rte_io_rmb();
957
958                 /* C.1 load remaining CQE data and extract necessary fields. */
959                 cqe_tmp2 = *(vector unsigned char *)
960                         &cq[pos + p3].pkt_info;
961                 cqe_tmp1 = *(vector unsigned char *)
962                         &cq[pos + p2].pkt_info;
963                 cqes[3] = vec_sel(cqes[3], cqe_tmp2, blend_mask);
964                 cqes[2] = vec_sel(cqes[2], cqe_tmp1, blend_mask);
965                 cqe_tmp2 = (vector unsigned char)vec_vsx_ld(0,
966                         (signed int const *)&cq[pos + p3].csum);
967                 cqe_tmp1 = (vector unsigned char)vec_vsx_ld(0,
968                         (signed int const *)&cq[pos + p2].csum);
969                 cqes[3] = (vector unsigned char)
970                         vec_sel((vector unsigned short)cqes[3],
971                         (vector unsigned short)cqe_tmp2, cqe_sel_mask1);
972                 cqes[2] = (vector unsigned char)
973                         vec_sel((vector unsigned short)cqes[2],
974                         (vector unsigned short)cqe_tmp1, cqe_sel_mask1);
975                 cqe_tmp2 = (vector unsigned char)(vector unsigned long){
976                         *(__rte_aligned(8) unsigned long *)
977                         &cq[pos + p3].rsvd3[9], 0LL};
978                 cqe_tmp1 = (vector unsigned char)(vector unsigned long){
979                         *(__rte_aligned(8) unsigned long *)
980                         &cq[pos + p2].rsvd3[9], 0LL};
981                 cqes[3] = (vector unsigned char)
982                         vec_sel((vector unsigned short)cqes[3],
983                         (vector unsigned short)cqe_tmp2,
984                         (vector unsigned short)cqe_sel_mask2);
985                 cqes[2] = (vector unsigned char)
986                         vec_sel((vector unsigned short)cqes[2],
987                         (vector unsigned short)cqe_tmp1,
988                         (vector unsigned short)cqe_sel_mask2);
989
990                 /* C.2 generate final structure for mbuf with swapping bytes. */
991                 pkt_mb3 = vec_perm(cqes[3], zero, shuf_mask);
992                 pkt_mb2 = vec_perm(cqes[2], zero, shuf_mask);
993
994                 /* C.3 adjust CRC length. */
995                 pkt_mb3 = (vector unsigned char)
996                         ((vector unsigned short)pkt_mb3 -
997                         (vector unsigned short)crc_adj);
998                 pkt_mb2 = (vector unsigned char)
999                         ((vector unsigned short)pkt_mb2 -
1000                         (vector unsigned short)crc_adj);
1001
1002                 /* C.4 adjust flow mark. */
1003                 pkt_mb3 = (vector unsigned char)
1004                         ((vector unsigned int)pkt_mb3 +
1005                         (vector unsigned int)flow_mark_adj);
1006                 pkt_mb2 = (vector unsigned char)
1007                         ((vector unsigned int)pkt_mb2 +
1008                         (vector unsigned int)flow_mark_adj);
1009
1010                 /* D.1 fill in mbuf - rx_descriptor_fields1. */
1011                 *(vector unsigned char *)
1012                         &pkts[pos + 3]->pkt_len = pkt_mb3;
1013                 *(vector unsigned char *)
1014                         &pkts[pos + 2]->pkt_len = pkt_mb2;
1015
1016                 /* E.1 extract op_own field. */
1017                 op_own_tmp2 = (vector unsigned char)
1018                         vec_mergeh((vector unsigned int)cqes[2],
1019                         (vector unsigned int)cqes[3]);
1020
1021                 /* C.1 load remaining CQE data and extract necessary fields. */
1022                 cqe_tmp2 = *(vector unsigned char *)
1023                         &cq[pos + p1].pkt_info;
1024                 cqe_tmp1 = *(vector unsigned char *)
1025                         &cq[pos].pkt_info;
1026                 cqes[1] = vec_sel(cqes[1], cqe_tmp2, blend_mask);
1027                 cqes[0] = vec_sel(cqes[0], cqe_tmp2, blend_mask);
1028                 cqe_tmp2 = (vector unsigned char)vec_vsx_ld(0,
1029                         (signed int const *)&cq[pos + p1].csum);
1030                 cqe_tmp1 = (vector unsigned char)vec_vsx_ld(0,
1031                         (signed int const *)&cq[pos].csum);
1032                 cqes[1] = (vector unsigned char)
1033                         vec_sel((vector unsigned short)cqes[1],
1034                         (vector unsigned short)cqe_tmp2, cqe_sel_mask1);
1035                 cqes[0] = (vector unsigned char)
1036                         vec_sel((vector unsigned short)cqes[0],
1037                         (vector unsigned short)cqe_tmp1, cqe_sel_mask1);
1038                 cqe_tmp2 = (vector unsigned char)(vector unsigned long){
1039                         *(__rte_aligned(8) unsigned long *)
1040                         &cq[pos + p1].rsvd3[9], 0LL};
1041                 cqe_tmp1 = (vector unsigned char)(vector unsigned long){
1042                         *(__rte_aligned(8) unsigned long *)
1043                         &cq[pos].rsvd3[9], 0LL};
1044                 cqes[1] = (vector unsigned char)
1045                         vec_sel((vector unsigned short)cqes[1],
1046                         (vector unsigned short)cqe_tmp2, cqe_sel_mask2);
1047                 cqes[0] = (vector unsigned char)
1048                         vec_sel((vector unsigned short)cqes[0],
1049                         (vector unsigned short)cqe_tmp1, cqe_sel_mask2);
1050
1051                 /* C.2 generate final structure for mbuf with swapping bytes. */
1052                 pkt_mb1 = vec_perm(cqes[1], zero, shuf_mask);
1053                 pkt_mb0 = vec_perm(cqes[0], zero, shuf_mask);
1054
1055                 /* C.3 adjust CRC length. */
1056                 pkt_mb1 = (vector unsigned char)
1057                         ((vector unsigned short)pkt_mb1 -
1058                         (vector unsigned short)crc_adj);
1059                 pkt_mb0 = (vector unsigned char)
1060                         ((vector unsigned short)pkt_mb0 -
1061                         (vector unsigned short)crc_adj);
1062
1063                 /* C.4 adjust flow mark. */
1064                 pkt_mb1 = (vector unsigned char)
1065                         ((vector unsigned int)pkt_mb1 +
1066                         (vector unsigned int)flow_mark_adj);
1067                 pkt_mb0 = (vector unsigned char)
1068                         ((vector unsigned int)pkt_mb0 +
1069                         (vector unsigned int)flow_mark_adj);
1070
1071                 /* E.1 extract op_own byte. */
1072                 op_own_tmp1 = (vector unsigned char)
1073                         vec_mergeh((vector unsigned int)cqes[0],
1074                         (vector unsigned int)cqes[1]);
1075                 op_own = (vector unsigned char)
1076                         vec_mergel((vector unsigned long)op_own_tmp1,
1077                         (vector unsigned long)op_own_tmp2);
1078
1079                 /* D.1 fill in mbuf - rx_descriptor_fields1. */
1080                 *(vector unsigned char *)
1081                         &pkts[pos + 1]->pkt_len = pkt_mb1;
1082                 *(vector unsigned char *)
1083                         &pkts[pos]->pkt_len = pkt_mb0;
1084
1085                 /* E.2 flip owner bit to mark CQEs from last round. */
1086                 owner_mask = (vector unsigned char)
1087                         vec_and((vector unsigned long)op_own,
1088                         (vector unsigned long)owner_check);
1089                 if (ownership)
1090                         owner_mask = (vector unsigned char)
1091                                 vec_xor((vector unsigned long)owner_mask,
1092                                 (vector unsigned long)owner_check);
1093                 owner_mask = (vector unsigned char)
1094                         vec_cmpeq((vector unsigned int)owner_mask,
1095                         (vector unsigned int)owner_check);
1096                 owner_mask = (vector unsigned char)
1097                         vec_packs((vector unsigned int)owner_mask,
1098                         (vector unsigned int)zero);
1099
1100                 /* E.3 get mask for invalidated CQEs. */
1101                 opcode = (vector unsigned char)
1102                         vec_and((vector unsigned long)op_own,
1103                         (vector unsigned long)opcode_check);
1104                 invalid_mask = (vector unsigned char)
1105                         vec_cmpeq((vector unsigned int)opcode_check,
1106                         (vector unsigned int)opcode);
1107                 invalid_mask = (vector unsigned char)
1108                         vec_packs((vector unsigned int)invalid_mask,
1109                         (vector unsigned int)zero);
1110
1111                 /* E.4 mask out beyond boundary. */
1112                 invalid_mask = (vector unsigned char)
1113                         vec_or((vector unsigned long)invalid_mask,
1114                         (vector unsigned long)mask);
1115
1116                 /* E.5 merge invalid_mask with invalid owner. */
1117                 invalid_mask = (vector unsigned char)
1118                         vec_or((vector unsigned long)invalid_mask,
1119                         (vector unsigned long)owner_mask);
1120
1121                 /* F.1 find compressed CQE format. */
1122                 comp_mask = (vector unsigned char)
1123                         vec_and((vector unsigned long)op_own,
1124                         (vector unsigned long)format_check);
1125                 comp_mask = (vector unsigned char)
1126                         vec_cmpeq((vector unsigned int)comp_mask,
1127                         (vector unsigned int)format_check);
1128                 comp_mask = (vector unsigned char)
1129                         vec_packs((vector unsigned int)comp_mask,
1130                         (vector unsigned int)zero);
1131
1132                 /* F.2 mask out invalid entries. */
1133                 comp_mask = (vector unsigned char)
1134                         vec_andc((vector unsigned long)comp_mask,
1135                         (vector unsigned long)invalid_mask);
1136                 comp_idx = ((vector unsigned long)comp_mask)[0];
1137
1138                 /* F.3 get the first compressed CQE. */
1139                 comp_idx = comp_idx ? __builtin_ctzll(comp_idx) /
1140                         (sizeof(uint16_t) * 8) : MLX5_VPMD_DESCS_PER_LOOP;
1141
1142                 /* E.6 mask out entries after the compressed CQE. */
1143                 mask = (vector unsigned char)(vector unsigned long){
1144                         (comp_idx * sizeof(uint16_t) * 8), 0};
1145                 lshift = vec_splat((vector unsigned long)mask, 0);
1146                 shmask = vec_cmpgt(shmax, lshift);
1147                 mask = (vector unsigned char)
1148                         vec_sl((vector unsigned long)ones, lshift);
1149                 mask = (vector unsigned char)
1150                         vec_sel((vector unsigned long)shmask,
1151                         (vector unsigned long)mask, shmask);
1152                 invalid_mask = (vector unsigned char)
1153                         vec_or((vector unsigned long)invalid_mask,
1154                         (vector unsigned long)mask);
1155
1156                 /* E.7 count non-compressed valid CQEs. */
1157                 n = ((vector unsigned long)invalid_mask)[0];
1158                 n = n ? __builtin_ctzll(n) / (sizeof(uint16_t) * 8) :
1159                         MLX5_VPMD_DESCS_PER_LOOP;
1160                 nocmp_n += n;
1161
1162                 /* D.2 get the final invalid mask. */
1163                 mask = (vector unsigned char)(vector unsigned long){
1164                         (n * sizeof(uint16_t) * 8), 0};
1165                 lshift = vec_splat((vector unsigned long)mask, 0);
1166                 shmask = vec_cmpgt(shmax, lshift);
1167                 mask = (vector unsigned char)
1168                         vec_sl((vector unsigned long)ones, lshift);
1169                 mask = (vector unsigned char)
1170                         vec_sel((vector unsigned long)shmask,
1171                         (vector unsigned long)mask, shmask);
1172                 invalid_mask = (vector unsigned char)
1173                         vec_or((vector unsigned long)invalid_mask,
1174                         (vector unsigned long)mask);
1175
1176                 /* D.3 check error in opcode. */
1177                 opcode = (vector unsigned char)
1178                         vec_cmpeq((vector unsigned int)resp_err_check,
1179                         (vector unsigned int)opcode);
1180                 opcode = (vector unsigned char)
1181                         vec_packs((vector unsigned int)opcode,
1182                         (vector unsigned int)zero);
1183                 opcode = (vector unsigned char)
1184                         vec_andc((vector unsigned long)opcode,
1185                         (vector unsigned long)invalid_mask);
1186
1187                 /* D.4 mark if any error is set */
1188                 *err |= ((vector unsigned long)opcode)[0];
1189
1190                 /* D.5 fill in mbuf - rearm_data and packet_type. */
1191                 rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
1192                 if (rxq->hw_timestamp) {
1193                         int offset = rxq->timestamp_offset;
1194                         if (rxq->rt_timestamp) {
1195                                 struct mlx5_dev_ctx_shared *sh = rxq->sh;
1196                                 uint64_t ts;
1197
1198                                 ts = rte_be_to_cpu_64(cq[pos].timestamp);
1199                                 mlx5_timestamp_set(pkts[pos], offset,
1200                                         mlx5_txpp_convert_rx_ts(sh, ts));
1201                                 ts = rte_be_to_cpu_64(cq[pos + p1].timestamp);
1202                                 mlx5_timestamp_set(pkts[pos + 1], offset,
1203                                         mlx5_txpp_convert_rx_ts(sh, ts));
1204                                 ts = rte_be_to_cpu_64(cq[pos + p2].timestamp);
1205                                 mlx5_timestamp_set(pkts[pos + 2], offset,
1206                                         mlx5_txpp_convert_rx_ts(sh, ts));
1207                                 ts = rte_be_to_cpu_64(cq[pos + p3].timestamp);
1208                                 mlx5_timestamp_set(pkts[pos + 3], offset,
1209                                         mlx5_txpp_convert_rx_ts(sh, ts));
1210                         } else {
1211                                 mlx5_timestamp_set(pkts[pos], offset,
1212                                         rte_be_to_cpu_64(cq[pos].timestamp));
1213                                 mlx5_timestamp_set(pkts[pos + 1], offset,
1214                                         rte_be_to_cpu_64(cq[pos + p1].timestamp));
1215                                 mlx5_timestamp_set(pkts[pos + 2], offset,
1216                                         rte_be_to_cpu_64(cq[pos + p2].timestamp));
1217                                 mlx5_timestamp_set(pkts[pos + 3], offset,
1218                                         rte_be_to_cpu_64(cq[pos + p3].timestamp));
1219                         }
1220                 }
1221                 if (rxq->dynf_meta) {
1222                         uint64_t flag = rxq->flow_meta_mask;
1223                         int32_t offs = rxq->flow_meta_offset;
1224                         uint32_t mask = rxq->flow_meta_port_mask;
1225                         uint32_t metadata;
1226
1227                         /* This code is subject to further optimization. */
1228                         metadata = rte_be_to_cpu_32
1229                                 (cq[pos].flow_table_metadata) & mask;
1230                         *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
1231                                                                 metadata;
1232                         pkts[pos]->ol_flags |= metadata ? flag : 0ULL;
1233                         metadata = rte_be_to_cpu_32
1234                                 (cq[pos + 1].flow_table_metadata) & mask;
1235                         *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
1236                                                                 metadata;
1237                         pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL;
1238                         metadata = rte_be_to_cpu_32
1239                                 (cq[pos + 2].flow_table_metadata) &     mask;
1240                         *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
1241                                                                 metadata;
1242                         pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL;
1243                         metadata = rte_be_to_cpu_32
1244                                 (cq[pos + 3].flow_table_metadata) &     mask;
1245                         *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
1246                                                                 metadata;
1247                         pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL;
1248                 }
1249 #ifdef MLX5_PMD_SOFT_COUNTERS
1250                 /* Add up received bytes count. */
1251                 byte_cnt = vec_perm(op_own, zero, len_shuf_mask);
1252                 byte_cnt = (vector unsigned char)
1253                         vec_andc((vector unsigned long)byte_cnt,
1254                         (vector unsigned long)invalid_mask);
1255                 left = vec_perm((vector unsigned short)byte_cnt,
1256                         (vector unsigned short)zero, lower_half);
1257                 right = vec_perm((vector unsigned short)byte_cnt,
1258                         (vector unsigned short)zero, upper_half);
1259                 byte_cnt = (vector unsigned char)vec_add(left, right);
1260                 left = vec_perm((vector unsigned short)byte_cnt,
1261                         (vector unsigned short)zero, lower_half);
1262                 right = vec_perm((vector unsigned short)byte_cnt,
1263                         (vector unsigned short)zero, upper_half);
1264                 byte_cnt = (vector unsigned char)vec_add(left, right);
1265                 rcvd_byte += ((vector unsigned long)byte_cnt)[0];
1266 #endif
1267
1268                 /*
1269                  * Break the loop unless more valid CQE is expected, or if
1270                  * there's a compressed CQE.
1271                  */
1272                 if (n != MLX5_VPMD_DESCS_PER_LOOP)
1273                         break;
1274         }
1275 #ifdef MLX5_PMD_SOFT_COUNTERS
1276         rxq->stats.ipackets += nocmp_n;
1277         rxq->stats.ibytes += rcvd_byte;
1278 #endif
1279         if (comp_idx == n)
1280                 *comp = comp_idx;
1281         return nocmp_n;
1282 }
1283
1284 #endif /* RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_ */