dpdk.git: app/test-pmd/txonly.c (commit 2af61daf5258257f608d9a89151b80cfb806791f)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <sys/queue.h>
#include <sys/stat.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
#include <rte_flow.h>

#include "testpmd.h"

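/*
 * Layout of the packets generated by pkt_burst_prepare() below, as offsets
 * from the start of the first segment (VLAN tags are requested through mbuf
 * offload fields and are not part of the copied headers):
 *
 *    0   Ethernet header      (14 bytes)
 *   14   IPv4 header          (20 bytes)
 *   34   UDP header           ( 8 bytes)
 *   42   tx_timestamp marker  (16 bytes, only when Tx timestamping is
 *        enabled via a non-zero tx_pkt_times_inter)
 *   ...  remaining payload up to tx_pkt_length
 */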
struct tx_timestamp {
        rte_be32_t signature;
        rte_be16_t pkt_idx;
        rte_be16_t queue_idx;
        rte_be64_t ts;
};

/* use RFC863 Discard Protocol */
uint16_t tx_udp_src_port = 9;
uint16_t tx_udp_dst_port = 9;

/* use RFC5735 / RFC2544 reserved network test addresses */
uint32_t tx_ip_src_addr = (198U << 24) | (18 << 16) | (0 << 8) | 1;
uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2;

#define IP_DEFTTL  64   /* from RFC 1340. */

static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */
RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */
RTE_DEFINE_PER_LCORE(uint64_t, timestamp_qskew);
                                        /**< Timestamp offset per queue */
RTE_DEFINE_PER_LCORE(uint32_t, timestamp_idone); /**< Timestamp init done. */

static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
static int32_t timestamp_off; /**< Timestamp dynamic field offset */
static bool timestamp_enable; /**< Timestamp enable */
static uint32_t timestamp_init_req; /**< Timestamp initialization request. */
static uint64_t timestamp_initial[RTE_MAX_ETHPORTS];

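/*
 * Scatter a flat buffer into a chained mbuf: copy_buf_to_pkt() handles the
 * common case where the copy fits entirely in the first segment and falls
 * back to copy_buf_to_pkt_segs() when the destination range crosses one or
 * more segment boundaries. Both assume the chain is already long enough to
 * hold offset + len bytes.
 */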
static void
copy_buf_to_pkt_segs(void *buf, unsigned len, struct rte_mbuf *pkt,
                     unsigned offset)
{
        struct rte_mbuf *seg;
        void *seg_buf;
        unsigned copy_len;

        seg = pkt;
        while (offset >= seg->data_len) {
                offset -= seg->data_len;
                seg = seg->next;
        }
        copy_len = seg->data_len - offset;
        seg_buf = rte_pktmbuf_mtod_offset(seg, char *, offset);
        while (len > copy_len) {
                rte_memcpy(seg_buf, buf, (size_t) copy_len);
                len -= copy_len;
                buf = ((char *) buf + copy_len);
                seg = seg->next;
                seg_buf = rte_pktmbuf_mtod(seg, char *);
                copy_len = seg->data_len;
        }
        rte_memcpy(seg_buf, buf, (size_t) len);
}

static inline void
copy_buf_to_pkt(void *buf, unsigned len, struct rte_mbuf *pkt, unsigned offset)
{
        if (offset + len <= pkt->data_len) {
                rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, offset),
                        buf, (size_t) len);
                return;
        }
        copy_buf_to_pkt_segs(buf, len, pkt, offset);
}

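/*
 * Build the template IPv4 and UDP headers (pkt_ip_hdr / pkt_udp_hdr) once,
 * from tx_only_begin(). pkt_burst_prepare() copies these templates into
 * every packet; update_pkt_header() later patches the length and checksum
 * fields for packets whose final size or source address differs from the
 * template (random segment split or multi-flow mode).
 */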
static void
setup_pkt_udp_ip_headers(struct rte_ipv4_hdr *ip_hdr,
                         struct rte_udp_hdr *udp_hdr,
                         uint16_t pkt_data_len)
{
        uint16_t *ptr16;
        uint32_t ip_cksum;
        uint16_t pkt_len;

        /*
         * Initialize UDP header.
         */
        pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr));
        udp_hdr->src_port = rte_cpu_to_be_16(tx_udp_src_port);
        udp_hdr->dst_port = rte_cpu_to_be_16(tx_udp_dst_port);
        udp_hdr->dgram_len      = RTE_CPU_TO_BE_16(pkt_len);
        udp_hdr->dgram_cksum    = 0; /* No UDP checksum. */

        /*
         * Initialize IP header.
         */
        pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr));
        ip_hdr->version_ihl   = RTE_IPV4_VHL_DEF;
        ip_hdr->type_of_service   = 0;
        ip_hdr->fragment_offset = 0;
        ip_hdr->time_to_live   = IP_DEFTTL;
        ip_hdr->next_proto_id = IPPROTO_UDP;
        ip_hdr->packet_id = 0;
        ip_hdr->total_length   = RTE_CPU_TO_BE_16(pkt_len);
        ip_hdr->src_addr = rte_cpu_to_be_32(tx_ip_src_addr);
        ip_hdr->dst_addr = rte_cpu_to_be_32(tx_ip_dst_addr);

        /*
         * Compute IP header checksum.
         * ptr16[5] is the checksum field itself and is skipped.
         */
        ptr16 = (unaligned_uint16_t *) ip_hdr;
        ip_cksum = 0;
        ip_cksum += ptr16[0]; ip_cksum += ptr16[1];
        ip_cksum += ptr16[2]; ip_cksum += ptr16[3];
        ip_cksum += ptr16[4];
        ip_cksum += ptr16[6]; ip_cksum += ptr16[7];
        ip_cksum += ptr16[8]; ip_cksum += ptr16[9];

        /*
         * Reduce 32 bit checksum to 16 bits and complement it.
         */
        ip_cksum = ((ip_cksum & 0xFFFF0000) >> 16) +
                (ip_cksum & 0x0000FFFF);
        if (ip_cksum > 65535)
                ip_cksum -= 65535;
        ip_cksum = (~ip_cksum) & 0x0000FFFF;
        if (ip_cksum == 0)
                ip_cksum = 0xFFFF;
        ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
}

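/*
 * Re-derive the length-dependent header fields of a single packet: UDP
 * dgram_len, IPv4 total_length and the IPv4 header checksum. Called whenever
 * the packet no longer matches the template built in
 * setup_pkt_udp_ip_headers(), i.e. with random segment split or after the
 * source address was varied for multi-flow generation.
 */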
static inline void
update_pkt_header(struct rte_mbuf *pkt, uint32_t total_pkt_len)
{
        struct rte_ipv4_hdr *ip_hdr;
        struct rte_udp_hdr *udp_hdr;
        uint16_t pkt_data_len;
        uint16_t pkt_len;

        pkt_data_len = (uint16_t) (total_pkt_len - (
                                        sizeof(struct rte_ether_hdr) +
                                        sizeof(struct rte_ipv4_hdr) +
                                        sizeof(struct rte_udp_hdr)));
        /* update UDP datagram length */
        udp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_udp_hdr *,
                                sizeof(struct rte_ether_hdr) +
                                sizeof(struct rte_ipv4_hdr));
        pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr));
        udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len);

        /* update IP total length and checksum */
        ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *,
                                sizeof(struct rte_ether_hdr));
        ip_hdr->hdr_checksum = 0;
        pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr));
        ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len);
        ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
}

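/*
 * Fill one mbuf (and, for multi-segment packets, chain nb_segs - 1 extra
 * mbufs taken from the same pool) with the Ethernet/IPv4/UDP templates,
 * the optional per-lcore source-address variation and the optional
 * timestamp marker. Returns false when the extra segments cannot be
 * allocated or the device info needed for timestamp initialization cannot
 * be read; the caller then releases the mbuf(s) it still owns and truncates
 * the burst.
 */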
static inline bool
pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
                struct rte_ether_hdr *eth_hdr, const uint16_t vlan_tci,
                const uint16_t vlan_tci_outer, const uint64_t ol_flags,
                const uint16_t idx, const struct fwd_stream *fs)
{
        struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
        struct rte_mbuf *pkt_seg;
        uint32_t nb_segs, pkt_len;
        uint8_t i;

        if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
                nb_segs = rte_rand() % tx_pkt_nb_segs + 1;
        else
                nb_segs = tx_pkt_nb_segs;

        if (nb_segs > 1) {
                if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs - 1))
                        return false;
        }

        rte_pktmbuf_reset_headroom(pkt);
        pkt->data_len = tx_pkt_seg_lengths[0];
        pkt->ol_flags &= EXT_ATTACHED_MBUF;
        pkt->ol_flags |= ol_flags;
        pkt->vlan_tci = vlan_tci;
        pkt->vlan_tci_outer = vlan_tci_outer;
        pkt->l2_len = sizeof(struct rte_ether_hdr);
        pkt->l3_len = sizeof(struct rte_ipv4_hdr);

        pkt_len = pkt->data_len;
        pkt_seg = pkt;
        for (i = 1; i < nb_segs; i++) {
                pkt_seg->next = pkt_segs[i - 1];
                pkt_seg = pkt_seg->next;
                pkt_seg->data_len = tx_pkt_seg_lengths[i];
                pkt_len += pkt_seg->data_len;
        }
        pkt_seg->next = NULL; /* Last segment of packet. */
        /*
         * Copy headers in first packet segment(s).
         */
        copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
        copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
                        sizeof(struct rte_ether_hdr));
        if (txonly_multi_flow) {
                uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
                struct rte_ipv4_hdr *ip_hdr;
                uint32_t addr;

                ip_hdr = rte_pktmbuf_mtod_offset(pkt,
                                struct rte_ipv4_hdr *,
                                sizeof(struct rte_ether_hdr));
                /*
                 * Generate multiple flows by varying the IP source address.
                 * This spreads the packets across RSS queues on the receiving
                 * side, if any, so that txonly mode can serve as a decent
                 * packet generator for quick performance regression tests.
                 */
                addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
                ip_hdr->src_addr = rte_cpu_to_be_32(addr);
                RTE_PER_LCORE(_ip_var) = ip_var;
        }
        copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
                        sizeof(struct rte_ether_hdr) +
                        sizeof(struct rte_ipv4_hdr));

        if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) || txonly_multi_flow)
                update_pkt_header(pkt, pkt_len);

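        /*
         * Optional Tx scheduling: when timestamping is enabled, the first
         * packet of each burst (idx == 0) carries the per-queue skew
         * advanced by the inter-burst delay (tx_pkt_times_inter) in the
         * timestamp dynamic field, and every following packet either adds
         * the intra-burst delay (tx_pkt_times_intra) or leaves its marker
         * timestamp at zero. The same value, together with a signature and
         * the packet/queue indices, is written into the UDP payload so a
         * receiver can check the actual transmission times.
         */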
        if (unlikely(timestamp_enable)) {
                uint64_t skew = RTE_PER_LCORE(timestamp_qskew);
                struct tx_timestamp timestamp_mark;

                if (unlikely(timestamp_init_req !=
                                RTE_PER_LCORE(timestamp_idone))) {
                        struct rte_eth_dev_info dev_info;
                        unsigned int txqs_n;
                        uint64_t phase;
                        int ret;

                        ret = eth_dev_info_get_print_err(fs->tx_port, &dev_info);
                        if (ret != 0) {
                                TESTPMD_LOG(ERR,
                                        "Failed to get device info for port %d, "
                                        "could not finish timestamp init\n",
                                        fs->tx_port);
                                return false;
                        }
                        txqs_n = dev_info.nb_tx_queues;
                        phase = tx_pkt_times_inter * fs->tx_queue /
                                         (txqs_n ? txqs_n : 1);
                        /*
                         * Initialize the scheduling time phase shift
                         * depending on queue index.
                         */
                        skew = timestamp_initial[fs->tx_port] +
                               tx_pkt_times_inter + phase;
                        RTE_PER_LCORE(timestamp_qskew) = skew;
                        RTE_PER_LCORE(timestamp_idone) = timestamp_init_req;
                }
                timestamp_mark.pkt_idx = rte_cpu_to_be_16(idx);
                timestamp_mark.queue_idx = rte_cpu_to_be_16(fs->tx_queue);
                timestamp_mark.signature = rte_cpu_to_be_32(0xBEEFC0DE);
                if (unlikely(!idx)) {
                        skew += tx_pkt_times_inter;
                        pkt->ol_flags |= timestamp_mask;
                        *RTE_MBUF_DYNFIELD
                                (pkt, timestamp_off, uint64_t *) = skew;
                        RTE_PER_LCORE(timestamp_qskew) = skew;
                        timestamp_mark.ts = rte_cpu_to_be_64(skew);
                } else if (tx_pkt_times_intra) {
                        skew += tx_pkt_times_intra;
                        pkt->ol_flags |= timestamp_mask;
                        *RTE_MBUF_DYNFIELD
                                (pkt, timestamp_off, uint64_t *) = skew;
                        RTE_PER_LCORE(timestamp_qskew) = skew;
                        timestamp_mark.ts = rte_cpu_to_be_64(skew);
                } else {
                        timestamp_mark.ts = RTE_BE64(0);
                }
                copy_buf_to_pkt(&timestamp_mark, sizeof(timestamp_mark), pkt,
                        sizeof(struct rte_ether_hdr) +
                        sizeof(struct rte_ipv4_hdr) +
                        sizeof(pkt_udp_hdr));
        }
        /*
         * Complete first mbuf of packet and append it to the
         * burst of packets to be transmitted.
         */
        pkt->nb_segs = nb_segs;
        pkt->pkt_len = pkt_len;

        return true;
}

/*
 * Transmit a burst of multi-segment packets.
 */
static void
pkt_burst_transmit(struct fwd_stream *fs)
{
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        struct rte_port *txp;
        struct rte_mbuf *pkt;
        struct rte_mempool *mbp;
        struct rte_ether_hdr eth_hdr;
        uint16_t nb_tx;
        uint16_t nb_pkt;
        uint16_t vlan_tci, vlan_tci_outer;
        uint32_t retry;
        uint64_t ol_flags = 0;
        uint64_t tx_offloads;
        uint64_t start_tsc = 0;

        get_start_cycles(&start_tsc);

        mbp = current_fwd_lcore()->mbp;
        txp = &ports[fs->tx_port];
        tx_offloads = txp->dev_conf.txmode.offloads;
        vlan_tci = txp->tx_vlan_id;
        vlan_tci_outer = txp->tx_vlan_id_outer;
        if (tx_offloads & RTE_ETH_TX_OFFLOAD_VLAN_INSERT)
                ol_flags = PKT_TX_VLAN;
        if (tx_offloads & RTE_ETH_TX_OFFLOAD_QINQ_INSERT)
                ol_flags |= PKT_TX_QINQ;
        if (tx_offloads & RTE_ETH_TX_OFFLOAD_MACSEC_INSERT)
                ol_flags |= PKT_TX_MACSEC;

        /*
         * Initialize Ethernet header.
         */
        rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.dst_addr);
        rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.src_addr);
        eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);

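        /*
         * Prefer a single bulk allocation for the whole burst; if the pool
         * cannot satisfy it, fall back to allocating mbufs one by one so
         * that a partial burst can still be prepared and sent.
         */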
        if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
                                nb_pkt_per_burst) == 0) {
                for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
                        if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
                                                        &eth_hdr, vlan_tci,
                                                        vlan_tci_outer,
                                                        ol_flags,
                                                        nb_pkt, fs))) {
                                rte_mempool_put_bulk(mbp,
                                                (void **)&pkts_burst[nb_pkt],
                                                nb_pkt_per_burst - nb_pkt);
                                break;
                        }
                }
        } else {
                for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
                        pkt = rte_mbuf_raw_alloc(mbp);
                        if (pkt == NULL)
                                break;
                        if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr,
                                                        vlan_tci,
                                                        vlan_tci_outer,
                                                        ol_flags,
                                                        nb_pkt, fs))) {
                                rte_pktmbuf_free(pkt);
                                break;
                        }
                        pkts_burst[nb_pkt] = pkt;
                }
        }

        if (nb_pkt == 0)
                return;

        nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);

        /*
         * Retry if necessary
         */
        if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
                retry = 0;
                while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
                        rte_delay_us(burst_tx_delay_time);
                        nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
                                        &pkts_burst[nb_tx], nb_pkt - nb_tx);
                }
        }
        fs->tx_packets += nb_tx;

        if (txonly_multi_flow)
                RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx;

        inc_tx_burst_stats(fs, nb_tx);
        if (unlikely(nb_tx < nb_pkt)) {
                if (verbose_level > 0 && fs->fwd_dropped == 0)
                        printf("port %d tx_queue %d - drop "
                               "(nb_pkt:%u - nb_tx:%u)=%u packets\n",
                               fs->tx_port, fs->tx_queue,
                               (unsigned) nb_pkt, (unsigned) nb_tx,
                               (unsigned) (nb_pkt - nb_tx));
                fs->fwd_dropped += (nb_pkt - nb_tx);
                do {
                        rte_pktmbuf_free(pkts_burst[nb_tx]);
                } while (++nb_tx < nb_pkt);
        }

        get_end_cycles(fs, start_tsc);
}

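/*
 * Per-port initialization when forwarding starts in txonly mode: validate
 * that the leading segment(s) can hold the headers (plus the timestamp
 * marker if enabled), build the header templates and resolve the timestamp
 * dynamic flag/field, if registered. Returns -EINVAL when the configured
 * segment lengths are too small. The knobs referenced here are typically
 * driven from the testpmd CLI; an illustrative session (command arguments
 * are examples, not defaults):
 *
 *   testpmd> set fwd txonly
 *   testpmd> set txpkts 64,128       # tx_pkt_seg_lengths / tx_pkt_nb_segs
 *   testpmd> set txsplit rand        # tx_pkt_split = TX_PKT_SPLIT_RND
 *   testpmd> set txtimes 100000,100  # tx_pkt_times_inter / tx_pkt_times_intra
 *   testpmd> start
 */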
static int
tx_only_begin(portid_t pi)
{
        uint16_t pkt_hdr_len, pkt_data_len;
        int dynf;

        pkt_hdr_len = (uint16_t)(sizeof(struct rte_ether_hdr) +
                                 sizeof(struct rte_ipv4_hdr) +
                                 sizeof(struct rte_udp_hdr));
        pkt_data_len = tx_pkt_length - pkt_hdr_len;

        if ((tx_pkt_split == TX_PKT_SPLIT_RND || txonly_multi_flow) &&
            tx_pkt_seg_lengths[0] < pkt_hdr_len) {
                TESTPMD_LOG(ERR,
                            "Random segment split or multi-flow mode is enabled, "
                            "but tx_pkt_seg_lengths[0] %u < %u (needed)\n",
                            tx_pkt_seg_lengths[0], pkt_hdr_len);
                return -EINVAL;
        }

        setup_pkt_udp_ip_headers(&pkt_ip_hdr, &pkt_udp_hdr, pkt_data_len);

        timestamp_enable = false;
        timestamp_mask = 0;
        timestamp_off = -1;
        RTE_PER_LCORE(timestamp_qskew) = 0;
        dynf = rte_mbuf_dynflag_lookup
                                (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
        if (dynf >= 0)
                timestamp_mask = 1ULL << dynf;
        dynf = rte_mbuf_dynfield_lookup
                                (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
        if (dynf >= 0)
                timestamp_off = dynf;
        timestamp_enable = tx_pkt_times_inter &&
                           timestamp_mask &&
                           timestamp_off >= 0 &&
                           !rte_eth_read_clock(pi, &timestamp_initial[pi]);

        if (timestamp_enable) {
                pkt_hdr_len += sizeof(struct tx_timestamp);

                if (tx_pkt_split == TX_PKT_SPLIT_RND) {
                        if (tx_pkt_seg_lengths[0] < pkt_hdr_len) {
                                TESTPMD_LOG(ERR,
                                            "Timestamping and random segment split are enabled, "
                                            "but tx_pkt_seg_lengths[0] %u < %u (needed)\n",
                                            tx_pkt_seg_lengths[0], pkt_hdr_len);
                                return -EINVAL;
                        }
                } else {
                        uint16_t total = 0;
                        uint8_t i;

                        for (i = 0; i < tx_pkt_nb_segs; i++) {
                                total += tx_pkt_seg_lengths[i];
                                if (total >= pkt_hdr_len)
                                        break;
                        }

                        if (total < pkt_hdr_len) {
                                TESTPMD_LOG(ERR,
                                            "Not enough Tx segment space for timestamp info, "
                                            "total %u < %u (needed)\n",
                                            total, pkt_hdr_len);
                                return -EINVAL;
                        }
                }
                timestamp_init_req++;
        }

        /* Make sure all settings are visible on forwarding cores. */
        rte_wmb();
        return 0;
}

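/*
 * Forwarding engine descriptor registered in testpmd's fwd_engines[] list
 * and selected at runtime with "set fwd txonly": packet_fwd runs per
 * forwarding stream, port_fwd_begin once per port when forwarding starts.
 */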
struct fwd_engine tx_only_engine = {
        .fwd_mode_name  = "txonly",
        .port_fwd_begin = tx_only_begin,
        .port_fwd_end   = NULL,
        .packet_fwd     = pkt_burst_transmit,
};