/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <string.h>

#include <sys/queue.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_interrupts.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_string_fns.h>

#include "testpmd.h"

/* use RFC863 Discard Protocol */
uint16_t tx_udp_src_port = 9;
uint16_t tx_udp_dst_port = 9;

/* use RFC5735 / RFC2544 reserved network test addresses */
uint32_t tx_ip_src_addr = (198U << 24) | (18 << 16) | (0 << 8) | 1;
uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2;

#define IP_DEFTTL  64   /* from RFC 1340. */

static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */
RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */
RTE_DEFINE_PER_LCORE(uint64_t, timestamp_qskew);
/**< Timestamp offset per queue */
RTE_DEFINE_PER_LCORE(uint32_t, timestamp_idone); /**< Timestamp init done. */

static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
static int32_t timestamp_off; /**< Timestamp dynamic field offset */
static bool timestamp_enable; /**< Timestamp enable */
static uint32_t timestamp_init_req; /**< Timestamp initialization request. */
static uint64_t timestamp_initial[RTE_MAX_ETHPORTS];
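
/*
 * Copy "len" bytes from "buf" into the mbuf chain "pkt", starting at byte
 * "offset" from the beginning of the packet and crossing segment boundaries
 * as needed. The caller must ensure the chain is long enough.
 */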
static void
copy_buf_to_pkt_segs(void *buf, unsigned len, struct rte_mbuf *pkt,
		     unsigned offset)
{
	struct rte_mbuf *seg;
	void *seg_buf;
	unsigned copy_len;

	seg = pkt;
	while (offset >= seg->data_len) {
		offset -= seg->data_len;
		seg = seg->next;
	}
	copy_len = seg->data_len - offset;
	seg_buf = rte_pktmbuf_mtod_offset(seg, char *, offset);
	while (len > copy_len) {
		rte_memcpy(seg_buf, buf, (size_t) copy_len);
		len -= copy_len;
		buf = ((char *) buf + copy_len);
		seg = seg->next;
		seg_buf = rte_pktmbuf_mtod(seg, char *);
		copy_len = seg->data_len;
	}
	rte_memcpy(seg_buf, buf, (size_t) len);
}
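
/*
 * Copy "len" bytes into the packet: use a single memcpy when the data fits
 * entirely in the first segment, otherwise fall back to the segment-aware
 * copy above.
 */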
static inline void
copy_buf_to_pkt(void *buf, unsigned len, struct rte_mbuf *pkt, unsigned offset)
{
	if (offset + len <= pkt->data_len) {
		rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, offset),
			buf, (size_t) len);
		return;
	}
	copy_buf_to_pkt_segs(buf, len, pkt, offset);
}
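
/*
 * Build the template UDP and IPv4 headers that are prepended to every
 * transmitted packet, and compute the IPv4 header checksum in software.
 */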
static void
setup_pkt_udp_ip_headers(struct rte_ipv4_hdr *ip_hdr,
			 struct rte_udp_hdr *udp_hdr,
			 uint16_t pkt_data_len)
{
	uint16_t *ptr16;
	uint32_t ip_cksum;
	uint16_t pkt_len;

	/*
	 * Initialize UDP header.
	 */
	pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr));
	udp_hdr->src_port = rte_cpu_to_be_16(tx_udp_src_port);
	udp_hdr->dst_port = rte_cpu_to_be_16(tx_udp_dst_port);
	udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len);
	udp_hdr->dgram_cksum = 0; /* No UDP checksum. */

	/*
	 * Initialize IP header.
	 */
	pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr));
	ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
	ip_hdr->type_of_service = 0;
	ip_hdr->fragment_offset = 0;
	ip_hdr->time_to_live = IP_DEFTTL;
	ip_hdr->next_proto_id = IPPROTO_UDP;
	ip_hdr->packet_id = 0;
	ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len);
	ip_hdr->src_addr = rte_cpu_to_be_32(tx_ip_src_addr);
	ip_hdr->dst_addr = rte_cpu_to_be_32(tx_ip_dst_addr);

	/*
	 * Compute IP header checksum.
	 */
	ptr16 = (unaligned_uint16_t *) ip_hdr;
	ip_cksum = 0;
	ip_cksum += ptr16[0]; ip_cksum += ptr16[1];
	ip_cksum += ptr16[2]; ip_cksum += ptr16[3];
	ip_cksum += ptr16[4];
	ip_cksum += ptr16[6]; ip_cksum += ptr16[7];
	ip_cksum += ptr16[8]; ip_cksum += ptr16[9];
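	/* ptr16[5] is the header checksum field itself and is left out of the sum. */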

	/*
	 * Reduce 32 bit checksum to 16 bits and complement it.
	 */
	ip_cksum = ((ip_cksum & 0xFFFF0000) >> 16) +
		(ip_cksum & 0x0000FFFF);
	if (ip_cksum > 65535)
		ip_cksum -= 65535;
	ip_cksum = (~ip_cksum) & 0x0000FFFF;
	if (ip_cksum == 0)
		ip_cksum = 65535;
	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
}
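
/*
 * Refresh the UDP and IPv4 length fields (and the IPv4 header checksum) of a
 * packet whose total length differs from the pre-built header template.
 */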
static void
update_pkt_header(struct rte_mbuf *pkt, uint32_t total_pkt_len)
{
	struct rte_ipv4_hdr *ip_hdr;
	struct rte_udp_hdr *udp_hdr;
	uint16_t pkt_data_len;
	uint16_t pkt_len;

	pkt_data_len = (uint16_t) (total_pkt_len - (
					sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_ipv4_hdr) +
					sizeof(struct rte_udp_hdr)));
	/* update UDP packet length */
	udp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_udp_hdr *,
				sizeof(struct rte_ether_hdr) +
				sizeof(struct rte_ipv4_hdr));
	pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr));
	udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len);

	/* update IP packet length and checksum */
	ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
	ip_hdr->hdr_checksum = 0;
	pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr));
	ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len);
	ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
}
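
/*
 * Build one packet for transmission: allocate any extra segments, fill in the
 * Ethernet/IP/UDP headers from the templates, optionally vary the source IP
 * per flow, and insert the Tx timestamp when enabled.
 * Returns false if segment allocation or device info retrieval fails.
 */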
static inline bool
pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
		struct rte_ether_hdr *eth_hdr, const uint16_t vlan_tci,
		const uint16_t vlan_tci_outer, const uint64_t ol_flags,
		const uint16_t idx, const struct fwd_stream *fs)
{
	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
	struct rte_mbuf *pkt_seg;
	uint32_t nb_segs, pkt_len;
	uint8_t i;

	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
		nb_segs = rte_rand() % tx_pkt_nb_segs + 1;
	else
		nb_segs = tx_pkt_nb_segs;

	if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs - 1))
		return false;

	rte_pktmbuf_reset_headroom(pkt);
	pkt->data_len = tx_pkt_seg_lengths[0];
	pkt->ol_flags &= EXT_ATTACHED_MBUF;
	pkt->ol_flags |= ol_flags;
	pkt->vlan_tci = vlan_tci;
	pkt->vlan_tci_outer = vlan_tci_outer;
	pkt->l2_len = sizeof(struct rte_ether_hdr);
	pkt->l3_len = sizeof(struct rte_ipv4_hdr);

	pkt_len = pkt->data_len;
	pkt_seg = pkt;
	for (i = 1; i < nb_segs; i++) {
		pkt_seg->next = pkt_segs[i - 1];
		pkt_seg = pkt_seg->next;
		pkt_seg->data_len = tx_pkt_seg_lengths[i];
		pkt_len += pkt_seg->data_len;
	}
	pkt_seg->next = NULL; /* Last segment of packet. */

	/*
	 * Copy headers in first packet segment(s).
	 */
	copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
			sizeof(struct rte_ether_hdr));
	if (txonly_multi_flow) {
		uint8_t ip_var = RTE_PER_LCORE(_ip_var);
		struct rte_ipv4_hdr *ip_hdr;
		uint32_t addr;

		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
				struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		/*
		 * Generate multiple flows by varying the IP source address.
		 * This spreads packets across RSS queues on the receiver, if
		 * any, so that txonly mode can serve as a decent packet
		 * generator for quick performance regression tests.
		 */
		addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
		RTE_PER_LCORE(_ip_var) = ip_var;
	}
	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
			sizeof(struct rte_ether_hdr) +
			sizeof(struct rte_ipv4_hdr));

	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) || txonly_multi_flow)
		update_pkt_header(pkt, pkt_len);
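
	/*
	 * When Tx timestamping is enabled, write the scheduled send time into
	 * the timestamp dynamic field and embed a small marker (signature,
	 * queue index, packet index and timestamp) right after the UDP header.
	 */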
	if (unlikely(timestamp_enable)) {
		uint64_t skew = RTE_PER_LCORE(timestamp_qskew);
		struct {
			rte_be32_t signature;
			rte_be16_t pkt_idx;
			rte_be16_t queue_idx;
			rte_be64_t ts;
		} timestamp_mark;

		if (unlikely(timestamp_init_req !=
				RTE_PER_LCORE(timestamp_idone))) {
			struct rte_eth_dev_info dev_info;
			unsigned int txqs_n;
			uint64_t phase;
			int ret;

			ret = eth_dev_info_get_print_err(fs->tx_port, &dev_info);
			if (ret != 0) {
				TESTPMD_LOG(ERR,
					"Failed to get device info for port %d, "
					"could not finish timestamp init",
					fs->tx_port);
				return false;
			}
			txqs_n = dev_info.nb_tx_queues;
			phase = tx_pkt_times_inter * fs->tx_queue /
					(txqs_n ? txqs_n : 1);
			/*
			 * Initialize the scheduling time phase shift
			 * depending on queue index.
			 */
			skew = timestamp_initial[fs->tx_port] +
			       tx_pkt_times_inter + phase;
			RTE_PER_LCORE(timestamp_qskew) = skew;
			RTE_PER_LCORE(timestamp_idone) = timestamp_init_req;
		}
		timestamp_mark.pkt_idx = rte_cpu_to_be_16(idx);
		timestamp_mark.queue_idx = rte_cpu_to_be_16(fs->tx_queue);
		timestamp_mark.signature = rte_cpu_to_be_32(0xBEEFC0DE);
		if (unlikely(!idx)) {
			skew += tx_pkt_times_inter;
			pkt->ol_flags |= timestamp_mask;
			*RTE_MBUF_DYNFIELD
				(pkt, timestamp_off, uint64_t *) = skew;
			RTE_PER_LCORE(timestamp_qskew) = skew;
			timestamp_mark.ts = rte_cpu_to_be_64(skew);
		} else if (tx_pkt_times_intra) {
			skew += tx_pkt_times_intra;
			pkt->ol_flags |= timestamp_mask;
			*RTE_MBUF_DYNFIELD
				(pkt, timestamp_off, uint64_t *) = skew;
			RTE_PER_LCORE(timestamp_qskew) = skew;
			timestamp_mark.ts = rte_cpu_to_be_64(skew);
		} else {
			timestamp_mark.ts = RTE_BE64(0);
		}
		copy_buf_to_pkt(&timestamp_mark, sizeof(timestamp_mark), pkt,
			sizeof(struct rte_ether_hdr) +
			sizeof(struct rte_ipv4_hdr) +
			sizeof(pkt_udp_hdr));
	}
	/*
	 * Complete first mbuf of packet and append it to the
	 * burst of packets to be transmitted.
	 */
	pkt->nb_segs = nb_segs;
	pkt->pkt_len = pkt_len;

	return true;
}

/*
 * Transmit a burst of multi-segment packets.
 */
static void
pkt_burst_transmit(struct fwd_stream *fs)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_port *txp;
	struct rte_mbuf *pkt;
	struct rte_mempool *mbp;
	struct rte_ether_hdr eth_hdr;
	uint16_t nb_tx;
	uint16_t nb_pkt;
	uint16_t vlan_tci, vlan_tci_outer;
	uint32_t retry;
	uint64_t ol_flags = 0;
	uint64_t tx_offloads;
	uint64_t start_tsc = 0;

	get_start_cycles(&start_tsc);

	mbp = current_fwd_lcore()->mbp;
	txp = &ports[fs->tx_port];
	tx_offloads = txp->dev_conf.txmode.offloads;
	vlan_tci = txp->tx_vlan_id;
	vlan_tci_outer = txp->tx_vlan_id_outer;
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT)
		ol_flags = PKT_TX_VLAN_PKT;
	if (tx_offloads & DEV_TX_OFFLOAD_QINQ_INSERT)
		ol_flags |= PKT_TX_QINQ_PKT;
	if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
		ol_flags |= PKT_TX_MACSEC;

	/*
	 * Initialize Ethernet header.
	 */
	rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.dst_addr);
	rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.src_addr);
	eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
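
	/*
	 * Allocate the whole burst from the mempool in one bulk call when
	 * possible; otherwise fall back to allocating mbufs one by one.
	 */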
	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
				nb_pkt_per_burst) == 0) {
		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
					&eth_hdr, vlan_tci, vlan_tci_outer,
					ol_flags, nb_pkt, fs))) {
				rte_mempool_put_bulk(mbp,
						(void **)&pkts_burst[nb_pkt],
						nb_pkt_per_burst - nb_pkt);
				break;
			}
		}
	} else {
		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
			pkt = rte_mbuf_raw_alloc(mbp);
			if (pkt == NULL)
				break;
			if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr,
					vlan_tci, vlan_tci_outer,
					ol_flags, nb_pkt, fs))) {
				rte_pktmbuf_free(pkt);
				break;
			}
			pkts_burst[nb_pkt] = pkt;
		}
	}

	if (nb_pkt == 0)
		return;

	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
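
	/*
	 * Retry if necessary: give the device time to drain its Tx ring and
	 * resubmit the unsent tail of the burst.
	 */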
	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
		retry = 0;
		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
			rte_delay_us(burst_tx_delay_time);
			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
					&pkts_burst[nb_tx], nb_pkt - nb_tx);
		}
	}
	fs->tx_packets += nb_tx;
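
	/* Roll back the per-lcore IP variation for packets that were not sent. */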
	if (txonly_multi_flow)
		RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx;

	inc_tx_burst_stats(fs, nb_tx);
	if (unlikely(nb_tx < nb_pkt)) {
		if (verbose_level > 0 && fs->fwd_dropped == 0)
			printf("port %d tx_queue %d - drop "
			       "(nb_pkt:%u - nb_tx:%u)=%u packets\n",
			       fs->tx_port, fs->tx_queue,
			       (unsigned) nb_pkt, (unsigned) nb_tx,
			       (unsigned) (nb_pkt - nb_tx));
		fs->fwd_dropped += (nb_pkt - nb_tx);
		do {
			rte_pktmbuf_free(pkts_burst[nb_tx]);
		} while (++nb_tx < nb_pkt);
	}

	get_end_cycles(fs, start_tsc);
}
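
/*
 * Per-port initialization for txonly mode: build the header templates and
 * detect whether the Tx timestamp dynamic flag and field are registered.
 */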
static void
tx_only_begin(portid_t pi)
{
	uint16_t pkt_data_len;
	int dynf;

	pkt_data_len = (uint16_t) (tx_pkt_length - (
					sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_ipv4_hdr) +
					sizeof(struct rte_udp_hdr)));
	setup_pkt_udp_ip_headers(&pkt_ip_hdr, &pkt_udp_hdr, pkt_data_len);

	timestamp_enable = false;
	timestamp_mask = 0;
	timestamp_off = -1;
	RTE_PER_LCORE(timestamp_qskew) = 0;
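	/*
	 * Tx timestamping is only enabled when a driver has registered the
	 * timestamp dynamic flag and field, the port clock is readable, and
	 * a Tx time interval (txtimes) has been configured.
	 */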
	dynf = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	if (dynf >= 0)
		timestamp_mask = 1ULL << dynf;
	dynf = rte_mbuf_dynfield_lookup
				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	if (dynf >= 0)
		timestamp_off = dynf;
	timestamp_enable = tx_pkt_times_inter &&
			   timestamp_mask &&
			   timestamp_off >= 0 &&
			   !rte_eth_read_clock(pi, &timestamp_initial[pi]);
	if (timestamp_enable)
		timestamp_init_req++;
	/* Make sure all settings are visible on forwarding cores. */
	rte_wmb();
}

struct fwd_engine tx_only_engine = {
	.fwd_mode_name  = "txonly",
	.port_fwd_begin = tx_only_begin,
	.port_fwd_end   = NULL,
	.packet_fwd     = pkt_burst_transmit,
};