f990045fe3abed2956bf80a882c4c91ce7da0811
[dpdk.git] / examples / l2fwd-jobstats / main.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <locale.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <stdint.h>
38 #include <ctype.h>
39 #include <getopt.h>
40
41 #include <rte_alarm.h>
42 #include <rte_common.h>
43 #include <rte_log.h>
44 #include <rte_memory.h>
45 #include <rte_memcpy.h>
46 #include <rte_memzone.h>
47 #include <rte_eal.h>
48 #include <rte_per_lcore.h>
49 #include <rte_launch.h>
50 #include <rte_atomic.h>
51 #include <rte_cycles.h>
52 #include <rte_prefetch.h>
53 #include <rte_lcore.h>
54 #include <rte_per_lcore.h>
55 #include <rte_branch_prediction.h>
56 #include <rte_interrupts.h>
57 #include <rte_pci.h>
58 #include <rte_debug.h>
59 #include <rte_ether.h>
60 #include <rte_ethdev.h>
61 #include <rte_ring.h>
62 #include <rte_mempool.h>
63 #include <rte_mbuf.h>
64 #include <rte_spinlock.h>
65
66 #include <rte_errno.h>
67 #include <rte_jobstats.h>
68 #include <rte_timer.h>
69 #include <rte_alarm.h>
70
/* Log type selected by the RTE_LOG(..., L2FWD, ...) calls in this file. */
71 #define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
72
/* Element size and element count passed to rte_mempool_create() in main(). */
73 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
74 #define NB_MBUF   8192
75
/* Maximum packets per RX burst; also the capacity of each TX staging table. */
76 #define MAX_PKT_BURST 32
77 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
78
79 /*
80  * Configurable number of RX/TX ring descriptors
81  */
82 #define RTE_TEST_RX_DESC_DEFAULT 128
83 #define RTE_TEST_TX_DESC_DEFAULT 512
84 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
85 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
86
87 /* ethernet addresses of ports, indexed by port id */
88 static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
89
90 /* mask of enabled ports (bit N set means port N is enabled; set by -p) */
91 static uint32_t l2fwd_enabled_port_mask;
92
93 /* destination port for each port id (enabled ports are paired in main) */
94 static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
95
/* Amounts by which l2fwd_job_update_cb() lengthens/shortens a job period. */
96 #define UPDATE_STEP_UP 1
97 #define UPDATE_STEP_DOWN 32
98
/* Value of the -q option (see l2fwd_usage()); defaults to 1. */
99 static unsigned int l2fwd_rx_queue_per_lcore = 1;
100
/* Per-destination-port TX staging table owned by one lcore. */
101 struct mbuf_table {
102         uint64_t next_flush_time; /* timer cycles; set by l2fwd_send_burst() to now + drain_tsc */
103         unsigned len; /* number of valid entries in mbufs[] */
104         struct rte_mbuf *mbufs[MAX_PKT_BURST]; /* packets waiting to be sent */
105 };
106
107 #define MAX_RX_QUEUE_PER_LCORE 16
108 #define MAX_TX_QUEUE_PER_PORT 16
/* Per-lcore forwarding state: polled ports, TX staging, timers and job statistics. */
109 struct lcore_queue_conf {
110         unsigned n_rx_port; /* number of valid entries in rx_port_list[] */
111         unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE]; /* port ids polled by this lcore */
112         struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; /* TX staging, indexed by destination port id */
113
114         struct rte_timer rx_timers[MAX_RX_QUEUE_PER_LCORE]; /* one timer per rx_port_list[] slot */
115         struct rte_jobstats port_fwd_jobs[MAX_RX_QUEUE_PER_LCORE]; /* forwarding job stats per slot */
116
117         struct rte_timer flush_timer; /* its expiry is polled by l2fwd_main_loop() */
118         struct rte_jobstats flush_job; /* stats for l2fwd_flush_job() */
119         struct rte_jobstats idle_job; /* stats for the busy-wait part of the main loop */
120         struct rte_jobstats_context jobs_context; /* aggregate context for all jobs of this lcore */
121
122         rte_atomic16_t stats_read_pending; /* set by show_lcore_stats(), polled by the main loop */
123         rte_spinlock_t lock; /* held by the main loop; taken by show_lcore_stats() to read stats */
124 } __rte_cache_aligned;
125 struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
126
/* Ethernet device configuration template: all listed RX offloads are disabled. */
127 static const struct rte_eth_conf port_conf = {
128         .rxmode = {
129                 .split_hdr_size = 0,
130                 .header_split   = 0, /**< Header Split disabled */
131                 .hw_ip_checksum = 0, /**< IP checksum offload disabled */
132                 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
133                 .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
134                 .hw_strip_crc   = 0, /**< CRC strip by hardware disabled */
135         },
136         .txmode = {
137                 .mq_mode = ETH_MQ_TX_NONE,
138         },
139 };
140
/* Packet buffer pool, created in main() with rte_mempool_create(). */
141 struct rte_mempool *l2fwd_pktmbuf_pool = NULL;
142
143 /* Per-port statistics struct */
144 struct l2fwd_port_statistics {
145         uint64_t tx; /* packets accepted by rte_eth_tx_burst() */
146         uint64_t rx; /* packets returned by rte_eth_rx_burst() */
147         uint64_t dropped; /* packets freed because the TX burst did not take them */
148 } __rte_cache_aligned;
149 struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];
150
151 /* 1 day max */
152 #define MAX_TIMER_PERIOD 86400
153 /* statistics refresh period in seconds (-T option); default is 10 seconds */
154 static int64_t timer_period = 10;
155 /* timer frequency in Hz, fetched with rte_get_timer_hz() in main() */
156 static double hz;
157 /* BURST_TX_DRAIN_US converted to cycles */
158 uint64_t drain_tsc;
159 /* Convert cycles to ns */
160 static inline double
161 cycles_to_ns(uint64_t cycles)
162 {
163         double t = cycles;
164
165         t *= (double)NS_PER_S;
166         t /= hz;
167         return t;
168 }
169
/*
 * Print job statistics for one lcore.
 *
 * Raises stats_read_pending, takes the lcore's lock, copies and resets the
 * job-statistics counters, releases the lock and only then prints, so the
 * lock is not held while printing.
 *
 * The %' printf flag requests thousands grouping; the -l command line option
 * switches the locale so that grouping becomes visible.
 *
 * NOTE(review): the local arrays are VLAs sized by n_rx_port; the only caller
 * visible in this file invokes this function when n_rx_port > 0.
 * NOTE(review): percentages divide by stats_period; a zero period would print
 * inf/nan.
 */
170 static void
171 show_lcore_stats(unsigned lcore_id)
172 {
173         struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
174         struct rte_jobstats_context *ctx = &qconf->jobs_context;
175         struct rte_jobstats *job;
176         uint8_t i;
177
178         /* LCore statistics. */
179         uint64_t stats_period, loop_count;
180         uint64_t exec, exec_min, exec_max;
181         uint64_t management, management_min, management_max;
182         uint64_t busy, busy_min, busy_max;
183
184         /* Jobs statistics. */
185         const uint8_t port_cnt = qconf->n_rx_port;
186         uint64_t jobs_exec_cnt[port_cnt], jobs_period[port_cnt];
187         uint64_t jobs_exec[port_cnt], jobs_exec_min[port_cnt],
188                                 jobs_exec_max[port_cnt];
189
190         uint64_t flush_exec_cnt, flush_period;
191         uint64_t flush_exec, flush_exec_min, flush_exec_max;
192
193         uint64_t idle_exec_cnt;
194         uint64_t idle_exec, idle_exec_min, idle_exec_max;
195         uint64_t collection_time = rte_get_timer_cycles(); /* start timestamp; becomes elapsed cycles below */
196
197         /* Ask forwarding thread to give us stats: it releases the lock once it sees the flag. */
198         rte_atomic16_set(&qconf->stats_read_pending, 1);
199         rte_spinlock_lock(&qconf->lock);
200         rte_atomic16_set(&qconf->stats_read_pending, 0);
201
202         /* Collect context statistics. */
203         stats_period = ctx->state_time - ctx->start_time;
204         loop_count = ctx->loop_cnt;
205
206         exec = ctx->exec_time;
207         exec_min = ctx->min_exec_time;
208         exec_max = ctx->max_exec_time;
209
210         management = ctx->management_time;
211         management_min = ctx->min_management_time;
212         management_max = ctx->max_management_time;
213
214         rte_jobstats_context_reset(ctx);
215
         /* Snapshot and reset each per-port forwarding job. */
216         for (i = 0; i < port_cnt; i++) {
217                 job = &qconf->port_fwd_jobs[i];
218
219                 jobs_exec_cnt[i] = job->exec_cnt;
220                 jobs_period[i] = job->period;
221
222                 jobs_exec[i] = job->exec_time;
223                 jobs_exec_min[i] = job->min_exec_time;
224                 jobs_exec_max[i] = job->max_exec_time;
225
226                 rte_jobstats_reset(job);
227         }
228
         /* Snapshot and reset the flush job. */
229         flush_exec_cnt = qconf->flush_job.exec_cnt;
230         flush_period = qconf->flush_job.period;
231         flush_exec = qconf->flush_job.exec_time;
232         flush_exec_min = qconf->flush_job.min_exec_time;
233         flush_exec_max = qconf->flush_job.max_exec_time;
234         rte_jobstats_reset(&qconf->flush_job);
235
         /* Snapshot and reset the idle job. */
236         idle_exec_cnt = qconf->idle_job.exec_cnt;
237         idle_exec = qconf->idle_job.exec_time;
238         idle_exec_min = qconf->idle_job.min_exec_time;
239         idle_exec_max = qconf->idle_job.max_exec_time;
240         rte_jobstats_reset(&qconf->idle_job);
241
242         rte_spinlock_unlock(&qconf->lock);
243
         /* Report the idle job separately: remove its time from the exec total. */
244         exec -= idle_exec;
245         busy = exec + management;
246         busy_min = exec_min + management_min;
247         busy_max = exec_max + management_max;
248
249
250         collection_time = rte_get_timer_cycles() - collection_time;
251
         /* One row: label, total, percent of stats period, average, min, max. */
252 #define STAT_FMT "\n%-18s %'14.0f %6.1f%% %'10.0f %'10.0f %'10.0f"
253
254         printf("\n----------------"
255                         "\nLCore %3u: statistics (time in ns, collected in %'9.0f)"
256                         "\n%-18s %14s %7s %10s %10s %10s "
257                         "\n%-18s %'14.0f"
258                         "\n%-18s %'14" PRIu64
259                         STAT_FMT /* Exec */
260                         STAT_FMT /* Management */
261                         STAT_FMT /* Busy */
262                         STAT_FMT, /* Idle  */
263                         lcore_id, cycles_to_ns(collection_time),
264                         "Stat type", "total", "%total", "avg", "min", "max",
265                         "Stats duration:", cycles_to_ns(stats_period),
266                         "Loop count:", loop_count,
267                         "Exec time",
268                         cycles_to_ns(exec), exec * 100.0 / stats_period,
269                         cycles_to_ns(loop_count  ? exec / loop_count : 0),
270                         cycles_to_ns(exec_min),
271                         cycles_to_ns(exec_max),
272                         "Management time",
273                         cycles_to_ns(management), management * 100.0 / stats_period,
274                         cycles_to_ns(loop_count  ? management / loop_count : 0),
275                         cycles_to_ns(management_min),
276                         cycles_to_ns(management_max),
277                         "Exec + management",
278                         cycles_to_ns(busy),  busy * 100.0 / stats_period,
279                         cycles_to_ns(loop_count ? busy / loop_count : 0),
280                         cycles_to_ns(busy_min),
281                         cycles_to_ns(busy_max),
282                         "Idle (job)",
283                         cycles_to_ns(idle_exec), idle_exec * 100.0 / stats_period,
284                         cycles_to_ns(idle_exec_cnt ? idle_exec / idle_exec_cnt : 0),
285                         cycles_to_ns(idle_exec_min),
286                         cycles_to_ns(idle_exec_max));
287
         /* One section per forwarding job, printed from the snapshot taken under the lock. */
288         for (i = 0; i < qconf->n_rx_port; i++) {
289                 job = &qconf->port_fwd_jobs[i];
290                 printf("\n\nJob %" PRIu32 ": %-20s "
291                                 "\n%-18s %'14" PRIu64
292                                 "\n%-18s %'14.0f"
293                                 STAT_FMT,
294                                 i, job->name,
295                                 "Exec count:", jobs_exec_cnt[i],
296                                 "Exec period: ", cycles_to_ns(jobs_period[i]),
297                                 "Exec time",
298                                 cycles_to_ns(jobs_exec[i]), jobs_exec[i] * 100.0 / stats_period,
299                                 cycles_to_ns(jobs_exec_cnt[i] ? jobs_exec[i] / jobs_exec_cnt[i]
300                                                 : 0),
301                                 cycles_to_ns(jobs_exec_min[i]),
302                                 cycles_to_ns(jobs_exec_max[i]));
303         }
304
         /* The flush job is printed last; i equals n_rx_port here, so it gets the next job number. */
305         if (qconf->n_rx_port > 0) {
306                 job = &qconf->flush_job;
307                 printf("\n\nJob %" PRIu32 ": %-20s "
308                                 "\n%-18s %'14" PRIu64
309                                 "\n%-18s %'14.0f"
310                                 STAT_FMT,
311                                 i, job->name,
312                                 "Exec count:", flush_exec_cnt,
313                                 "Exec period: ", cycles_to_ns(flush_period),
314                                 "Exec time",
315                                 cycles_to_ns(flush_exec), flush_exec * 100.0 / stats_period,
316                                 cycles_to_ns(flush_exec_cnt ? flush_exec / flush_exec_cnt : 0),
317                                 cycles_to_ns(flush_exec_min),
318                                 cycles_to_ns(flush_exec_max));
319         }
320 }
321
322 /* Print out statistics on packets dropped */
323 static void
324 show_stats_cb(__rte_unused void *param)
325 {
326         uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
327         unsigned portid, lcore_id;
328
329         total_packets_dropped = 0;
330         total_packets_tx = 0;
331         total_packets_rx = 0;
332
333         const char clr[] = { 27, '[', '2', 'J', '\0' };
334         const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
335
336         /* Clear screen and move to top left */
337         printf("%s%s"
338                         "\nPort statistics ===================================",
339                         clr, topLeft);
340
341         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
342                 /* skip disabled ports */
343                 if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
344                         continue;
345                 printf("\nStatistics for port %u ------------------------------"
346                                 "\nPackets sent: %24"PRIu64
347                                 "\nPackets received: %20"PRIu64
348                                 "\nPackets dropped: %21"PRIu64,
349                                 portid,
350                                 port_statistics[portid].tx,
351                                 port_statistics[portid].rx,
352                                 port_statistics[portid].dropped);
353
354                 total_packets_dropped += port_statistics[portid].dropped;
355                 total_packets_tx += port_statistics[portid].tx;
356                 total_packets_rx += port_statistics[portid].rx;
357         }
358
359         printf("\nAggregate statistics ==============================="
360                         "\nTotal packets sent: %18"PRIu64
361                         "\nTotal packets received: %14"PRIu64
362                         "\nTotal packets dropped: %15"PRIu64
363                         "\n====================================================",
364                         total_packets_tx,
365                         total_packets_rx,
366                         total_packets_dropped);
367
368         RTE_LCORE_FOREACH(lcore_id) {
369                 if (lcore_queue_conf[lcore_id].n_rx_port > 0)
370                         show_lcore_stats(lcore_id);
371         }
372
373         printf("\n====================================================\n");
374         rte_eal_alarm_set(timer_period * US_PER_S, show_stats_cb, NULL);
375 }
376
377 /* Send the burst of packets on an output interface */
378 static void
379 l2fwd_send_burst(struct lcore_queue_conf *qconf, uint8_t port)
380 {
381         struct mbuf_table *m_table;
382         uint16_t ret;
383         uint16_t queueid = 0;
384         uint16_t n;
385
386         m_table = &qconf->tx_mbufs[port];
387         n = m_table->len;
388
389         m_table->next_flush_time = rte_get_timer_cycles() + drain_tsc;
390         m_table->len = 0;
391
392         ret = rte_eth_tx_burst(port, queueid, m_table->mbufs, n);
393
394         port_statistics[port].tx += ret;
395         if (unlikely(ret < n)) {
396                 port_statistics[port].dropped += (n - ret);
397                 do {
398                         rte_pktmbuf_free(m_table->mbufs[ret]);
399                 } while (++ret < n);
400         }
401 }
402
403 /* Enqueue packets for TX and prepare them to be sent */
404 static int
405 l2fwd_send_packet(struct rte_mbuf *m, uint8_t port)
406 {
407         const unsigned lcore_id = rte_lcore_id();
408         struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
409         struct mbuf_table *m_table = &qconf->tx_mbufs[port];
410         uint16_t len = qconf->tx_mbufs[port].len;
411
412         m_table->mbufs[len] = m;
413
414         len++;
415         m_table->len = len;
416
417         /* Enough pkts to be sent. */
418         if (unlikely(len == MAX_PKT_BURST))
419                 l2fwd_send_burst(qconf, port);
420
421         return 0;
422 }
423
424 static void
425 l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
426 {
427         struct ether_hdr *eth;
428         void *tmp;
429         unsigned dst_port;
430
431         dst_port = l2fwd_dst_ports[portid];
432         eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
433
434         /* 02:00:00:00:00:xx */
435         tmp = &eth->d_addr.addr_bytes[0];
436         *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
437
438         /* src addr */
439         ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);
440
441         l2fwd_send_packet(m, (uint8_t) dst_port);
442 }
443
444 static void
445 l2fwd_job_update_cb(struct rte_jobstats *job, int64_t result)
446 {
447         int64_t err = job->target - result;
448         int64_t histeresis = job->target / 8;
449
450         if (err < -histeresis) {
451                 if (job->min_period + UPDATE_STEP_DOWN < job->period)
452                         job->period -= UPDATE_STEP_DOWN;
453         } else if (err > histeresis) {
454                 if (job->period + UPDATE_STEP_UP < job->max_period)
455                         job->period += UPDATE_STEP_UP;
456         }
457 }
458
459 static void
460 l2fwd_fwd_job(__rte_unused struct rte_timer *timer, void *arg)
461 {
462         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
463         struct rte_mbuf *m;
464
465         const uint8_t port_idx = (uintptr_t) arg;
466         const unsigned lcore_id = rte_lcore_id();
467         struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
468         struct rte_jobstats *job = &qconf->port_fwd_jobs[port_idx];
469         const uint8_t portid = qconf->rx_port_list[port_idx];
470
471         uint8_t j;
472         uint16_t total_nb_rx;
473
474         rte_jobstats_start(&qconf->jobs_context, job);
475
476         /* Call rx burst 2 times. This allow rte_jobstats logic to see if this
477          * function must be called more frequently. */
478
479         total_nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, pkts_burst,
480                         MAX_PKT_BURST);
481
482         for (j = 0; j < total_nb_rx; j++) {
483                 m = pkts_burst[j];
484                 rte_prefetch0(rte_pktmbuf_mtod(m, void *));
485                 l2fwd_simple_forward(m, portid);
486         }
487
488         if (total_nb_rx == MAX_PKT_BURST) {
489                 const uint16_t nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, pkts_burst,
490                                 MAX_PKT_BURST);
491
492                 total_nb_rx += nb_rx;
493                 for (j = 0; j < nb_rx; j++) {
494                         m = pkts_burst[j];
495                         rte_prefetch0(rte_pktmbuf_mtod(m, void *));
496                         l2fwd_simple_forward(m, portid);
497                 }
498         }
499
500         port_statistics[portid].rx += total_nb_rx;
501
502         /* Adjust period time in which we are running here. */
503         if (rte_jobstats_finish(job, total_nb_rx) != 0) {
504                 rte_timer_reset(&qconf->rx_timers[port_idx], job->period, PERIODICAL,
505                                 lcore_id, l2fwd_fwd_job, arg);
506         }
507 }
508
509 static void
510 l2fwd_flush_job(__rte_unused struct rte_timer *timer, __rte_unused void *arg)
511 {
512         uint64_t now;
513         unsigned lcore_id;
514         struct lcore_queue_conf *qconf;
515         struct mbuf_table *m_table;
516         uint8_t portid;
517
518         lcore_id = rte_lcore_id();
519         qconf = &lcore_queue_conf[lcore_id];
520
521         rte_jobstats_start(&qconf->jobs_context, &qconf->flush_job);
522
523         now = rte_get_timer_cycles();
524         lcore_id = rte_lcore_id();
525         qconf = &lcore_queue_conf[lcore_id];
526         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
527                 m_table = &qconf->tx_mbufs[portid];
528                 if (m_table->len == 0 || m_table->next_flush_time <= now)
529                         continue;
530
531                 l2fwd_send_burst(qconf, portid);
532         }
533
534
535         /* Pass target to indicate that this job is happy of time interwal
536          * in which it was called. */
537         rte_jobstats_finish(&qconf->flush_job, qconf->flush_job.target);
538 }
539
540 /* main processing loop */
541 static void
542 l2fwd_main_loop(void)
543 {
544         unsigned lcore_id;
545         unsigned i, portid;
546         struct lcore_queue_conf *qconf;
547         uint8_t stats_read_pending = 0;
548         uint8_t need_manage;
549
550         lcore_id = rte_lcore_id();
551         qconf = &lcore_queue_conf[lcore_id];
552
553         if (qconf->n_rx_port == 0) {
554                 RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
555                 return;
556         }
557
558         RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
559
560         for (i = 0; i < qconf->n_rx_port; i++) {
561
562                 portid = qconf->rx_port_list[i];
563                 RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
564                         portid);
565         }
566
567         rte_jobstats_init(&qconf->idle_job, "idle", 0, 0, 0, 0);
568
569         for (;;) {
570                 rte_spinlock_lock(&qconf->lock);
571
572                 do {
573                         rte_jobstats_context_start(&qconf->jobs_context);
574
575                         /* Do the Idle job:
576                          * - Read stats_read_pending flag
577                          * - check if some real job need to be executed
578                          */
579                         rte_jobstats_start(&qconf->jobs_context, &qconf->idle_job);
580
581                         do {
582                                 uint8_t i;
583                                 uint64_t now = rte_get_timer_cycles();
584
585                                 need_manage = qconf->flush_timer.expire < now;
586                                 /* Check if we was esked to give a stats. */
587                                 stats_read_pending =
588                                                 rte_atomic16_read(&qconf->stats_read_pending);
589                                 need_manage |= stats_read_pending;
590
591                                 for (i = 0; i < qconf->n_rx_port && !need_manage; i++)
592                                         need_manage = qconf->rx_timers[i].expire < now;
593
594                         } while (!need_manage);
595                         rte_jobstats_finish(&qconf->idle_job, qconf->idle_job.target);
596
597                         rte_timer_manage();
598                         rte_jobstats_context_finish(&qconf->jobs_context);
599                 } while (likely(stats_read_pending == 0));
600
601                 rte_spinlock_unlock(&qconf->lock);
602                 rte_pause();
603         }
604 }
605
606 static int
607 l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
608 {
609         l2fwd_main_loop();
610         return 0;
611 }
612
/*
 * Print the command line synopsis and option descriptions to stdout.
 *
 * prgname is the program name shown at the start of the synopsis. The
 * synopsis lists every option accepted by l2fwd_parse_args(), and the text
 * ends with a newline so that following output starts on its own line.
 */
static void
l2fwd_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [-q NQ] [-T PERIOD] [-l]\n"
	       "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
	       "  -q NQ: number of queue (=ports) per lcore (default is 1)\n"
	       "  -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n"
	       "  -l set system default locale instead of default (\"C\" locale) for thousands separator in stats.\n",
	       prgname);
}
624
/*
 * Parse a hexadecimal port mask.
 *
 * Returns the mask, or 0 when the string is empty, has trailing characters,
 * parses to zero or does not fit in 32 bits. The caller stores the result in
 * a uint32_t and treats 0 as "invalid", so failure must be reported as 0:
 * the previous -1 became 0xFFFFFFFF there and was accepted as a valid mask.
 */
static int
l2fwd_parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return 0;

	/* The port mask is 32 bits wide; reject anything larger. */
	if (pm > UINT32_MAX)
		return 0;

	return (int)(uint32_t)pm;
}
641
642 static unsigned int
643 l2fwd_parse_nqueue(const char *q_arg)
644 {
645         char *end = NULL;
646         unsigned long n;
647
648         /* parse hexadecimal string */
649         n = strtoul(q_arg, &end, 10);
650         if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
651                 return 0;
652         if (n == 0)
653                 return 0;
654         if (n >= MAX_RX_QUEUE_PER_LCORE)
655                 return 0;
656
657         return n;
658 }
659
660 static int
661 l2fwd_parse_timer_period(const char *q_arg)
662 {
663         char *end = NULL;
664         int n;
665
666         /* parse number string */
667         n = strtol(q_arg, &end, 10);
668         if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
669                 return -1;
670         if (n >= MAX_TIMER_PERIOD)
671                 return -1;
672
673         return n;
674 }
675
676 /* Parse the argument given in the command line of the application */
677 static int
678 l2fwd_parse_args(int argc, char **argv)
679 {
680         int opt, ret;
681         char **argvopt;
682         int option_index;
683         char *prgname = argv[0];
684         static struct option lgopts[] = {
685                 {NULL, 0, 0, 0}
686         };
687
688         argvopt = argv;
689
690         while ((opt = getopt_long(argc, argvopt, "p:q:T:l",
691                                   lgopts, &option_index)) != EOF) {
692
693                 switch (opt) {
694                 /* portmask */
695                 case 'p':
696                         l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg);
697                         if (l2fwd_enabled_port_mask == 0) {
698                                 printf("invalid portmask\n");
699                                 l2fwd_usage(prgname);
700                                 return -1;
701                         }
702                         break;
703
704                 /* nqueue */
705                 case 'q':
706                         l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg);
707                         if (l2fwd_rx_queue_per_lcore == 0) {
708                                 printf("invalid queue number\n");
709                                 l2fwd_usage(prgname);
710                                 return -1;
711                         }
712                         break;
713
714                 /* timer period */
715                 case 'T':
716                         timer_period = l2fwd_parse_timer_period(optarg);
717                         if (timer_period < 0) {
718                                 printf("invalid timer period\n");
719                                 l2fwd_usage(prgname);
720                                 return -1;
721                         }
722                         break;
723
724                 /* For thousands separator in printf. */
725                 case 'l':
726                         setlocale(LC_ALL, "");
727                         break;
728
729                 /* long options */
730                 case 0:
731                         l2fwd_usage(prgname);
732                         return -1;
733
734                 default:
735                         l2fwd_usage(prgname);
736                         return -1;
737                 }
738         }
739
740         if (optind >= 0)
741                 argv[optind-1] = prgname;
742
743         ret = optind-1;
744         optind = 0; /* reset getopt lib */
745         return ret;
746 }
747
748 /* Check the link status of all ports in up to 9s, and print them finally */
749 static void
750 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
751 {
752 #define CHECK_INTERVAL 100 /* 100ms */
753 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
754         uint8_t portid, count, all_ports_up, print_flag = 0;
755         struct rte_eth_link link;
756
757         printf("\nChecking link status");
758         fflush(stdout);
759         for (count = 0; count <= MAX_CHECK_TIME; count++) {
760                 all_ports_up = 1;
761                 for (portid = 0; portid < port_num; portid++) {
762                         if ((port_mask & (1 << portid)) == 0)
763                                 continue;
764                         memset(&link, 0, sizeof(link));
765                         rte_eth_link_get_nowait(portid, &link);
766                         /* print link status if flag set */
767                         if (print_flag == 1) {
768                                 if (link.link_status)
769                                         printf("Port %d Link Up - speed %u "
770                                                 "Mbps - %s\n", (uint8_t)portid,
771                                                 (unsigned)link.link_speed,
772                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
773                                         ("full-duplex") : ("half-duplex\n"));
774                                 else
775                                         printf("Port %d Link Down\n",
776                                                 (uint8_t)portid);
777                                 continue;
778                         }
779                         /* clear all_ports_up flag if any link down */
780                         if (link.link_status == 0) {
781                                 all_ports_up = 0;
782                                 break;
783                         }
784                 }
785                 /* after finally printing all link status, get out */
786                 if (print_flag == 1)
787                         break;
788
789                 if (all_ports_up == 0) {
790                         printf(".");
791                         fflush(stdout);
792                         rte_delay_ms(CHECK_INTERVAL);
793                 }
794
795                 /* set the print_flag if all ports up or timeout */
796                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
797                         print_flag = 1;
798                         printf("done\n");
799                 }
800         }
801 }
802
803 int
804 main(int argc, char **argv)
805 {
806         struct lcore_queue_conf *qconf;
807         struct rte_eth_dev_info dev_info;
808         unsigned lcore_id, rx_lcore_id;
809         unsigned nb_ports_in_mask = 0;
810         int ret;
811         char name[RTE_JOBSTATS_NAMESIZE];
812         uint8_t nb_ports;
813         uint8_t nb_ports_available;
814         uint8_t portid, last_port;
815         uint8_t i;
816
817         /* init EAL */
818         ret = rte_eal_init(argc, argv);
819         if (ret < 0)
820                 rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
821         argc -= ret;
822         argv += ret;
823
824         /* parse application arguments (after the EAL ones) */
825         ret = l2fwd_parse_args(argc, argv);
826         if (ret < 0)
827                 rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");
828
829         rte_timer_subsystem_init();
830
831         /* fetch default timer frequency. */
832         hz = rte_get_timer_hz();
833
834         /* create the mbuf pool */
835         l2fwd_pktmbuf_pool =
836                 rte_mempool_create("mbuf_pool", NB_MBUF,
837                                    MBUF_SIZE, 32,
838                                    sizeof(struct rte_pktmbuf_pool_private),
839                                    rte_pktmbuf_pool_init, NULL,
840                                    rte_pktmbuf_init, NULL,
841                                    rte_socket_id(), 0);
842         if (l2fwd_pktmbuf_pool == NULL)
843                 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
844
845         nb_ports = rte_eth_dev_count();
846         if (nb_ports == 0)
847                 rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
848
849         if (nb_ports > RTE_MAX_ETHPORTS)
850                 nb_ports = RTE_MAX_ETHPORTS;
851
852         /* reset l2fwd_dst_ports */
853         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
854                 l2fwd_dst_ports[portid] = 0;
855         last_port = 0;
856
857         /*
858          * Each logical core is assigned a dedicated TX queue on each port.
859          */
860         for (portid = 0; portid < nb_ports; portid++) {
861                 /* skip ports that are not enabled */
862                 if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
863                         continue;
864
865                 if (nb_ports_in_mask % 2) {
866                         l2fwd_dst_ports[portid] = last_port;
867                         l2fwd_dst_ports[last_port] = portid;
868                 } else
869                         last_port = portid;
870
871                 nb_ports_in_mask++;
872
873                 rte_eth_dev_info_get(portid, &dev_info);
874         }
875         if (nb_ports_in_mask % 2) {
876                 printf("Notice: odd number of ports in portmask.\n");
877                 l2fwd_dst_ports[last_port] = last_port;
878         }
879
880         rx_lcore_id = 0;
881         qconf = NULL;
882
883         /* Initialize the port/queue configuration of each logical core */
884         for (portid = 0; portid < nb_ports; portid++) {
885                 /* skip ports that are not enabled */
886                 if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
887                         continue;
888
889                 /* get the lcore_id for this port */
890                 while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
891                        lcore_queue_conf[rx_lcore_id].n_rx_port ==
892                        l2fwd_rx_queue_per_lcore) {
893                         rx_lcore_id++;
894                         if (rx_lcore_id >= RTE_MAX_LCORE)
895                                 rte_exit(EXIT_FAILURE, "Not enough cores\n");
896                 }
897
898                 if (qconf != &lcore_queue_conf[rx_lcore_id])
899                         /* Assigned a new logical core in the loop above. */
900                         qconf = &lcore_queue_conf[rx_lcore_id];
901
902                 qconf->rx_port_list[qconf->n_rx_port] = portid;
903                 qconf->n_rx_port++;
904                 printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
905         }
906
907         nb_ports_available = nb_ports;
908
909         /* Initialise each port */
910         for (portid = 0; portid < nb_ports; portid++) {
911                 /* skip ports that are not enabled */
912                 if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
913                         printf("Skipping disabled port %u\n", (unsigned) portid);
914                         nb_ports_available--;
915                         continue;
916                 }
917                 /* init port */
918                 printf("Initializing port %u... ", (unsigned) portid);
919                 fflush(stdout);
920                 ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
921                 if (ret < 0)
922                         rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
923                                   ret, (unsigned) portid);
924
925                 rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]);
926
927                 /* init one RX queue */
928                 fflush(stdout);
929                 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
930                                              rte_eth_dev_socket_id(portid),
931                                              NULL,
932                                              l2fwd_pktmbuf_pool);
933                 if (ret < 0)
934                         rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
935                                   ret, (unsigned) portid);
936
937                 /* init one TX queue on each port */
938                 fflush(stdout);
939                 ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
940                                 rte_eth_dev_socket_id(portid),
941                                 NULL);
942                 if (ret < 0)
943                         rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
944                                 ret, (unsigned) portid);
945
946                 /* Start device */
947                 ret = rte_eth_dev_start(portid);
948                 if (ret < 0)
949                         rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
950                                   ret, (unsigned) portid);
951
952                 printf("done:\n");
953
954                 rte_eth_promiscuous_enable(portid);
955
956                 printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
957                                 (unsigned) portid,
958                                 l2fwd_ports_eth_addr[portid].addr_bytes[0],
959                                 l2fwd_ports_eth_addr[portid].addr_bytes[1],
960                                 l2fwd_ports_eth_addr[portid].addr_bytes[2],
961                                 l2fwd_ports_eth_addr[portid].addr_bytes[3],
962                                 l2fwd_ports_eth_addr[portid].addr_bytes[4],
963                                 l2fwd_ports_eth_addr[portid].addr_bytes[5]);
964
965                 /* initialize port stats */
966                 memset(&port_statistics, 0, sizeof(port_statistics));
967         }
968
969         if (!nb_ports_available) {
970                 rte_exit(EXIT_FAILURE,
971                         "All available ports are disabled. Please set portmask.\n");
972         }
973
974         check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);
975
976         drain_tsc = (hz + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
977
978         RTE_LCORE_FOREACH(lcore_id) {
979                 qconf = &lcore_queue_conf[lcore_id];
980
981                 rte_spinlock_init(&qconf->lock);
982
983                 if (rte_jobstats_context_init(&qconf->jobs_context) != 0)
984                         rte_panic("Jobs stats context for core %u init failed\n", lcore_id);
985
986                 if (qconf->n_rx_port == 0) {
987                         RTE_LOG(INFO, L2FWD,
988                                 "lcore %u: no ports so no jobs stats context initialization\n",
989                                 lcore_id);
990                         continue;
991                 }
992                 /* Add flush job.
993                  * Set fixed period by setting min = max = initial period. Set target to
994                  * zero as it is irrelevant for this job. */
995                 rte_jobstats_init(&qconf->flush_job, "flush", drain_tsc, drain_tsc,
996                                 drain_tsc, 0);
997
998                 rte_timer_init(&qconf->flush_timer);
999                 ret = rte_timer_reset(&qconf->flush_timer, drain_tsc, PERIODICAL,
1000                                 lcore_id, &l2fwd_flush_job, NULL);
1001
1002                 if (ret < 0) {
1003                         rte_exit(1, "Failed to reset flush job timer for lcore %u: %s",
1004                                         lcore_id, rte_strerror(-ret));
1005                 }
1006
1007                 for (i = 0; i < qconf->n_rx_port; i++) {
1008                         struct rte_jobstats *job = &qconf->port_fwd_jobs[i];
1009
1010                         portid = qconf->rx_port_list[i];
1011                         printf("Setting forward jon for port %u\n", portid);
1012
1013                         snprintf(name, RTE_DIM(name), "port %u fwd", portid);
1014                         /* Setup forward job.
1015                          * Set min, max and initial period. Set target to MAX_PKT_BURST as
1016                          * this is desired optimal RX/TX burst size. */
1017                         rte_jobstats_init(job, name, 0, drain_tsc, 0, MAX_PKT_BURST);
1018                         rte_jobstats_set_update_period_function(job, l2fwd_job_update_cb);
1019
1020                         rte_timer_init(&qconf->rx_timers[i]);
1021                         ret = rte_timer_reset(&qconf->rx_timers[i], 0, PERIODICAL, lcore_id,
1022                                         &l2fwd_fwd_job, (void *)(uintptr_t)i);
1023
1024                         if (ret < 0) {
1025                                 rte_exit(1, "Failed to reset lcore %u port %u job timer: %s",
1026                                                 lcore_id, qconf->rx_port_list[i], rte_strerror(-ret));
1027                         }
1028                 }
1029         }
1030
1031         if (timer_period)
1032                 rte_eal_alarm_set(timer_period * MS_PER_S, show_stats_cb, NULL);
1033         else
1034                 RTE_LOG(INFO, L2FWD, "Stats display disabled\n");
1035
1036         /* launch per-lcore init on every lcore */
1037         rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
1038         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1039                 if (rte_eal_wait_lcore(lcore_id) < 0)
1040                         return -1;
1041         }
1042
1043         return 0;
1044 }