ethdev: make default behavior CRC strip on Rx
[dpdk.git] / examples / l3fwd-power / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2018 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdint.h>
8 #include <inttypes.h>
9 #include <sys/types.h>
10 #include <string.h>
11 #include <sys/queue.h>
12 #include <stdarg.h>
13 #include <errno.h>
14 #include <getopt.h>
15 #include <unistd.h>
16 #include <signal.h>
17
18 #include <rte_common.h>
19 #include <rte_byteorder.h>
20 #include <rte_log.h>
21 #include <rte_malloc.h>
22 #include <rte_memory.h>
23 #include <rte_memcpy.h>
24 #include <rte_eal.h>
25 #include <rte_launch.h>
26 #include <rte_atomic.h>
27 #include <rte_cycles.h>
28 #include <rte_prefetch.h>
29 #include <rte_lcore.h>
30 #include <rte_per_lcore.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_interrupts.h>
33 #include <rte_random.h>
34 #include <rte_debug.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev.h>
37 #include <rte_mempool.h>
38 #include <rte_mbuf.h>
39 #include <rte_ip.h>
40 #include <rte_tcp.h>
41 #include <rte_udp.h>
42 #include <rte_string_fns.h>
43 #include <rte_timer.h>
44 #include <rte_power.h>
45 #include <rte_spinlock.h>
46
47 #include "perf_core.h"
48 #include "main.h"
49
50 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
51
52 #define MAX_PKT_BURST 32
53
54 #define MIN_ZERO_POLL_COUNT 10
55
56 /* 100 ms interval */
57 #define TIMER_NUMBER_PER_SECOND           10
58 /* 100000 us */
59 #define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
60 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
61
62 #define APP_LOOKUP_EXACT_MATCH          0
63 #define APP_LOOKUP_LPM                  1
64 #define DO_RFC_1812_CHECKS
65
66 #ifndef APP_LOOKUP_METHOD
67 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
68 #endif
69
70 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
71 #include <rte_hash.h>
72 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
73 #include <rte_lpm.h>
74 #else
75 #error "APP_LOOKUP_METHOD set to incorrect value"
76 #endif
77
78 #ifndef IPv6_BYTES
79 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
80                        "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
81 #define IPv6_BYTES(addr) \
82         addr[0],  addr[1], addr[2],  addr[3], \
83         addr[4],  addr[5], addr[6],  addr[7], \
84         addr[8],  addr[9], addr[10], addr[11],\
85         addr[12], addr[13],addr[14], addr[15]
86 #endif
87
88 #define MAX_JUMBO_PKT_LEN  9600
89
90 #define IPV6_ADDR_LEN 16
91
92 #define MEMPOOL_CACHE_SIZE 256
93
94 /*
95  * This expression is used to calculate the number of mbufs needed depending on
96  * user input, taking into account memory for rx and tx hardware rings, cache
97  * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
98  * NB_MBUF never goes below a minimum value of 8192.
99  */
100
101 #define NB_MBUF RTE_MAX ( \
102         (nb_ports*nb_rx_queue*nb_rxd + \
103         nb_ports*nb_lcores*MAX_PKT_BURST + \
104         nb_ports*n_tx_queue*nb_txd + \
105         nb_lcores*MEMPOOL_CACHE_SIZE), \
106         (unsigned)8192)
107
108 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
109
110 #define NB_SOCKETS 8
111
112 /* Configure how many packets ahead to prefetch, when reading packets */
113 #define PREFETCH_OFFSET 3
114
115 /*
116  * Configurable number of RX/TX ring descriptors
117  */
118 #define RTE_TEST_RX_DESC_DEFAULT 1024
119 #define RTE_TEST_TX_DESC_DEFAULT 1024
120 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
121 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
122
123 /* ethernet addresses of ports */
124 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
125
126 /* ethernet addresses of ports */
127 static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
128
129 /* mask of enabled ports */
130 static uint32_t enabled_port_mask = 0;
131 /* Ports set in promiscuous mode off by default. */
132 static int promiscuous_on = 0;
133 /* NUMA is enabled by default. */
134 static int numa_on = 1;
135 static int parse_ptype; /**< Parse packet type using rx callback, and */
136                         /**< disabled by default */
137
138 enum freq_scale_hint_t
139 {
140         FREQ_LOWER    =      -1,
141         FREQ_CURRENT  =       0,
142         FREQ_HIGHER   =       1,
143         FREQ_HIGHEST  =       2
144 };
145
/* Per-lcore bookkeeping for one polled Rx queue. */
struct lcore_rx_queue {
	uint16_t port_id;                    /* port this queue belongs to */
	uint8_t queue_id;                    /* Rx queue index on the port */
	enum freq_scale_hint_t freq_up_hint; /* latest scale-up suggestion */
	uint32_t zero_rx_packet_count;       /* consecutive empty polls */
	uint32_t idle_hint;                  /* suggested sleep time in us */
} __rte_cache_aligned;
153
154 #define MAX_RX_QUEUE_PER_LCORE 16
155 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
156 #define MAX_RX_QUEUE_PER_PORT 128
157
158 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
159
160
161 struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
162 static struct lcore_params lcore_params_array_default[] = {
163         {0, 0, 2},
164         {0, 1, 2},
165         {0, 2, 2},
166         {1, 0, 2},
167         {1, 1, 2},
168         {1, 2, 2},
169         {2, 0, 2},
170         {3, 0, 3},
171         {3, 1, 3},
172 };
173
174 struct lcore_params *lcore_params = lcore_params_array_default;
175 uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
176                                 sizeof(lcore_params_array_default[0]);
177
/*
 * Default port configuration: RSS-distributed Rx with checksum offload,
 * no special Tx mode, and per-queue Rx interrupts enabled (required for
 * the sleep-until-interrupt power-saving scheme below).
 */
static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_RSS,
		.max_rx_pkt_len = ETHER_MAX_LEN,
		.split_hdr_size = 0,
		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL, /* use the driver's default RSS key */
			.rss_hf = ETH_RSS_UDP,
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.intr_conf = {
		.rxq = 1, /* enable per-Rx-queue interrupts */
	},
};
198
199 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
200
201
202 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
203
204 #ifdef RTE_ARCH_X86
205 #include <rte_hash_crc.h>
206 #define DEFAULT_HASH_FUNC       rte_hash_crc
207 #else
208 #include <rte_jhash.h>
209 #define DEFAULT_HASH_FUNC       rte_jhash
210 #endif
211
212 struct ipv4_5tuple {
213         uint32_t ip_dst;
214         uint32_t ip_src;
215         uint16_t port_dst;
216         uint16_t port_src;
217         uint8_t  proto;
218 } __attribute__((__packed__));
219
220 struct ipv6_5tuple {
221         uint8_t  ip_dst[IPV6_ADDR_LEN];
222         uint8_t  ip_src[IPV6_ADDR_LEN];
223         uint16_t port_dst;
224         uint16_t port_src;
225         uint8_t  proto;
226 } __attribute__((__packed__));
227
228 struct ipv4_l3fwd_route {
229         struct ipv4_5tuple key;
230         uint8_t if_out;
231 };
232
233 struct ipv6_l3fwd_route {
234         struct ipv6_5tuple key;
235         uint8_t if_out;
236 };
237
238 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
239         {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
240         {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
241         {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
242         {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
243 };
244
245 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
246         {
247                 {
248                         {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
249                          0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
250                         {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
251                          0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a},
252                          1, 10, IPPROTO_UDP
253                 }, 4
254         },
255 };
256
257 typedef struct rte_hash lookup_struct_t;
258 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
259 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
260
261 #define L3FWD_HASH_ENTRIES      1024
262
263 #define IPV4_L3FWD_NUM_ROUTES \
264         (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
265
266 #define IPV6_L3FWD_NUM_ROUTES \
267         (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))
268
269 static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
270 static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
271 #endif
272
273 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
274 struct ipv4_l3fwd_route {
275         uint32_t ip;
276         uint8_t  depth;
277         uint8_t  if_out;
278 };
279
280 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
281         {IPv4(1,1,1,0), 24, 0},
282         {IPv4(2,1,1,0), 24, 1},
283         {IPv4(3,1,1,0), 24, 2},
284         {IPv4(4,1,1,0), 24, 3},
285         {IPv4(5,1,1,0), 24, 4},
286         {IPv4(6,1,1,0), 24, 5},
287         {IPv4(7,1,1,0), 24, 6},
288         {IPv4(8,1,1,0), 24, 7},
289 };
290
291 #define IPV4_L3FWD_NUM_ROUTES \
292         (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
293
294 #define IPV4_L3FWD_LPM_MAX_RULES     1024
295
296 typedef struct rte_lpm lookup_struct_t;
297 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
298 #endif
299
300 struct lcore_conf {
301         uint16_t n_rx_queue;
302         struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
303         uint16_t n_tx_port;
304         uint16_t tx_port_id[RTE_MAX_ETHPORTS];
305         uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
306         struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
307         lookup_struct_t * ipv4_lookup_struct;
308         lookup_struct_t * ipv6_lookup_struct;
309 } __rte_cache_aligned;
310
311 struct lcore_stats {
312         /* total sleep time in ms since last frequency scaling down */
313         uint32_t sleep_time;
314         /* number of long sleep recently */
315         uint32_t nb_long_sleep;
316         /* freq. scaling up trend */
317         uint32_t trend;
318         /* total packet processed recently */
319         uint64_t nb_rx_processed;
320         /* total iterations looped recently */
321         uint64_t nb_iteration_looped;
322         uint32_t padding[9];
323 } __rte_cache_aligned;
324
325 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
326 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
327 static struct rte_timer power_timers[RTE_MAX_LCORE];
328
329 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
330 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
331                 unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);
332
333 /* exit signal handler */
334 static void
335 signal_exit_now(int sigtype)
336 {
337         unsigned lcore_id;
338         unsigned int portid;
339         int ret;
340
341         if (sigtype == SIGINT) {
342                 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
343                         if (rte_lcore_is_enabled(lcore_id) == 0)
344                                 continue;
345
346                         /* init power management library */
347                         ret = rte_power_exit(lcore_id);
348                         if (ret)
349                                 rte_exit(EXIT_FAILURE, "Power management "
350                                         "library de-initialization failed on "
351                                                         "core%u\n", lcore_id);
352                 }
353
354                 RTE_ETH_FOREACH_DEV(portid) {
355                         if ((enabled_port_mask & (1 << portid)) == 0)
356                                 continue;
357
358                         rte_eth_dev_stop(portid);
359                         rte_eth_dev_close(portid);
360                 }
361         }
362
363         rte_exit(EXIT_SUCCESS, "User forced exit\n");
364 }
365
366 /*  Freqency scale down timer callback */
367 static void
368 power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
369                           __attribute__((unused)) void *arg)
370 {
371         uint64_t hz;
372         float sleep_time_ratio;
373         unsigned lcore_id = rte_lcore_id();
374
375         /* accumulate total execution time in us when callback is invoked */
376         sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
377                                         (float)SCALING_PERIOD;
378         /**
379          * check whether need to scale down frequency a step if it sleep a lot.
380          */
381         if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
382                 if (rte_power_freq_down)
383                         rte_power_freq_down(lcore_id);
384         }
385         else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
386                 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
387                 /**
388                  * scale down a step if average packet per iteration less
389                  * than expectation.
390                  */
391                 if (rte_power_freq_down)
392                         rte_power_freq_down(lcore_id);
393         }
394
395         /**
396          * initialize another timer according to current frequency to ensure
397          * timer interval is relatively fixed.
398          */
399         hz = rte_get_timer_hz();
400         rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
401                                 SINGLE, lcore_id, power_timer_cb, NULL);
402
403         stats[lcore_id].nb_rx_processed = 0;
404         stats[lcore_id].nb_iteration_looped = 0;
405
406         stats[lcore_id].sleep_time = 0;
407 }
408
409 /* Enqueue a single packet, and send burst if queue is filled */
410 static inline int
411 send_single_packet(struct rte_mbuf *m, uint16_t port)
412 {
413         uint32_t lcore_id;
414         struct lcore_conf *qconf;
415
416         lcore_id = rte_lcore_id();
417         qconf = &lcore_conf[lcore_id];
418
419         rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
420                         qconf->tx_buffer[port], m);
421
422         return 0;
423 }
424
425 #ifdef DO_RFC_1812_CHECKS
/*
 * Validate an IPv4 header per RFC 1812 section 5.2.2 (checks 1, 3, 4, 5;
 * check 2, the IP checksum, is assumed to be verified in hardware).
 * Returns 0 when the header passes, or a negative check number on failure.
 */
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	/* this is checked in H/W */

	/*
	 * 3. The IP version number must be 4. If the version number is not 4
	 * then the packet may be another version of IP, such as IPng or
	 * ST-II.
	 */
	if (((pkt->version_ihl) >> 4) != 4)
		return -3;
	/*
	 * 4. The IP header length field must be large enough to hold the
	 * minimum length legal IP datagram (20 bytes = 5 words).
	 */
	if ((pkt->version_ihl & 0xf) < 5)
		return -4;

	/*
	 * 5. The IP total length field must be large enough to hold the IP
	 * datagram header, whose length is specified in the IP header length
	 * field.  (The byte swap is symmetric, so cpu_to_be_16 on the
	 * big-endian field yields the host-order length.)
	 */
	if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
		return -5;

	return 0;
}
464 #endif
465
466 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
/* Pretty-print an IPv4 5-tuple hash key (debug/route-dump helper). */
static void
print_ipv4_key(struct ipv4_5tuple key)
{
	printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, "
		"proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src,
				key.port_dst, key.port_src, key.proto);
}
/* Pretty-print an IPv6 5-tuple hash key (debug/route-dump helper). */
static void
print_ipv6_key(struct ipv6_5tuple key)
{
	printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
		"port dst = %d, port src = %d, proto = %d\n",
		IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
		key.port_dst, key.port_src, key.proto);
}
482
483 static inline uint16_t
484 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
485                 lookup_struct_t * ipv4_l3fwd_lookup_struct)
486 {
487         struct ipv4_5tuple key;
488         struct tcp_hdr *tcp;
489         struct udp_hdr *udp;
490         int ret = 0;
491
492         key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
493         key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
494         key.proto = ipv4_hdr->next_proto_id;
495
496         switch (ipv4_hdr->next_proto_id) {
497         case IPPROTO_TCP:
498                 tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr +
499                                         sizeof(struct ipv4_hdr));
500                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
501                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
502                 break;
503
504         case IPPROTO_UDP:
505                 udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr +
506                                         sizeof(struct ipv4_hdr));
507                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
508                 key.port_src = rte_be_to_cpu_16(udp->src_port);
509                 break;
510
511         default:
512                 key.port_dst = 0;
513                 key.port_src = 0;
514                 break;
515         }
516
517         /* Find destination port */
518         ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
519         return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
520 }
521
522 static inline uint16_t
523 get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint16_t portid,
524                         lookup_struct_t *ipv6_l3fwd_lookup_struct)
525 {
526         struct ipv6_5tuple key;
527         struct tcp_hdr *tcp;
528         struct udp_hdr *udp;
529         int ret = 0;
530
531         memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN);
532         memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN);
533
534         key.proto = ipv6_hdr->proto;
535
536         switch (ipv6_hdr->proto) {
537         case IPPROTO_TCP:
538                 tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr +
539                                         sizeof(struct ipv6_hdr));
540                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
541                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
542                 break;
543
544         case IPPROTO_UDP:
545                 udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr +
546                                         sizeof(struct ipv6_hdr));
547                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
548                 key.port_src = rte_be_to_cpu_16(udp->src_port);
549                 break;
550
551         default:
552                 key.port_dst = 0;
553                 key.port_src = 0;
554                 break;
555         }
556
557         /* Find destination port */
558         ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
559         return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
560 }
561 #endif
562
563 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
564 static inline uint16_t
565 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
566                 lookup_struct_t *ipv4_l3fwd_lookup_struct)
567 {
568         uint32_t next_hop;
569
570         return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
571                         rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
572                         next_hop : portid);
573 }
574 #endif
575
576 static inline void
577 parse_ptype_one(struct rte_mbuf *m)
578 {
579         struct ether_hdr *eth_hdr;
580         uint32_t packet_type = RTE_PTYPE_UNKNOWN;
581         uint16_t ether_type;
582
583         eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
584         ether_type = eth_hdr->ether_type;
585         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
586                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
587         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
588                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
589
590         m->packet_type = packet_type;
591 }
592
593 static uint16_t
594 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
595                struct rte_mbuf *pkts[], uint16_t nb_pkts,
596                uint16_t max_pkts __rte_unused,
597                void *user_param __rte_unused)
598 {
599         unsigned int i;
600
601         for (i = 0; i < nb_pkts; ++i)
602                 parse_ptype_one(pkts[i]);
603
604         return nb_pkts;
605 }
606
607 static int
608 add_cb_parse_ptype(uint16_t portid, uint16_t queueid)
609 {
610         printf("Port %d: softly parse packet type info\n", portid);
611         if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL))
612                 return 0;
613
614         printf("Failed to add rx callback: port=%d\n", portid);
615         return -1;
616 }
617
/*
 * Forward one packet: look up the egress port from the L3 header,
 * rewrite the Ethernet addresses, and enqueue it for transmission.
 * Non-IP packets (and IPv6 packets in LPM mode) are dropped.
 */
static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid,
				struct lcore_conf *qconf)
{
	struct ether_hdr *eth_hdr;
	struct ipv4_hdr *ipv4_hdr;
	void *d_addr_bytes;
	uint16_t dst_port;

	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
		/* Handle IPv4 headers.*/
		ipv4_hdr =
			rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
						sizeof(struct ether_hdr));

#ifdef DO_RFC_1812_CHECKS
		/* Check to make sure the packet is valid (RFC1812) */
		if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
			rte_pktmbuf_free(m);
			return;
		}
#endif

		dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
					qconf->ipv4_lookup_struct);
		/* Fall back to the ingress port when the looked-up port is
		 * out of range or not in the enabled port mask. */
		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		/* NOTE(review): this 64-bit store writes two bytes past the
		 * 6-byte destination MAC; it appears to rely on the source
		 * MAC following immediately and being rewritten by
		 * ether_addr_copy() below -- confirm struct layout. */
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
		/* Update time to live and header checksum; the checksum
		 * increment compensates for the TTL decrement. */
		--(ipv4_hdr->time_to_live);
		++(ipv4_hdr->hdr_checksum);
#endif

		/* src addr */
		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

		send_single_packet(m, dst_port);
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		struct ipv6_hdr *ipv6_hdr;

		ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
						sizeof(struct ether_hdr));

		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
					qconf->ipv6_lookup_struct);

		/* Same fallback rule as the IPv4 path above. */
		if (dst_port >= RTE_MAX_ETHPORTS ||
				(enabled_port_mask & 1 << dst_port) == 0)
			dst_port = portid;

		/* 02:00:00:00:00:xx */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) =
			0x000000000002 + ((uint64_t)dst_port << 40);

		/* src addr */
		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

		send_single_packet(m, dst_port);
#else
		/* We don't currently handle IPv6 packets in LPM mode. */
		rte_pktmbuf_free(m);
#endif
	} else
		/* Neither IPv4 nor IPv6: drop. */
		rte_pktmbuf_free(m);

}
697
#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

/*
 * Map the number of consecutive empty polls onto a sleep duration (us).
 * Short idle spells get a minimal 1 us nap; once the queue has been empty
 * for SUSPEND_THRESHOLD polls, sleep 300 us -- long enough to cover the
 * latency of switching from a deep C state back to C0.
 */
static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	return (zero_rx_packet_count < SUSPEND_THRESHOLD) ?
			MINIMUM_SLEEP_TIME : SUSPEND_THRESHOLD;
}
713
/*
 * Suggest a frequency scale-up step based on the hardware Rx queue backlog.
 * A deep backlog requests the highest frequency immediately; moderate
 * backlogs accumulate a per-lcore "trend" counter that, once it crosses
 * FREQ_UP_THRESHOLD, requests a single step up.
 */
static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
			     uint16_t port_id,
			     uint16_t queue_id)
{
	/* Descriptors currently pending in the HW Rx queue. */
	uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
/**
 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries
 * per iteration
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

	/* Backlog beyond three bursts: jump straight to top frequency. */
	if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHEST;
	} else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
		/* Moderate backlog: grow the trend quickly. */
		stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
	else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
		/* Slight backlog: grow the trend slowly. */
		stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

	/* Sustained pressure: request one step up and reset the trend. */
	if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
		stats[lcore_id].trend = 0;
		return FREQ_HIGHER;
	}

	return FREQ_CURRENT;
}
746
/**
 * force polling thread sleep until one-shot rx interrupt triggers
 * @param num
 *  Maximum number of Rx interrupt events to wait for (one per armed
 *  Rx queue of this lcore).
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num)
{
	struct rte_epoll_event event[num];
	int n, i;
	uint16_t port_id;
	uint8_t queue_id;
	void *data;

	RTE_LOG(INFO, L3FWD_POWER,
		"lcore %u sleeps until interrupt triggers\n",
		rte_lcore_id());

	/* Block until at least one Rx interrupt fires (infinite timeout). */
	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1);
	for (i = 0; i < n; i++) {
		/* epdata.data packs (port_id << CHAR_BIT | queue_id),
		 * as encoded in event_register(). */
		data = event[i].epdata.data;
		port_id = ((uintptr_t)data) >> CHAR_BIT;
		queue_id = ((uintptr_t)data) &
			RTE_LEN2MASK(CHAR_BIT, uint8_t);
		/* Disarm the interrupt; polling resumes on this queue. */
		rte_eth_dev_rx_intr_disable(port_id, queue_id);
		RTE_LOG(INFO, L3FWD_POWER,
			"lcore %u is waked up from rx interrupt on"
			" port %d queue %d\n",
			rte_lcore_id(), port_id, queue_id);
	}

	return 0;
}
784
785 static void turn_on_intr(struct lcore_conf *qconf)
786 {
787         int i;
788         struct lcore_rx_queue *rx_queue;
789         uint8_t queue_id;
790         uint16_t port_id;
791
792         for (i = 0; i < qconf->n_rx_queue; ++i) {
793                 rx_queue = &(qconf->rx_queue_list[i]);
794                 port_id = rx_queue->port_id;
795                 queue_id = rx_queue->queue_id;
796
797                 rte_spinlock_lock(&(locks[port_id]));
798                 rte_eth_dev_rx_intr_enable(port_id, queue_id);
799                 rte_spinlock_unlock(&(locks[port_id]));
800         }
801 }
802
803 static int event_register(struct lcore_conf *qconf)
804 {
805         struct lcore_rx_queue *rx_queue;
806         uint8_t queueid;
807         uint16_t portid;
808         uint32_t data;
809         int ret;
810         int i;
811
812         for (i = 0; i < qconf->n_rx_queue; ++i) {
813                 rx_queue = &(qconf->rx_queue_list[i]);
814                 portid = rx_queue->port_id;
815                 queueid = rx_queue->queue_id;
816                 data = portid << CHAR_BIT | queueid;
817
818                 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
819                                                 RTE_EPOLL_PER_THREAD,
820                                                 RTE_INTR_EVENT_ADD,
821                                                 (void *)((uintptr_t)data));
822                 if (ret)
823                         return ret;
824         }
825
826         return 0;
827 }
828
829 /* main processing loop */
830 static int
831 main_loop(__attribute__((unused)) void *dummy)
832 {
833         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
834         unsigned lcore_id;
835         uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz;
836         uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power;
837         int i, j, nb_rx;
838         uint8_t queueid;
839         uint16_t portid;
840         struct lcore_conf *qconf;
841         struct lcore_rx_queue *rx_queue;
842         enum freq_scale_hint_t lcore_scaleup_hint;
843         uint32_t lcore_rx_idle_count = 0;
844         uint32_t lcore_idle_hint = 0;
845         int intr_en = 0;
846
847         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
848
849         prev_tsc = 0;
850         hz = rte_get_timer_hz();
851         tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND;
852
853         lcore_id = rte_lcore_id();
854         qconf = &lcore_conf[lcore_id];
855
856         if (qconf->n_rx_queue == 0) {
857                 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id);
858                 return 0;
859         }
860
861         RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id);
862
863         for (i = 0; i < qconf->n_rx_queue; i++) {
864                 portid = qconf->rx_queue_list[i].port_id;
865                 queueid = qconf->rx_queue_list[i].queue_id;
866                 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
867                         "rxqueueid=%hhu\n", lcore_id, portid, queueid);
868         }
869
870         /* add into event wait list */
871         if (event_register(qconf) == 0)
872                 intr_en = 1;
873         else
874                 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n");
875
876         while (1) {
877                 stats[lcore_id].nb_iteration_looped++;
878
879                 cur_tsc = rte_rdtsc();
880                 cur_tsc_power = cur_tsc;
881
882                 /*
883                  * TX burst queue drain
884                  */
885                 diff_tsc = cur_tsc - prev_tsc;
886                 if (unlikely(diff_tsc > drain_tsc)) {
887                         for (i = 0; i < qconf->n_tx_port; ++i) {
888                                 portid = qconf->tx_port_id[i];
889                                 rte_eth_tx_buffer_flush(portid,
890                                                 qconf->tx_queue_id[portid],
891                                                 qconf->tx_buffer[portid]);
892                         }
893                         prev_tsc = cur_tsc;
894                 }
895
896                 diff_tsc_power = cur_tsc_power - prev_tsc_power;
897                 if (diff_tsc_power > tim_res_tsc) {
898                         rte_timer_manage();
899                         prev_tsc_power = cur_tsc_power;
900                 }
901
902 start_rx:
903                 /*
904                  * Read packet from RX queues
905                  */
906                 lcore_scaleup_hint = FREQ_CURRENT;
907                 lcore_rx_idle_count = 0;
908                 for (i = 0; i < qconf->n_rx_queue; ++i) {
909                         rx_queue = &(qconf->rx_queue_list[i]);
910                         rx_queue->idle_hint = 0;
911                         portid = rx_queue->port_id;
912                         queueid = rx_queue->queue_id;
913
914                         nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
915                                                                 MAX_PKT_BURST);
916
917                         stats[lcore_id].nb_rx_processed += nb_rx;
918                         if (unlikely(nb_rx == 0)) {
919                                 /**
920                                  * no packet received from rx queue, try to
921                                  * sleep for a while forcing CPU enter deeper
922                                  * C states.
923                                  */
924                                 rx_queue->zero_rx_packet_count++;
925
926                                 if (rx_queue->zero_rx_packet_count <=
927                                                         MIN_ZERO_POLL_COUNT)
928                                         continue;
929
930                                 rx_queue->idle_hint = power_idle_heuristic(\
931                                         rx_queue->zero_rx_packet_count);
932                                 lcore_rx_idle_count++;
933                         } else {
934                                 rx_queue->zero_rx_packet_count = 0;
935
936                                 /**
937                                  * do not scale up frequency immediately as
938                                  * user to kernel space communication is costly
939                                  * which might impact packet I/O for received
940                                  * packets.
941                                  */
942                                 rx_queue->freq_up_hint =
943                                         power_freq_scaleup_heuristic(lcore_id,
944                                                         portid, queueid);
945                         }
946
947                         /* Prefetch first packets */
948                         for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
949                                 rte_prefetch0(rte_pktmbuf_mtod(
950                                                 pkts_burst[j], void *));
951                         }
952
953                         /* Prefetch and forward already prefetched packets */
954                         for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
955                                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
956                                                 j + PREFETCH_OFFSET], void *));
957                                 l3fwd_simple_forward(pkts_burst[j], portid,
958                                                                 qconf);
959                         }
960
961                         /* Forward remaining prefetched packets */
962                         for (; j < nb_rx; j++) {
963                                 l3fwd_simple_forward(pkts_burst[j], portid,
964                                                                 qconf);
965                         }
966                 }
967
968                 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
969                         for (i = 1, lcore_scaleup_hint =
970                                 qconf->rx_queue_list[0].freq_up_hint;
971                                         i < qconf->n_rx_queue; ++i) {
972                                 rx_queue = &(qconf->rx_queue_list[i]);
973                                 if (rx_queue->freq_up_hint >
974                                                 lcore_scaleup_hint)
975                                         lcore_scaleup_hint =
976                                                 rx_queue->freq_up_hint;
977                         }
978
979                         if (lcore_scaleup_hint == FREQ_HIGHEST) {
980                                 if (rte_power_freq_max)
981                                         rte_power_freq_max(lcore_id);
982                         } else if (lcore_scaleup_hint == FREQ_HIGHER) {
983                                 if (rte_power_freq_up)
984                                         rte_power_freq_up(lcore_id);
985                         }
986                 } else {
987                         /**
988                          * All Rx queues empty in recent consecutive polls,
989                          * sleep in a conservative manner, meaning sleep as
990                          * less as possible.
991                          */
992                         for (i = 1, lcore_idle_hint =
993                                 qconf->rx_queue_list[0].idle_hint;
994                                         i < qconf->n_rx_queue; ++i) {
995                                 rx_queue = &(qconf->rx_queue_list[i]);
996                                 if (rx_queue->idle_hint < lcore_idle_hint)
997                                         lcore_idle_hint = rx_queue->idle_hint;
998                         }
999
1000                         if (lcore_idle_hint < SUSPEND_THRESHOLD)
1001                                 /**
1002                                  * execute "pause" instruction to avoid context
1003                                  * switch which generally take hundred of
1004                                  * microseconds for short sleep.
1005                                  */
1006                                 rte_delay_us(lcore_idle_hint);
1007                         else {
1008                                 /* suspend until rx interrupt trigges */
1009                                 if (intr_en) {
1010                                         turn_on_intr(qconf);
1011                                         sleep_until_rx_interrupt(
1012                                                 qconf->n_rx_queue);
1013                                         /**
1014                                          * start receiving packets immediately
1015                                          */
1016                                         goto start_rx;
1017                                 }
1018                         }
1019                         stats[lcore_id].sleep_time += lcore_idle_hint;
1020                 }
1021         }
1022 }
1023
1024 static int
1025 check_lcore_params(void)
1026 {
1027         uint8_t queue, lcore;
1028         uint16_t i;
1029         int socketid;
1030
1031         for (i = 0; i < nb_lcore_params; ++i) {
1032                 queue = lcore_params[i].queue_id;
1033                 if (queue >= MAX_RX_QUEUE_PER_PORT) {
1034                         printf("invalid queue number: %hhu\n", queue);
1035                         return -1;
1036                 }
1037                 lcore = lcore_params[i].lcore_id;
1038                 if (!rte_lcore_is_enabled(lcore)) {
1039                         printf("error: lcore %hhu is not enabled in lcore "
1040                                                         "mask\n", lcore);
1041                         return -1;
1042                 }
1043                 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) &&
1044                                                         (numa_on == 0)) {
1045                         printf("warning: lcore %hhu is on socket %d with numa "
1046                                                 "off\n", lcore, socketid);
1047                 }
1048         }
1049         return 0;
1050 }
1051
1052 static int
1053 check_port_config(void)
1054 {
1055         unsigned portid;
1056         uint16_t i;
1057
1058         for (i = 0; i < nb_lcore_params; ++i) {
1059                 portid = lcore_params[i].port_id;
1060                 if ((enabled_port_mask & (1 << portid)) == 0) {
1061                         printf("port %u is not enabled in port mask\n",
1062                                                                 portid);
1063                         return -1;
1064                 }
1065                 if (!rte_eth_dev_is_valid_port(portid)) {
1066                         printf("port %u is not present on the board\n",
1067                                                                 portid);
1068                         return -1;
1069                 }
1070         }
1071         return 0;
1072 }
1073
1074 static uint8_t
1075 get_port_n_rx_queues(const uint16_t port)
1076 {
1077         int queue = -1;
1078         uint16_t i;
1079
1080         for (i = 0; i < nb_lcore_params; ++i) {
1081                 if (lcore_params[i].port_id == port &&
1082                                 lcore_params[i].queue_id > queue)
1083                         queue = lcore_params[i].queue_id;
1084         }
1085         return (uint8_t)(++queue);
1086 }
1087
1088 static int
1089 init_lcore_rx_queues(void)
1090 {
1091         uint16_t i, nb_rx_queue;
1092         uint8_t lcore;
1093
1094         for (i = 0; i < nb_lcore_params; ++i) {
1095                 lcore = lcore_params[i].lcore_id;
1096                 nb_rx_queue = lcore_conf[lcore].n_rx_queue;
1097                 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
1098                         printf("error: too many queues (%u) for lcore: %u\n",
1099                                 (unsigned)nb_rx_queue + 1, (unsigned)lcore);
1100                         return -1;
1101                 } else {
1102                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
1103                                 lcore_params[i].port_id;
1104                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
1105                                 lcore_params[i].queue_id;
1106                         lcore_conf[lcore].n_rx_queue++;
1107                 }
1108         }
1109         return 0;
1110 }
1111
/* Print command-line usage for the application to stdout. */
static void
print_usage(const char *prgname)
{
        printf("%s [EAL options] -- -p PORTMASK -P", prgname);
        fputs("  [--config (port,queue,lcore)[,(port,queue,lcore]]"
                "  [--high-perf-cores CORELIST"
                "  [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]"
                "  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
                "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
                "  -P : enable promiscuous mode\n"
                "  --config (port,queue,lcore): rx queues configuration\n"
                "  --high-perf-cores CORELIST: list of high performance cores\n"
                "  --perf-config: similar as config, cores specified as indices"
                " for bins containing high or regular performance cores\n"
                "  --no-numa: optional, disable numa awareness\n"
                "  --enable-jumbo: enable jumbo frame"
                " which max packet len is PKTLEN in decimal (64-9600)\n"
                "  --parse-ptype: parse packet type by software\n",
                stdout);
}
1133
/*
 * Parse a decimal packet-length string.
 * Returns the parsed length, or -1 when the string is empty,
 * contains trailing garbage, or parses to zero.
 */
static int parse_max_pkt_len(const char *pktlen)
{
        char *endp = NULL;
        unsigned long val;

        val = strtoul(pktlen, &endp, 10);
        if (pktlen[0] == '\0' || endp == NULL || *endp != '\0' || val == 0)
                return -1;

        return val;
}
1149
/*
 * Parse a hexadecimal port bitmask string.
 * Returns the mask, or -1 when the string is empty, malformed,
 * or evaluates to zero (an empty mask is treated as an error).
 */
static int
parse_portmask(const char *portmask)
{
        char *endp = NULL;
        unsigned long mask;

        mask = strtoul(portmask, &endp, 16);
        if (portmask[0] == '\0' || endp == NULL || *endp != '\0' || mask == 0)
                return -1;

        return mask;
}
1166
/*
 * Parse the --config argument: a list of "(port,queue,lcore)" tuples.
 * Fills lcore_params_array, updates nb_lcore_params, and points
 * lcore_params at the array.  Returns 0 on success, -1 on any
 * malformed tuple or when MAX_LCORE_PARAMS is exceeded.
 */
static int
parse_config(const char *q_arg)
{
        char s[256];
        const char *p, *p0 = q_arg;
        char *end;
        enum fieldnames {
                FLD_PORT = 0,
                FLD_QUEUE,
                FLD_LCORE,
                _NUM_FLD
        };
        unsigned long int_fld[_NUM_FLD];
        char *str_fld[_NUM_FLD];
        int i;
        unsigned size;

        /* Restart from an empty configuration on every call. */
        nb_lcore_params = 0;

        /* Walk the string one "(...)" group at a time. */
        while ((p = strchr(p0,'(')) != NULL) {
                ++p;
                if((p0 = strchr(p,')')) == NULL)
                        return -1;

                size = p0 - p;
                if(size >= sizeof(s))
                        return -1;

                /* Copy the tuple body and split it on commas in place. */
                snprintf(s, sizeof(s), "%.*s", size, p);
                if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
                                                                _NUM_FLD)
                        return -1;
                for (i = 0; i < _NUM_FLD; i++){
                        errno = 0;
                        /* Each field must be numeric and fit in a uint8_t. */
                        int_fld[i] = strtoul(str_fld[i], &end, 0);
                        if (errno != 0 || end == str_fld[i] || int_fld[i] >
                                                                        255)
                                return -1;
                }
                if (nb_lcore_params >= MAX_LCORE_PARAMS) {
                        printf("exceeded max number of lcore params: %hu\n",
                                nb_lcore_params);
                        return -1;
                }
                lcore_params_array[nb_lcore_params].port_id =
                                (uint8_t)int_fld[FLD_PORT];
                lcore_params_array[nb_lcore_params].queue_id =
                                (uint8_t)int_fld[FLD_QUEUE];
                lcore_params_array[nb_lcore_params].lcore_id =
                                (uint8_t)int_fld[FLD_LCORE];
                ++nb_lcore_params;
        }
        /* Switch from the default table to the user-supplied one. */
        lcore_params = lcore_params_array;

        return 0;
}
1223
1224 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
1225
/* Parse the argument given in the command line of the application */
/*
 * Returns the index of the first non-option argument (so main() can
 * re-point argv), or -1 on any invalid option (usage printed first).
 * Side effects: sets enabled_port_mask, promiscuous_on, numa_on,
 * parse_ptype, and jumbo-frame fields of port_conf.
 */
static int
parse_args(int argc, char **argv)
{
        int opt, ret;
        char **argvopt;
        int option_index;
        char *prgname = argv[0];
        /* Long options all funnel into the "case 0" branch below and
         * are distinguished by lgopts[option_index].name. */
        static struct option lgopts[] = {
                {"config", 1, 0, 0},
                {"perf-config", 1, 0, 0},
                {"high-perf-cores", 1, 0, 0},
                {"no-numa", 0, 0, 0},
                {"enable-jumbo", 0, 0, 0},
                {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
                {NULL, 0, 0, 0}
        };

        argvopt = argv;

        while ((opt = getopt_long(argc, argvopt, "p:P",
                                lgopts, &option_index)) != EOF) {

                switch (opt) {
                /* portmask */
                case 'p':
                        enabled_port_mask = parse_portmask(optarg);
                        if (enabled_port_mask == 0) {
                                printf("invalid portmask\n");
                                print_usage(prgname);
                                return -1;
                        }
                        break;
                case 'P':
                        printf("Promiscuous mode selected\n");
                        promiscuous_on = 1;
                        break;

                /* long options */
                case 0:
                        if (!strncmp(lgopts[option_index].name, "config", 6)) {
                                ret = parse_config(optarg);
                                if (ret) {
                                        printf("invalid config\n");
                                        print_usage(prgname);
                                        return -1;
                                }
                        }

                        if (!strncmp(lgopts[option_index].name,
                                        "perf-config", 11)) {
                                ret = parse_perf_config(optarg);
                                if (ret) {
                                        printf("invalid perf-config\n");
                                        print_usage(prgname);
                                        return -1;
                                }
                        }

                        if (!strncmp(lgopts[option_index].name,
                                        "high-perf-cores", 15)) {
                                ret = parse_perf_core_list(optarg);
                                if (ret) {
                                        printf("invalid high-perf-cores\n");
                                        print_usage(prgname);
                                        return -1;
                                }
                        }

                        if (!strncmp(lgopts[option_index].name,
                                                "no-numa", 7)) {
                                printf("numa is disabled \n");
                                numa_on = 0;
                        }

                        if (!strncmp(lgopts[option_index].name,
                                        "enable-jumbo", 12)) {
                                struct option lenopts =
                                        {"max-pkt-len", required_argument, \
                                                                        0, 0};

                                printf("jumbo frame is enabled \n");
                                port_conf.rxmode.offloads |=
                                                DEV_RX_OFFLOAD_JUMBO_FRAME;
                                port_conf.txmode.offloads |=
                                                DEV_TX_OFFLOAD_MULTI_SEGS;

                                /**
                                 * if no max-pkt-len set, use the default value
                                 * ETHER_MAX_LEN
                                 */
                                /* Nested getopt_long consumes the optional
                                 * --max-pkt-len argument immediately
                                 * following --enable-jumbo. */
                                if (0 == getopt_long(argc, argvopt, "",
                                                &lenopts, &option_index)) {
                                        ret = parse_max_pkt_len(optarg);
                                        if ((ret < 64) ||
                                                (ret > MAX_JUMBO_PKT_LEN)){
                                                printf("invalid packet "
                                                                "length\n");
                                                print_usage(prgname);
                                                return -1;
                                        }
                                        port_conf.rxmode.max_rx_pkt_len = ret;
                                }
                                printf("set jumbo frame "
                                        "max packet length to %u\n",
                                (unsigned int)port_conf.rxmode.max_rx_pkt_len);
                        }

                        if (!strncmp(lgopts[option_index].name,
                                     CMD_LINE_OPT_PARSE_PTYPE,
                                     sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
                                printf("soft parse-ptype is enabled\n");
                                parse_ptype = 1;
                        }

                        break;

                default:
                        print_usage(prgname);
                        return -1;
                }
        }

        /* Make argv[optind-1] the program name so main() can pass the
         * remaining arguments to other consumers if needed. */
        if (optind >= 0)
                argv[optind-1] = prgname;

        ret = optind-1;
        optind = 1; /* reset getopt lib */
        return ret;
}
1356
1357 static void
1358 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
1359 {
1360         char buf[ETHER_ADDR_FMT_SIZE];
1361         ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
1362         printf("%s%s", name, buf);
1363 }
1364
1365 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
/*
 * Create the per-socket IPv4 and IPv6 exact-match hash tables and
 * populate them from the static route arrays.  Exits the program on
 * any failure, since forwarding cannot work without the tables.
 * Only compiled when APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH.
 */
static void
setup_hash(int socketid)
{
        struct rte_hash_parameters ipv4_l3fwd_hash_params = {
                .name = NULL,
                .entries = L3FWD_HASH_ENTRIES,
                .key_len = sizeof(struct ipv4_5tuple),
                .hash_func = DEFAULT_HASH_FUNC,
                .hash_func_init_val = 0,
        };

        struct rte_hash_parameters ipv6_l3fwd_hash_params = {
                .name = NULL,
                .entries = L3FWD_HASH_ENTRIES,
                .key_len = sizeof(struct ipv6_5tuple),
                .hash_func = DEFAULT_HASH_FUNC,
                .hash_func_init_val = 0,
        };

        unsigned i;
        int ret;
        char s[64];

        /* create ipv4 hash */
        snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
        ipv4_l3fwd_hash_params.name = s;
        ipv4_l3fwd_hash_params.socket_id = socketid;
        ipv4_l3fwd_lookup_struct[socketid] =
                rte_hash_create(&ipv4_l3fwd_hash_params);
        if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
                rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
                                "socket %d\n", socketid);

        /* create ipv6 hash */
        snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
        ipv6_l3fwd_hash_params.name = s;
        ipv6_l3fwd_hash_params.socket_id = socketid;
        ipv6_l3fwd_lookup_struct[socketid] =
                rte_hash_create(&ipv6_l3fwd_hash_params);
        if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
                rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
                                "socket %d\n", socketid);


        /* populate the ipv4 hash */
        for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
                ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid],
                                (void *) &ipv4_l3fwd_route_array[i].key);
                if (ret < 0) {
                        rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
                                "l3fwd hash on socket %d\n", i, socketid);
                }
                /* rte_hash_add_key returns the key's slot index; use it to
                 * index the parallel out-interface table. */
                ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
                printf("Hash: Adding key\n");
                print_ipv4_key(ipv4_l3fwd_route_array[i].key);
        }

        /* populate the ipv6 hash */
        for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
                ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid],
                                (void *) &ipv6_l3fwd_route_array[i].key);
                if (ret < 0) {
                        rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
                                "l3fwd hash on socket %d\n", i, socketid);
                }
                /* Same slot-index scheme as the IPv4 table above. */
                ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
                printf("Hash: Adding key\n");
                print_ipv6_key(ipv6_l3fwd_route_array[i].key);
        }
}
1436 #endif
1437
1438 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1439 static void
1440 setup_lpm(int socketid)
1441 {
1442         unsigned i;
1443         int ret;
1444         char s[64];
1445
1446         /* create the LPM table */
1447         struct rte_lpm_config lpm_ipv4_config;
1448
1449         lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
1450         lpm_ipv4_config.number_tbl8s = 256;
1451         lpm_ipv4_config.flags = 0;
1452
1453         snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
1454         ipv4_l3fwd_lookup_struct[socketid] =
1455                         rte_lpm_create(s, socketid, &lpm_ipv4_config);
1456         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1457                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
1458                                 " on socket %d\n", socketid);
1459
1460         /* populate the LPM table */
1461         for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
1462                 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
1463                         ipv4_l3fwd_route_array[i].ip,
1464                         ipv4_l3fwd_route_array[i].depth,
1465                         ipv4_l3fwd_route_array[i].if_out);
1466
1467                 if (ret < 0) {
1468                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
1469                                 "l3fwd LPM table on socket %d\n",
1470                                 i, socketid);
1471                 }
1472
1473                 printf("LPM: Adding route 0x%08x / %d (%d)\n",
1474                         (unsigned)ipv4_l3fwd_route_array[i].ip,
1475                         ipv4_l3fwd_route_array[i].depth,
1476                         ipv4_l3fwd_route_array[i].if_out);
1477         }
1478 }
1479 #endif
1480
/*
 * Per-socket memory setup: for every enabled lcore, create (once per
 * socket) the mbuf pool and the lookup structure (LPM or hash,
 * depending on APP_LOOKUP_METHOD), then wire the lookup struct into
 * the lcore's configuration.  Exits on allocation failure; otherwise
 * returns 0.
 */
static int
init_mem(unsigned nb_mbuf)
{
        struct lcore_conf *qconf;
        int socketid;
        unsigned lcore_id;
        char s[64];

        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
                if (rte_lcore_is_enabled(lcore_id) == 0)
                        continue;

                /* With NUMA off, everything is allocated on socket 0. */
                if (numa_on)
                        socketid = rte_lcore_to_socket_id(lcore_id);
                else
                        socketid = 0;

                if (socketid >= NB_SOCKETS) {
                        rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
                                        "out of range %d\n", socketid,
                                                lcore_id, NB_SOCKETS);
                }
                /* First lcore seen on this socket creates the shared pool. */
                if (pktmbuf_pool[socketid] == NULL) {
                        snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
                        pktmbuf_pool[socketid] =
                                rte_pktmbuf_pool_create(s, nb_mbuf,
                                        MEMPOOL_CACHE_SIZE, 0,
                                        RTE_MBUF_DEFAULT_BUF_SIZE,
                                        socketid);
                        if (pktmbuf_pool[socketid] == NULL)
                                rte_exit(EXIT_FAILURE,
                                        "Cannot init mbuf pool on socket %d\n",
                                                                socketid);
                        else
                                printf("Allocated mbuf pool on socket %d\n",
                                                                socketid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
                        setup_lpm(socketid);
#else
                        setup_hash(socketid);
#endif
                }
                /* Every lcore on the socket shares the same lookup struct. */
                qconf = &lcore_conf[lcore_id];
                qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
                qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
#endif
        }
        return 0;
}
1532
1533 /* Check the link status of all ports in up to 9s, and print them finally */
1534 static void
1535 check_all_ports_link_status(uint32_t port_mask)
1536 {
1537 #define CHECK_INTERVAL 100 /* 100ms */
1538 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
1539         uint8_t count, all_ports_up, print_flag = 0;
1540         uint16_t portid;
1541         struct rte_eth_link link;
1542
1543         printf("\nChecking link status");
1544         fflush(stdout);
1545         for (count = 0; count <= MAX_CHECK_TIME; count++) {
1546                 all_ports_up = 1;
1547                 RTE_ETH_FOREACH_DEV(portid) {
1548                         if ((port_mask & (1 << portid)) == 0)
1549                                 continue;
1550                         memset(&link, 0, sizeof(link));
1551                         rte_eth_link_get_nowait(portid, &link);
1552                         /* print link status if flag set */
1553                         if (print_flag == 1) {
1554                                 if (link.link_status)
1555                                         printf("Port %d Link Up - speed %u "
1556                                                 "Mbps - %s\n", (uint8_t)portid,
1557                                                 (unsigned)link.link_speed,
1558                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
1559                                         ("full-duplex") : ("half-duplex\n"));
1560                                 else
1561                                         printf("Port %d Link Down\n",
1562                                                 (uint8_t)portid);
1563                                 continue;
1564                         }
1565                         /* clear all_ports_up flag if any link down */
1566                         if (link.link_status == ETH_LINK_DOWN) {
1567                                 all_ports_up = 0;
1568                                 break;
1569                         }
1570                 }
1571                 /* after finally printing all link status, get out */
1572                 if (print_flag == 1)
1573                         break;
1574
1575                 if (all_ports_up == 0) {
1576                         printf(".");
1577                         fflush(stdout);
1578                         rte_delay_ms(CHECK_INTERVAL);
1579                 }
1580
1581                 /* set the print_flag if all ports up or timeout */
1582                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
1583                         print_flag = 1;
1584                         printf("done\n");
1585                 }
1586         }
1587 }
1588
/*
 * Query whether the PMD for @portid can classify the L3 packet types
 * this application needs (IPv4 always; IPv6 too in exact-match mode).
 * Returns 1 when hardware ptype parsing suffices, 0 when the caller
 * must fall back to software parsing (--parse-ptype).
 */
static int check_ptype(uint16_t portid)
{
        int i, ret;
        int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
        int ptype_l3_ipv6 = 0;
#endif
        uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

        /* First call with NULL buffer returns the number of supported
         * ptypes (or <= 0 when the query itself is unsupported). */
        ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
        if (ret <= 0)
                return 0;

        uint32_t ptypes[ret];

        ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
        for (i = 0; i < ret; ++i) {
                if (ptypes[i] & RTE_PTYPE_L3_IPV4)
                        ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
                if (ptypes[i] & RTE_PTYPE_L3_IPV6)
                        ptype_l3_ipv6 = 1;
#endif
        }

        if (ptype_l3_ipv4 == 0)
                printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
        if (ptype_l3_ipv6 == 0)
                printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

        /* LPM mode only routes IPv4; exact-match needs IPv6 as well. */
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
        if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
        if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
                return 1;

        return 0;

}
1632
1633 static int
1634 init_power_library(void)
1635 {
1636         int ret = 0, lcore_id;
1637         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1638                 if (rte_lcore_is_enabled(lcore_id)) {
1639                         /* init power management library */
1640                         ret = rte_power_init(lcore_id);
1641                         if (ret)
1642                                 RTE_LOG(ERR, POWER,
1643                                 "Library initialization failed on core %u\n",
1644                                 lcore_id);
1645                 }
1646         }
1647         return ret;
1648 }
1649
/*
 * Application entry point for the L3 forwarding power-management sample.
 *
 * Flow: install SIGINT handler -> init EAL -> init timer subsystem ->
 * parse application args -> init the power library on every enabled
 * lcore -> for each enabled port: configure (RSS/offloads trimmed to
 * device capabilities), create one TX queue per lcore and the RX queues
 * from the lcore/queue config -> arm a per-lcore power timer -> start
 * ports -> wait for links -> launch main_loop on all lcores.
 *
 * Returns 0 when all worker lcores exit cleanly, -1 if any lcore
 * returns an error; any setup failure aborts via rte_exit().
 */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	/* skip the EAL arguments consumed above */
	argc -= ret;
	argv += ret;

	/* init RTE timer library to be used late */
	rte_timer_subsystem_init();

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	/* frequency scaling must be available before the lcores start polling */
	if (init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	/* NOTE(review): nb_ports appears unused after this assignment —
	 * confirm whether it can be dropped. */
	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		/* per-port copy so device capabilities can trim the config */
		struct rte_eth_conf local_port_conf = port_conf;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid );
		fflush(stdout);

		rte_eth_dev_info_get(portid, &dev_info);
		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure not existed rxq: "
				"port=%d\n", portid);

		/* one TX queue per lcore, capped at what the device offers */
		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue );
		/* If number of Rx queue is 0, no need to enable Rx interrupt */
		if (nb_rx_queue == 0)
			local_port_conf.intr_conf.rxq = 0;
		/* NOTE(review): dev_info was already fetched above; this
		 * second query looks redundant — confirm before removing. */
		rte_eth_dev_info_get(portid, &dev_info);
		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;

		/* keep only the RSS hash functions the hardware supports */
		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support,"
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		/* let the PMD round the descriptor counts to supported values */
		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "Cannot adjust number of descriptors: err=%d, port=%d\n",
				 ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						 portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* the device may expose fewer TX queues than lcores */
			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		/* init timer structures for each enabled lcore */
		rte_timer_init(&power_timers[lcore_id]);
		hz = rte_get_timer_hz();
		/* fire power_timer_cb every 1/TIMER_NUMBER_PER_SECOND second */
		rte_timer_reset(&power_timers[lcore_id],
			hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
						power_timer_cb, NULL);

		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
		fflush(stdout);
		/* init RX queues */
		for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_rxconf rxq_conf;
			struct rte_eth_dev *dev;
			struct rte_eth_conf *conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;
			dev = &rte_eth_devices[portid];
			conf = &dev->data->dev_conf;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			rte_eth_dev_info_get(portid, &dev_info);
			rxq_conf = dev_info.default_rxconf;
			/* inherit the offloads applied at configure time */
			rxq_conf.offloads = conf->rxmode.offloads;
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
				socketid, &rxq_conf,
				pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			/* parse ptypes in software, or require PMD support */
			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						 "Fail to add ptype cb\n");
			} else if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					 "PMD can not provide needed ptypes\n");
		}
	}

	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
						"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected  ports of the
		 * target machine.
		 */
		if (promiscuous_on)
			rte_eth_promiscuous_enable(portid);
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));
	}

	check_all_ports_link_status(enabled_port_mask);

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}