examples/l3fwd-power: add --parse-ptype option
[dpdk.git] / examples / l3fwd-power / main.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <sys/queue.h>
41 #include <stdarg.h>
42 #include <errno.h>
43 #include <getopt.h>
44 #include <unistd.h>
45 #include <signal.h>
46
47 #include <rte_common.h>
48 #include <rte_byteorder.h>
49 #include <rte_log.h>
50 #include <rte_malloc.h>
51 #include <rte_memory.h>
52 #include <rte_memcpy.h>
53 #include <rte_memzone.h>
54 #include <rte_eal.h>
55 #include <rte_per_lcore.h>
56 #include <rte_launch.h>
57 #include <rte_atomic.h>
58 #include <rte_cycles.h>
59 #include <rte_prefetch.h>
60 #include <rte_lcore.h>
61 #include <rte_per_lcore.h>
62 #include <rte_branch_prediction.h>
63 #include <rte_interrupts.h>
64 #include <rte_pci.h>
65 #include <rte_random.h>
66 #include <rte_debug.h>
67 #include <rte_ether.h>
68 #include <rte_ethdev.h>
69 #include <rte_mempool.h>
70 #include <rte_mbuf.h>
71 #include <rte_ip.h>
72 #include <rte_tcp.h>
73 #include <rte_udp.h>
74 #include <rte_string_fns.h>
75 #include <rte_timer.h>
76 #include <rte_power.h>
77 #include <rte_eal.h>
78 #include <rte_spinlock.h>
79
80 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
81
82 #define MAX_PKT_BURST 32
83
84 #define MIN_ZERO_POLL_COUNT 10
85
86 /* around 100ms at 2 Ghz */
87 #define TIMER_RESOLUTION_CYCLES           200000000ULL
88 /* 100 ms interval */
89 #define TIMER_NUMBER_PER_SECOND           10
90 /* 100000 us */
91 #define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
92 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
93
94 #define APP_LOOKUP_EXACT_MATCH          0
95 #define APP_LOOKUP_LPM                  1
96 #define DO_RFC_1812_CHECKS
97
98 #ifndef APP_LOOKUP_METHOD
99 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
100 #endif
101
102 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
103 #include <rte_hash.h>
104 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
105 #include <rte_lpm.h>
106 #else
107 #error "APP_LOOKUP_METHOD set to incorrect value"
108 #endif
109
110 #ifndef IPv6_BYTES
111 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
112                        "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
113 #define IPv6_BYTES(addr) \
114         addr[0],  addr[1], addr[2],  addr[3], \
115         addr[4],  addr[5], addr[6],  addr[7], \
116         addr[8],  addr[9], addr[10], addr[11],\
117         addr[12], addr[13],addr[14], addr[15]
118 #endif
119
120 #define MAX_JUMBO_PKT_LEN  9600
121
122 #define IPV6_ADDR_LEN 16
123
124 #define MEMPOOL_CACHE_SIZE 256
125
126 /*
127  * This expression is used to calculate the number of mbufs needed depending on
128  * user input, taking into account memory for rx and tx hardware rings, cache
129  * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
130  * NB_MBUF never goes below a minimum value of 8192.
131  */
132
133 #define NB_MBUF RTE_MAX ( \
134         (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
135         nb_ports*nb_lcores*MAX_PKT_BURST + \
136         nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
137         nb_lcores*MEMPOOL_CACHE_SIZE), \
138         (unsigned)8192)
139
140 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
141
142 #define NB_SOCKETS 8
143
144 /* Configure how many packets ahead to prefetch, when reading packets */
145 #define PREFETCH_OFFSET 3
146
147 /*
148  * Configurable number of RX/TX ring descriptors
149  */
150 #define RTE_TEST_RX_DESC_DEFAULT 128
151 #define RTE_TEST_TX_DESC_DEFAULT 512
152 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
153 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
154
155 /* ethernet addresses of ports */
156 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
157
158 /* ethernet addresses of ports */
159 static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
160
161 /* mask of enabled ports */
162 static uint32_t enabled_port_mask = 0;
163 /* Ports set in promiscuous mode off by default. */
164 static int promiscuous_on = 0;
165 /* NUMA is enabled by default. */
166 static int numa_on = 1;
167 static int parse_ptype; /**< Parse packet type using rx callback, and */
168                         /**< disabled by default */
169
170 enum freq_scale_hint_t
171 {
172         FREQ_LOWER    =      -1,
173         FREQ_CURRENT  =       0,
174         FREQ_HIGHER   =       1,
175         FREQ_HIGHEST  =       2
176 };
177
178 struct lcore_rx_queue {
179         uint8_t port_id;
180         uint8_t queue_id;
181         enum freq_scale_hint_t freq_up_hint;
182         uint32_t zero_rx_packet_count;
183         uint32_t idle_hint;
184 } __rte_cache_aligned;
185
186 #define MAX_RX_QUEUE_PER_LCORE 16
187 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
188 #define MAX_RX_QUEUE_PER_PORT 128
189
190 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
191
192
193 #define MAX_LCORE_PARAMS 1024
194 struct lcore_params {
195         uint8_t port_id;
196         uint8_t queue_id;
197         uint8_t lcore_id;
198 } __rte_cache_aligned;
199
200 static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
201 static struct lcore_params lcore_params_array_default[] = {
202         {0, 0, 2},
203         {0, 1, 2},
204         {0, 2, 2},
205         {1, 0, 2},
206         {1, 1, 2},
207         {1, 2, 2},
208         {2, 0, 2},
209         {3, 0, 3},
210         {3, 1, 3},
211 };
212
213 static struct lcore_params * lcore_params = lcore_params_array_default;
214 static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
215                                 sizeof(lcore_params_array_default[0]);
216
217 static struct rte_eth_conf port_conf = {
218         .rxmode = {
219                 .mq_mode        = ETH_MQ_RX_RSS,
220                 .max_rx_pkt_len = ETHER_MAX_LEN,
221                 .split_hdr_size = 0,
222                 .header_split   = 0, /**< Header Split disabled */
223                 .hw_ip_checksum = 1, /**< IP checksum offload enabled */
224                 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
225                 .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
226                 .hw_strip_crc   = 0, /**< CRC stripped by hardware */
227         },
228         .rx_adv_conf = {
229                 .rss_conf = {
230                         .rss_key = NULL,
231                         .rss_hf = ETH_RSS_UDP,
232                 },
233         },
234         .txmode = {
235                 .mq_mode = ETH_MQ_TX_NONE,
236         },
237         .intr_conf = {
238                 .lsc = 1,
239                 .rxq = 1,
240         },
241 };
242
243 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
244
245
246 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
247
248 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
249 #include <rte_hash_crc.h>
250 #define DEFAULT_HASH_FUNC       rte_hash_crc
251 #else
252 #include <rte_jhash.h>
253 #define DEFAULT_HASH_FUNC       rte_jhash
254 #endif
255
256 struct ipv4_5tuple {
257         uint32_t ip_dst;
258         uint32_t ip_src;
259         uint16_t port_dst;
260         uint16_t port_src;
261         uint8_t  proto;
262 } __attribute__((__packed__));
263
264 struct ipv6_5tuple {
265         uint8_t  ip_dst[IPV6_ADDR_LEN];
266         uint8_t  ip_src[IPV6_ADDR_LEN];
267         uint16_t port_dst;
268         uint16_t port_src;
269         uint8_t  proto;
270 } __attribute__((__packed__));
271
272 struct ipv4_l3fwd_route {
273         struct ipv4_5tuple key;
274         uint8_t if_out;
275 };
276
277 struct ipv6_l3fwd_route {
278         struct ipv6_5tuple key;
279         uint8_t if_out;
280 };
281
282 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
283         {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
284         {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
285         {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
286         {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
287 };
288
289 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
290         {
291                 {
292                         {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
293                          0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
294                         {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
295                          0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a},
296                          1, 10, IPPROTO_UDP
297                 }, 4
298         },
299 };
300
301 typedef struct rte_hash lookup_struct_t;
302 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
303 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
304
305 #define L3FWD_HASH_ENTRIES      1024
306
307 #define IPV4_L3FWD_NUM_ROUTES \
308         (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
309
310 #define IPV6_L3FWD_NUM_ROUTES \
311         (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))
312
313 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
314 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
315 #endif
316
317 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
318 struct ipv4_l3fwd_route {
319         uint32_t ip;
320         uint8_t  depth;
321         uint8_t  if_out;
322 };
323
324 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
325         {IPv4(1,1,1,0), 24, 0},
326         {IPv4(2,1,1,0), 24, 1},
327         {IPv4(3,1,1,0), 24, 2},
328         {IPv4(4,1,1,0), 24, 3},
329         {IPv4(5,1,1,0), 24, 4},
330         {IPv4(6,1,1,0), 24, 5},
331         {IPv4(7,1,1,0), 24, 6},
332         {IPv4(8,1,1,0), 24, 7},
333 };
334
335 #define IPV4_L3FWD_NUM_ROUTES \
336         (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
337
338 #define IPV4_L3FWD_LPM_MAX_RULES     1024
339
340 typedef struct rte_lpm lookup_struct_t;
341 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
342 #endif
343
344 struct lcore_conf {
345         uint16_t n_rx_queue;
346         struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
347         uint16_t n_tx_port;
348         uint16_t tx_port_id[RTE_MAX_ETHPORTS];
349         uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
350         struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
351         lookup_struct_t * ipv4_lookup_struct;
352         lookup_struct_t * ipv6_lookup_struct;
353 } __rte_cache_aligned;
354
355 struct lcore_stats {
356         /* total sleep time in ms since last frequency scaling down */
357         uint32_t sleep_time;
358         /* number of long sleep recently */
359         uint32_t nb_long_sleep;
360         /* freq. scaling up trend */
361         uint32_t trend;
362         /* total packet processed recently */
363         uint64_t nb_rx_processed;
364         /* total iterations looped recently */
365         uint64_t nb_iteration_looped;
366         uint32_t padding[9];
367 } __rte_cache_aligned;
368
369 static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
370 static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
371 static struct rte_timer power_timers[RTE_MAX_LCORE];
372
373 static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
374 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
375                         unsigned lcore_id, uint8_t port_id, uint16_t queue_id);
376
377 /* exit signal handler */
378 static void
379 signal_exit_now(int sigtype)
380 {
381         unsigned lcore_id;
382         int ret;
383
384         if (sigtype == SIGINT) {
385                 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
386                         if (rte_lcore_is_enabled(lcore_id) == 0)
387                                 continue;
388
389                         /* init power management library */
390                         ret = rte_power_exit(lcore_id);
391                         if (ret)
392                                 rte_exit(EXIT_FAILURE, "Power management "
393                                         "library de-initialization failed on "
394                                                         "core%u\n", lcore_id);
395                 }
396         }
397
398         rte_exit(EXIT_SUCCESS, "User forced exit\n");
399 }
400
401 /*  Freqency scale down timer callback */
402 static void
403 power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
404                           __attribute__((unused)) void *arg)
405 {
406         uint64_t hz;
407         float sleep_time_ratio;
408         unsigned lcore_id = rte_lcore_id();
409
410         /* accumulate total execution time in us when callback is invoked */
411         sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
412                                         (float)SCALING_PERIOD;
413         /**
414          * check whether need to scale down frequency a step if it sleep a lot.
415          */
416         if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
417                 if (rte_power_freq_down)
418                         rte_power_freq_down(lcore_id);
419         }
420         else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
421                 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
422                 /**
423                  * scale down a step if average packet per iteration less
424                  * than expectation.
425                  */
426                 if (rte_power_freq_down)
427                         rte_power_freq_down(lcore_id);
428         }
429
430         /**
431          * initialize another timer according to current frequency to ensure
432          * timer interval is relatively fixed.
433          */
434         hz = rte_get_timer_hz();
435         rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
436                                 SINGLE, lcore_id, power_timer_cb, NULL);
437
438         stats[lcore_id].nb_rx_processed = 0;
439         stats[lcore_id].nb_iteration_looped = 0;
440
441         stats[lcore_id].sleep_time = 0;
442 }
443
444 /* Enqueue a single packet, and send burst if queue is filled */
445 static inline int
446 send_single_packet(struct rte_mbuf *m, uint8_t port)
447 {
448         uint32_t lcore_id;
449         struct lcore_conf *qconf;
450
451         lcore_id = rte_lcore_id();
452         qconf = &lcore_conf[lcore_id];
453
454         rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
455                         qconf->tx_buffer[port], m);
456
457         return 0;
458 }
459
#ifdef DO_RFC_1812_CHECKS
/*
 * Sanity-check an IPv4 header per RFC 1812 section 5.2.2.
 *
 * @param pkt       pointer to the IPv4 header
 * @param link_len  packet length as reported by the link layer
 * @return 0 when the header passes all checks, a negative code
 *         identifying the first failed check otherwise
 */
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
        /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
        /*
         * 1. The packet length reported by the Link Layer must be large
         * enough to hold the minimum length legal IP datagram (20 bytes).
         */
        if (link_len < sizeof(struct ipv4_hdr))
                return -1;

        /* 2. The IP checksum must be correct. */
        /* this is checked in H/W */

        /*
         * 3. The IP version number must be 4. If the version number is not 4
         * then the packet may be another version of IP, such as IPng or
         * ST-II.
         */
        if (((pkt->version_ihl) >> 4) != 4)
                return -3;
        /*
         * 4. The IP header length field must be large enough to hold the
         * minimum length legal IP datagram (20 bytes = 5 words).
         */
        if ((pkt->version_ihl & 0xf) < 5)
                return -4;

        /*
         * 5. The IP total length field must be large enough to hold the IP
         * datagram header, whose length is specified in the IP header length
         * field. total_length is big-endian on the wire, so convert from
         * network to host order before comparing (rte_be_to_cpu_16 is the
         * semantically correct direction; the 16-bit byte swap itself is
         * symmetric, so behavior is unchanged).
         */
        if (rte_be_to_cpu_16(pkt->total_length) < sizeof(struct ipv4_hdr))
                return -5;

        return 0;
}
#endif
500
501 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
502 static void
503 print_ipv4_key(struct ipv4_5tuple key)
504 {
505         printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, "
506                 "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src,
507                                 key.port_dst, key.port_src, key.proto);
508 }
509 static void
510 print_ipv6_key(struct ipv6_5tuple key)
511 {
512         printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
513                 "port dst = %d, port src = %d, proto = %d\n",
514                 IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
515                 key.port_dst, key.port_src, key.proto);
516 }
517
518 static inline uint8_t
519 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
520                 lookup_struct_t * ipv4_l3fwd_lookup_struct)
521 {
522         struct ipv4_5tuple key;
523         struct tcp_hdr *tcp;
524         struct udp_hdr *udp;
525         int ret = 0;
526
527         key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
528         key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
529         key.proto = ipv4_hdr->next_proto_id;
530
531         switch (ipv4_hdr->next_proto_id) {
532         case IPPROTO_TCP:
533                 tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr +
534                                         sizeof(struct ipv4_hdr));
535                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
536                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
537                 break;
538
539         case IPPROTO_UDP:
540                 udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr +
541                                         sizeof(struct ipv4_hdr));
542                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
543                 key.port_src = rte_be_to_cpu_16(udp->src_port);
544                 break;
545
546         default:
547                 key.port_dst = 0;
548                 key.port_src = 0;
549                 break;
550         }
551
552         /* Find destination port */
553         ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
554         return (uint8_t)((ret < 0)? portid : ipv4_l3fwd_out_if[ret]);
555 }
556
557 static inline uint8_t
558 get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr,  uint8_t portid,
559                         lookup_struct_t *ipv6_l3fwd_lookup_struct)
560 {
561         struct ipv6_5tuple key;
562         struct tcp_hdr *tcp;
563         struct udp_hdr *udp;
564         int ret = 0;
565
566         memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN);
567         memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN);
568
569         key.proto = ipv6_hdr->proto;
570
571         switch (ipv6_hdr->proto) {
572         case IPPROTO_TCP:
573                 tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr +
574                                         sizeof(struct ipv6_hdr));
575                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
576                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
577                 break;
578
579         case IPPROTO_UDP:
580                 udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr +
581                                         sizeof(struct ipv6_hdr));
582                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
583                 key.port_src = rte_be_to_cpu_16(udp->src_port);
584                 break;
585
586         default:
587                 key.port_dst = 0;
588                 key.port_src = 0;
589                 break;
590         }
591
592         /* Find destination port */
593         ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
594         return (uint8_t)((ret < 0)? portid : ipv6_l3fwd_out_if[ret]);
595 }
596 #endif
597
598 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
599 static inline uint8_t
600 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
601                 lookup_struct_t *ipv4_l3fwd_lookup_struct)
602 {
603         uint32_t next_hop;
604
605         return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
606                         rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
607                         next_hop : portid);
608 }
609 #endif
610
611 static inline void
612 parse_ptype_one(struct rte_mbuf *m)
613 {
614         struct ether_hdr *eth_hdr;
615         uint32_t packet_type = RTE_PTYPE_UNKNOWN;
616         uint16_t ether_type;
617
618         eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
619         ether_type = eth_hdr->ether_type;
620         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
621                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
622         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
623                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
624
625         m->packet_type = packet_type;
626 }
627
628 static uint16_t
629 cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
630                struct rte_mbuf *pkts[], uint16_t nb_pkts,
631                uint16_t max_pkts __rte_unused,
632                void *user_param __rte_unused)
633 {
634         unsigned int i;
635
636         for (i = 0; i < nb_pkts; ++i)
637                 parse_ptype_one(pkts[i]);
638
639         return nb_pkts;
640 }
641
642 static int
643 add_cb_parse_ptype(uint8_t portid, uint16_t queueid)
644 {
645         printf("Port %d: softly parse packet type info\n", portid);
646         if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL))
647                 return 0;
648
649         printf("Failed to add rx callback: port=%d\n", portid);
650         return -1;
651 }
652
/*
 * Forward one packet: look up the output port for IPv4 (and, in
 * exact-match mode, IPv6) packets, rewrite the ethernet addresses and
 * enqueue for transmission. Non-IP packets — and IPv6 packets in LPM
 * mode — are dropped. Relies on m->packet_type being set either by the
 * NIC or by the cb_parse_ptype rx callback.
 */
static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
                                struct lcore_conf *qconf)
{
        struct ether_hdr *eth_hdr;
        struct ipv4_hdr *ipv4_hdr;
        void *d_addr_bytes;
        uint8_t dst_port;

        eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

        if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
                /* Handle IPv4 headers.*/
                ipv4_hdr =
                        rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
                                                sizeof(struct ether_hdr));

#ifdef DO_RFC_1812_CHECKS
                /* Check to make sure the packet is valid (RFC1812) */
                if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
                        rte_pktmbuf_free(m);
                        return;
                }
#endif

                dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
                                        qconf->ipv4_lookup_struct);
                /* fall back to the input port when the route points at a
                 * port that is out of range or not enabled */
                if (dst_port >= RTE_MAX_ETHPORTS ||
                                (enabled_port_mask & 1 << dst_port) == 0)
                        dst_port = portid;

                /* 02:00:00:00:00:xx */
                /* NOTE(review): this 8-byte store covers the 6-byte d_addr
                 * plus the first 2 bytes of s_addr; s_addr is rewritten by
                 * ether_addr_copy() below, so no corruption is visible —
                 * but the write order must not change. */
                d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
                *((uint64_t *)d_addr_bytes) =
                        0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
                /* Update time to live and header checksum */
                /* incrementing the checksum compensates for the TTL
                 * decrement without a full recompute (RFC 1141 style) */
                --(ipv4_hdr->time_to_live);
                ++(ipv4_hdr->hdr_checksum);
#endif

                /* src addr */
                ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

                send_single_packet(m, dst_port);
        } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
                /* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
                struct ipv6_hdr *ipv6_hdr;

                ipv6_hdr =
                        rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
                                                sizeof(struct ether_hdr));

                dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
                                        qconf->ipv6_lookup_struct);

                if (dst_port >= RTE_MAX_ETHPORTS ||
                                (enabled_port_mask & 1 << dst_port) == 0)
                        dst_port = portid;

                /* 02:00:00:00:00:xx */
                /* same 8-byte MAC-write trick as the IPv4 path above */
                d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
                *((uint64_t *)d_addr_bytes) =
                        0x000000000002 + ((uint64_t)dst_port << 40);

                /* src addr */
                ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

                send_single_packet(m, dst_port);
#else
                /* We don't currently handle IPv6 packets in LPM mode. */
                rte_pktmbuf_free(m);
#endif
        } else
                /* neither IPv4 nor IPv6: drop */
                rte_pktmbuf_free(m);

}
732
#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

/*
 * Map the number of consecutive empty polls on a queue to a sleep
 * duration in microseconds.
 *
 * @param zero_rx_packet_count  consecutive polls that returned no packets
 * @return MINIMUM_SLEEP_TIME (1 us) while the queue only recently went
 *         idle; SUSPEND_THRESHOLD (300 us) once it has been idle for
 *         SUSPEND_THRESHOLD polls or more, long enough to cover the
 *         C3/C6 -> C0 wakeup latency.
 */
static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
        /* queue went idle only recently: take the shortest nap */
        if (zero_rx_packet_count < SUSPEND_THRESHOLD)
                return MINIMUM_SLEEP_TIME;

        /* sustained idleness: sleep long enough for a deep C-state exit
         * (the old unreachable "return 0;" tail has been removed) */
        return SUSPEND_THRESHOLD;
}
750
/*
 * Decide whether this lcore's frequency should be raised, based on how
 * much backlog is sitting in one rx queue. A deep backlog requests the
 * highest frequency immediately; moderate backlog accumulates a per-core
 * "trend" counter that, once it exceeds FREQ_UP_THRESHOLD, requests a
 * single step up. Returns a freq_scale_hint_t for the caller to act on.
 */
static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
                             uint8_t port_id,
                             uint16_t queue_id)
{
/**
 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries
 * per iteration
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

        /* >= 3 bursts of backlog: go straight to the top gear and clear
         * the accumulated trend */
        if (likely(rte_eth_rx_descriptor_done(port_id, queue_id,
                        FREQ_GEAR3_RX_PACKET_THRESHOLD) > 0)) {
                stats[lcore_id].trend = 0;
                return FREQ_HIGHEST;
        } else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id,
                        FREQ_GEAR2_RX_PACKET_THRESHOLD) > 0))
                /* >= 2 bursts: accumulate the fast up-trend */
                stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
        else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id,
                        FREQ_GEAR1_RX_PACKET_THRESHOLD) > 0))
                /* >= 1 burst: accumulate the slow up-trend */
                stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

        /* sustained moderate pressure: request one step up */
        if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
                stats[lcore_id].trend = 0;
                return FREQ_HIGHER;
        }

        return FREQ_CURRENT;
}
785
/**
 * Force the polling thread to sleep until a one-shot rx interrupt triggers.
 * @param num
 *  Number of rx queues this lcore waits on (sizes the epoll event array).
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num)
{
        /* one event slot per registered rx queue (VLA sized by caller) */
        struct rte_epoll_event event[num];
        int n, i;
        uint8_t port_id, queue_id;
        void *data;

        RTE_LOG(INFO, L3FWD_POWER,
                "lcore %u sleeps until interrupt triggers\n",
                rte_lcore_id());

        /* block (timeout -1) until at least one registered queue signals */
        n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1);
        for (i = 0; i < n; i++) {
                data = event[i].epdata.data;
                /* the cookie was packed as (port_id << CHAR_BIT) | queue_id
                 * by event_register() */
                port_id = ((uintptr_t)data) >> CHAR_BIT;
                queue_id = ((uintptr_t)data) &
                        RTE_LEN2MASK(CHAR_BIT, uint8_t);
                /* switch the queue back to polling mode before returning */
                rte_eth_dev_rx_intr_disable(port_id, queue_id);
                RTE_LOG(INFO, L3FWD_POWER,
                        "lcore %u is waked up from rx interrupt on"
                        " port %d queue %d\n",
                        rte_lcore_id(), port_id, queue_id);
        }

        return 0;
}
822
823 static void turn_on_intr(struct lcore_conf *qconf)
824 {
825         int i;
826         struct lcore_rx_queue *rx_queue;
827         uint8_t port_id, queue_id;
828
829         for (i = 0; i < qconf->n_rx_queue; ++i) {
830                 rx_queue = &(qconf->rx_queue_list[i]);
831                 port_id = rx_queue->port_id;
832                 queue_id = rx_queue->queue_id;
833
834                 rte_spinlock_lock(&(locks[port_id]));
835                 rte_eth_dev_rx_intr_enable(port_id, queue_id);
836                 rte_spinlock_unlock(&(locks[port_id]));
837         }
838 }
839
840 static int event_register(struct lcore_conf *qconf)
841 {
842         struct lcore_rx_queue *rx_queue;
843         uint8_t portid, queueid;
844         uint32_t data;
845         int ret;
846         int i;
847
848         for (i = 0; i < qconf->n_rx_queue; ++i) {
849                 rx_queue = &(qconf->rx_queue_list[i]);
850                 portid = rx_queue->port_id;
851                 queueid = rx_queue->queue_id;
852                 data = portid << CHAR_BIT | queueid;
853
854                 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
855                                                 RTE_EPOLL_PER_THREAD,
856                                                 RTE_INTR_EVENT_ADD,
857                                                 (void *)((uintptr_t)data));
858                 if (ret)
859                         return ret;
860         }
861
862         return 0;
863 }
864
/* main processing loop */
/*
 * Per-lcore worker: polls the assigned Rx queues, forwards packets, and
 * drives the power-management heuristics — scaling CPU frequency up when
 * traffic arrives and sleeping (short pause or Rx-interrupt suspend) when
 * all queues stay empty.  Launched on each lcore; never returns once the
 * while(1) loop is entered.
 */
static int
main_loop(__attribute__((unused)) void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power;
	int i, j, nb_rx;
	uint8_t portid, queueid;
	struct lcore_conf *qconf;
	struct lcore_rx_queue *rx_queue;
	enum freq_scale_hint_t lcore_scaleup_hint;
	uint32_t lcore_rx_idle_count = 0;
	uint32_t lcore_idle_hint = 0;
	int intr_en = 0;

	/* TSC ticks corresponding to BURST_TX_DRAIN_US microseconds. */
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	/* No Rx queue mapped to this lcore: nothing to poll, exit early. */
	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {
		portid = qconf->rx_queue_list[i].port_id;
		queueid = qconf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%hhu "
			"rxqueueid=%hhu\n", lcore_id, portid, queueid);
	}

	/* add into event wait list */
	/* If epoll registration fails we simply never suspend on interrupt. */
	if (event_register(qconf) == 0)
		intr_en = 1;
	else
		RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n");

	while (1) {
		stats[lcore_id].nb_iteration_looped++;

		cur_tsc = rte_rdtsc();
		cur_tsc_power = cur_tsc;

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {
			for (i = 0; i < qconf->n_tx_port; ++i) {
				portid = qconf->tx_port_id[i];
				rte_eth_tx_buffer_flush(portid,
						qconf->tx_queue_id[portid],
						qconf->tx_buffer[portid]);
			}
			prev_tsc = cur_tsc;
		}

		/* Run the timer subsystem periodically (drives the
		 * frequency-scaling timers). */
		diff_tsc_power = cur_tsc_power - prev_tsc_power;
		if (diff_tsc_power > TIMER_RESOLUTION_CYCLES) {
			rte_timer_manage();
			prev_tsc_power = cur_tsc_power;
		}

start_rx:
		/*
		 * Read packet from RX queues
		 */
		lcore_scaleup_hint = FREQ_CURRENT;
		lcore_rx_idle_count = 0;
		for (i = 0; i < qconf->n_rx_queue; ++i) {
			rx_queue = &(qconf->rx_queue_list[i]);
			rx_queue->idle_hint = 0;
			portid = rx_queue->port_id;
			queueid = rx_queue->queue_id;

			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
								MAX_PKT_BURST);

			stats[lcore_id].nb_rx_processed += nb_rx;
			if (unlikely(nb_rx == 0)) {
				/**
				 * no packet received from rx queue, try to
				 * sleep for a while forcing CPU enter deeper
				 * C states.
				 */
				rx_queue->zero_rx_packet_count++;

				/* Tolerate a few empty polls before treating
				 * the queue as idle. */
				if (rx_queue->zero_rx_packet_count <=
							MIN_ZERO_POLL_COUNT)
					continue;

				rx_queue->idle_hint = power_idle_heuristic(\
					rx_queue->zero_rx_packet_count);
				lcore_rx_idle_count++;
			} else {
				rx_queue->zero_rx_packet_count = 0;

				/**
				 * do not scale up frequency immediately as
				 * user to kernel space communication is costly
				 * which might impact packet I/O for received
				 * packets.
				 */
				rx_queue->freq_up_hint =
					power_freq_scaleup_heuristic(lcore_id,
							portid, queueid);
			}

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], portid,
								qconf);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], portid,
								qconf);
			}
		}

		if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
			/* At least one queue saw traffic: take the strongest
			 * scale-up hint across all Rx queues of this lcore. */
			for (i = 1, lcore_scaleup_hint =
				qconf->rx_queue_list[0].freq_up_hint;
					i < qconf->n_rx_queue; ++i) {
				rx_queue = &(qconf->rx_queue_list[i]);
				if (rx_queue->freq_up_hint >
						lcore_scaleup_hint)
					lcore_scaleup_hint =
						rx_queue->freq_up_hint;
			}

			if (lcore_scaleup_hint == FREQ_HIGHEST) {
				if (rte_power_freq_max)
					rte_power_freq_max(lcore_id);
			} else if (lcore_scaleup_hint == FREQ_HIGHER) {
				if (rte_power_freq_up)
					rte_power_freq_up(lcore_id);
			}
		} else {
			/**
			 * All Rx queues empty in recent consecutive polls,
			 * sleep in a conservative manner, meaning sleep as
			 * less as possible.
			 */
			/* Take the *minimum* idle hint over all queues so the
			 * busiest queue bounds how long we sleep. */
			for (i = 1, lcore_idle_hint =
				qconf->rx_queue_list[0].idle_hint;
					i < qconf->n_rx_queue; ++i) {
				rx_queue = &(qconf->rx_queue_list[i]);
				if (rx_queue->idle_hint < lcore_idle_hint)
					lcore_idle_hint = rx_queue->idle_hint;
			}

			if (lcore_idle_hint < SUSPEND_THRESHOLD)
				/**
				 * execute "pause" instruction to avoid context
				 * switch which generally take hundred of
				 * microseconds for short sleep.
				 */
				rte_delay_us(lcore_idle_hint);
			else {
				/* suspend until rx interrupt trigges */
				if (intr_en) {
					turn_on_intr(qconf);
					sleep_until_rx_interrupt(
						qconf->n_rx_queue);
				}
				/* start receiving packets immediately */
				/* NOTE: this goto skips the sleep_time
				 * accounting below for the interrupt path. */
				goto start_rx;
			}
			stats[lcore_id].sleep_time += lcore_idle_hint;
		}
	}
}
1054
1055 static int
1056 check_lcore_params(void)
1057 {
1058         uint8_t queue, lcore;
1059         uint16_t i;
1060         int socketid;
1061
1062         for (i = 0; i < nb_lcore_params; ++i) {
1063                 queue = lcore_params[i].queue_id;
1064                 if (queue >= MAX_RX_QUEUE_PER_PORT) {
1065                         printf("invalid queue number: %hhu\n", queue);
1066                         return -1;
1067                 }
1068                 lcore = lcore_params[i].lcore_id;
1069                 if (!rte_lcore_is_enabled(lcore)) {
1070                         printf("error: lcore %hhu is not enabled in lcore "
1071                                                         "mask\n", lcore);
1072                         return -1;
1073                 }
1074                 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) &&
1075                                                         (numa_on == 0)) {
1076                         printf("warning: lcore %hhu is on socket %d with numa "
1077                                                 "off\n", lcore, socketid);
1078                 }
1079         }
1080         return 0;
1081 }
1082
1083 static int
1084 check_port_config(const unsigned nb_ports)
1085 {
1086         unsigned portid;
1087         uint16_t i;
1088
1089         for (i = 0; i < nb_lcore_params; ++i) {
1090                 portid = lcore_params[i].port_id;
1091                 if ((enabled_port_mask & (1 << portid)) == 0) {
1092                         printf("port %u is not enabled in port mask\n",
1093                                                                 portid);
1094                         return -1;
1095                 }
1096                 if (portid >= nb_ports) {
1097                         printf("port %u is not present on the board\n",
1098                                                                 portid);
1099                         return -1;
1100                 }
1101         }
1102         return 0;
1103 }
1104
1105 static uint8_t
1106 get_port_n_rx_queues(const uint8_t port)
1107 {
1108         int queue = -1;
1109         uint16_t i;
1110
1111         for (i = 0; i < nb_lcore_params; ++i) {
1112                 if (lcore_params[i].port_id == port &&
1113                                 lcore_params[i].queue_id > queue)
1114                         queue = lcore_params[i].queue_id;
1115         }
1116         return (uint8_t)(++queue);
1117 }
1118
1119 static int
1120 init_lcore_rx_queues(void)
1121 {
1122         uint16_t i, nb_rx_queue;
1123         uint8_t lcore;
1124
1125         for (i = 0; i < nb_lcore_params; ++i) {
1126                 lcore = lcore_params[i].lcore_id;
1127                 nb_rx_queue = lcore_conf[lcore].n_rx_queue;
1128                 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
1129                         printf("error: too many queues (%u) for lcore: %u\n",
1130                                 (unsigned)nb_rx_queue + 1, (unsigned)lcore);
1131                         return -1;
1132                 } else {
1133                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
1134                                 lcore_params[i].port_id;
1135                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
1136                                 lcore_params[i].queue_id;
1137                         lcore_conf[lcore].n_rx_queue++;
1138                 }
1139         }
1140         return 0;
1141 }
1142
/* display usage */
/* NOTE(review): keep this help text in sync with the options accepted by
 * parse_args() below (-p, -P, --config, --no-numa, --enable-jumbo,
 * --max-pkt-len, --parse-ptype). */
static void
print_usage(const char *prgname)
{
	printf ("%s [EAL options] -- -p PORTMASK -P"
		"  [--config (port,queue,lcore)[,(port,queue,lcore]]"
		"  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		"  -P : enable promiscuous mode\n"
		"  --config (port,queue,lcore): rx queues configuration\n"
		"  --no-numa: optional, disable numa awareness\n"
		"  --enable-jumbo: enable jumbo frame"
		" which max packet len is PKTLEN in decimal (64-9600)\n"
		"  --parse-ptype: parse packet type by software\n",
		prgname);
}
1159
/*
 * Parse a decimal packet-length argument.
 *
 * Returns the parsed length on success, -1 on malformed input, zero, or
 * a value that does not fit in an int.  The original ignored strtoul
 * overflow (ERANGE) and truncated values > INT_MAX when returning
 * through int; both are now rejected explicitly.
 */
static int parse_max_pkt_len(const char *pktlen)
{
	char *end = NULL;
	unsigned long len;

	/* parse decimal string */
	errno = 0;
	len = strtoul(pktlen, &end, 10);
	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	/* reject zero, overflowed (ERANGE) and int-truncating values */
	if (len == 0 || errno == ERANGE || len > INT_MAX)
		return -1;

	return len;
}
1175
/*
 * Parse the -p argument as a hexadecimal port bitmask.
 * Returns the mask on success, -1 for an empty string, trailing
 * garbage, or an all-zero mask.
 */
static int
parse_portmask(const char *portmask)
{
	unsigned long mask;
	char *endp = NULL;

	mask = strtoul(portmask, &endp, 16);

	if (portmask[0] == '\0' || endp == NULL || *endp != '\0' ||
			mask == 0)
		return -1;

	return mask;
}
1192
1193 static int
1194 parse_config(const char *q_arg)
1195 {
1196         char s[256];
1197         const char *p, *p0 = q_arg;
1198         char *end;
1199         enum fieldnames {
1200                 FLD_PORT = 0,
1201                 FLD_QUEUE,
1202                 FLD_LCORE,
1203                 _NUM_FLD
1204         };
1205         unsigned long int_fld[_NUM_FLD];
1206         char *str_fld[_NUM_FLD];
1207         int i;
1208         unsigned size;
1209
1210         nb_lcore_params = 0;
1211
1212         while ((p = strchr(p0,'(')) != NULL) {
1213                 ++p;
1214                 if((p0 = strchr(p,')')) == NULL)
1215                         return -1;
1216
1217                 size = p0 - p;
1218                 if(size >= sizeof(s))
1219                         return -1;
1220
1221                 snprintf(s, sizeof(s), "%.*s", size, p);
1222                 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
1223                                                                 _NUM_FLD)
1224                         return -1;
1225                 for (i = 0; i < _NUM_FLD; i++){
1226                         errno = 0;
1227                         int_fld[i] = strtoul(str_fld[i], &end, 0);
1228                         if (errno != 0 || end == str_fld[i] || int_fld[i] >
1229                                                                         255)
1230                                 return -1;
1231                 }
1232                 if (nb_lcore_params >= MAX_LCORE_PARAMS) {
1233                         printf("exceeded max number of lcore params: %hu\n",
1234                                 nb_lcore_params);
1235                         return -1;
1236                 }
1237                 lcore_params_array[nb_lcore_params].port_id =
1238                                 (uint8_t)int_fld[FLD_PORT];
1239                 lcore_params_array[nb_lcore_params].queue_id =
1240                                 (uint8_t)int_fld[FLD_QUEUE];
1241                 lcore_params_array[nb_lcore_params].lcore_id =
1242                                 (uint8_t)int_fld[FLD_LCORE];
1243                 ++nb_lcore_params;
1244         }
1245         lcore_params = lcore_params_array;
1246
1247         return 0;
1248 }
1249
1250 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
1251
1252 /* Parse the argument given in the command line of the application */
1253 static int
1254 parse_args(int argc, char **argv)
1255 {
1256         int opt, ret;
1257         char **argvopt;
1258         int option_index;
1259         char *prgname = argv[0];
1260         static struct option lgopts[] = {
1261                 {"config", 1, 0, 0},
1262                 {"no-numa", 0, 0, 0},
1263                 {"enable-jumbo", 0, 0, 0},
1264                 {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
1265                 {NULL, 0, 0, 0}
1266         };
1267
1268         argvopt = argv;
1269
1270         while ((opt = getopt_long(argc, argvopt, "p:P",
1271                                 lgopts, &option_index)) != EOF) {
1272
1273                 switch (opt) {
1274                 /* portmask */
1275                 case 'p':
1276                         enabled_port_mask = parse_portmask(optarg);
1277                         if (enabled_port_mask == 0) {
1278                                 printf("invalid portmask\n");
1279                                 print_usage(prgname);
1280                                 return -1;
1281                         }
1282                         break;
1283                 case 'P':
1284                         printf("Promiscuous mode selected\n");
1285                         promiscuous_on = 1;
1286                         break;
1287
1288                 /* long options */
1289                 case 0:
1290                         if (!strncmp(lgopts[option_index].name, "config", 6)) {
1291                                 ret = parse_config(optarg);
1292                                 if (ret) {
1293                                         printf("invalid config\n");
1294                                         print_usage(prgname);
1295                                         return -1;
1296                                 }
1297                         }
1298
1299                         if (!strncmp(lgopts[option_index].name,
1300                                                 "no-numa", 7)) {
1301                                 printf("numa is disabled \n");
1302                                 numa_on = 0;
1303                         }
1304
1305                         if (!strncmp(lgopts[option_index].name,
1306                                         "enable-jumbo", 12)) {
1307                                 struct option lenopts =
1308                                         {"max-pkt-len", required_argument, \
1309                                                                         0, 0};
1310
1311                                 printf("jumbo frame is enabled \n");
1312                                 port_conf.rxmode.jumbo_frame = 1;
1313
1314                                 /**
1315                                  * if no max-pkt-len set, use the default value
1316                                  * ETHER_MAX_LEN
1317                                  */
1318                                 if (0 == getopt_long(argc, argvopt, "",
1319                                                 &lenopts, &option_index)) {
1320                                         ret = parse_max_pkt_len(optarg);
1321                                         if ((ret < 64) ||
1322                                                 (ret > MAX_JUMBO_PKT_LEN)){
1323                                                 printf("invalid packet "
1324                                                                 "length\n");
1325                                                 print_usage(prgname);
1326                                                 return -1;
1327                                         }
1328                                         port_conf.rxmode.max_rx_pkt_len = ret;
1329                                 }
1330                                 printf("set jumbo frame "
1331                                         "max packet length to %u\n",
1332                                 (unsigned int)port_conf.rxmode.max_rx_pkt_len);
1333                         }
1334
1335                         if (!strncmp(lgopts[option_index].name,
1336                                      CMD_LINE_OPT_PARSE_PTYPE,
1337                                      sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
1338                                 printf("soft parse-ptype is enabled\n");
1339                                 parse_ptype = 1;
1340                         }
1341
1342                         break;
1343
1344                 default:
1345                         print_usage(prgname);
1346                         return -1;
1347                 }
1348         }
1349
1350         if (optind >= 0)
1351                 argv[optind-1] = prgname;
1352
1353         ret = optind-1;
1354         optind = 0; /* reset getopt lib */
1355         return ret;
1356 }
1357
1358 static void
1359 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
1360 {
1361         char buf[ETHER_ADDR_FMT_SIZE];
1362         ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
1363         printf("%s%s", name, buf);
1364 }
1365
1366 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1367 static void
1368 setup_hash(int socketid)
1369 {
1370         struct rte_hash_parameters ipv4_l3fwd_hash_params = {
1371                 .name = NULL,
1372                 .entries = L3FWD_HASH_ENTRIES,
1373                 .key_len = sizeof(struct ipv4_5tuple),
1374                 .hash_func = DEFAULT_HASH_FUNC,
1375                 .hash_func_init_val = 0,
1376         };
1377
1378         struct rte_hash_parameters ipv6_l3fwd_hash_params = {
1379                 .name = NULL,
1380                 .entries = L3FWD_HASH_ENTRIES,
1381                 .key_len = sizeof(struct ipv6_5tuple),
1382                 .hash_func = DEFAULT_HASH_FUNC,
1383                 .hash_func_init_val = 0,
1384         };
1385
1386         unsigned i;
1387         int ret;
1388         char s[64];
1389
1390         /* create ipv4 hash */
1391         snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
1392         ipv4_l3fwd_hash_params.name = s;
1393         ipv4_l3fwd_hash_params.socket_id = socketid;
1394         ipv4_l3fwd_lookup_struct[socketid] =
1395                 rte_hash_create(&ipv4_l3fwd_hash_params);
1396         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1397                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
1398                                 "socket %d\n", socketid);
1399
1400         /* create ipv6 hash */
1401         snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
1402         ipv6_l3fwd_hash_params.name = s;
1403         ipv6_l3fwd_hash_params.socket_id = socketid;
1404         ipv6_l3fwd_lookup_struct[socketid] =
1405                 rte_hash_create(&ipv6_l3fwd_hash_params);
1406         if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
1407                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
1408                                 "socket %d\n", socketid);
1409
1410
1411         /* populate the ipv4 hash */
1412         for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
1413                 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid],
1414                                 (void *) &ipv4_l3fwd_route_array[i].key);
1415                 if (ret < 0) {
1416                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
1417                                 "l3fwd hash on socket %d\n", i, socketid);
1418                 }
1419                 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
1420                 printf("Hash: Adding key\n");
1421                 print_ipv4_key(ipv4_l3fwd_route_array[i].key);
1422         }
1423
1424         /* populate the ipv6 hash */
1425         for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
1426                 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid],
1427                                 (void *) &ipv6_l3fwd_route_array[i].key);
1428                 if (ret < 0) {
1429                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
1430                                 "l3fwd hash on socket %d\n", i, socketid);
1431                 }
1432                 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
1433                 printf("Hash: Adding key\n");
1434                 print_ipv6_key(ipv6_l3fwd_route_array[i].key);
1435         }
1436 }
1437 #endif
1438
1439 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1440 static void
1441 setup_lpm(int socketid)
1442 {
1443         unsigned i;
1444         int ret;
1445         char s[64];
1446
1447         /* create the LPM table */
1448         struct rte_lpm_config lpm_ipv4_config;
1449
1450         lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
1451         lpm_ipv4_config.number_tbl8s = 256;
1452         lpm_ipv4_config.flags = 0;
1453
1454         snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
1455         ipv4_l3fwd_lookup_struct[socketid] =
1456                         rte_lpm_create(s, socketid, &lpm_ipv4_config);
1457         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1458                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
1459                                 " on socket %d\n", socketid);
1460
1461         /* populate the LPM table */
1462         for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
1463                 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
1464                         ipv4_l3fwd_route_array[i].ip,
1465                         ipv4_l3fwd_route_array[i].depth,
1466                         ipv4_l3fwd_route_array[i].if_out);
1467
1468                 if (ret < 0) {
1469                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
1470                                 "l3fwd LPM table on socket %d\n",
1471                                 i, socketid);
1472                 }
1473
1474                 printf("LPM: Adding route 0x%08x / %d (%d)\n",
1475                         (unsigned)ipv4_l3fwd_route_array[i].ip,
1476                         ipv4_l3fwd_route_array[i].depth,
1477                         ipv4_l3fwd_route_array[i].if_out);
1478         }
1479 }
1480 #endif
1481
1482 static int
1483 init_mem(unsigned nb_mbuf)
1484 {
1485         struct lcore_conf *qconf;
1486         int socketid;
1487         unsigned lcore_id;
1488         char s[64];
1489
1490         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1491                 if (rte_lcore_is_enabled(lcore_id) == 0)
1492                         continue;
1493
1494                 if (numa_on)
1495                         socketid = rte_lcore_to_socket_id(lcore_id);
1496                 else
1497                         socketid = 0;
1498
1499                 if (socketid >= NB_SOCKETS) {
1500                         rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
1501                                         "out of range %d\n", socketid,
1502                                                 lcore_id, NB_SOCKETS);
1503                 }
1504                 if (pktmbuf_pool[socketid] == NULL) {
1505                         snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
1506                         pktmbuf_pool[socketid] =
1507                                 rte_pktmbuf_pool_create(s, nb_mbuf,
1508                                         MEMPOOL_CACHE_SIZE, 0,
1509                                         RTE_MBUF_DEFAULT_BUF_SIZE,
1510                                         socketid);
1511                         if (pktmbuf_pool[socketid] == NULL)
1512                                 rte_exit(EXIT_FAILURE,
1513                                         "Cannot init mbuf pool on socket %d\n",
1514                                                                 socketid);
1515                         else
1516                                 printf("Allocated mbuf pool on socket %d\n",
1517                                                                 socketid);
1518
1519 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1520                         setup_lpm(socketid);
1521 #else
1522                         setup_hash(socketid);
1523 #endif
1524                 }
1525                 qconf = &lcore_conf[lcore_id];
1526                 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
1527 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1528                 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
1529 #endif
1530         }
1531         return 0;
1532 }
1533
1534 /* Check the link status of all ports in up to 9s, and print them finally */
1535 static void
1536 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
1537 {
1538 #define CHECK_INTERVAL 100 /* 100ms */
1539 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
1540         uint8_t portid, count, all_ports_up, print_flag = 0;
1541         struct rte_eth_link link;
1542
1543         printf("\nChecking link status");
1544         fflush(stdout);
1545         for (count = 0; count <= MAX_CHECK_TIME; count++) {
1546                 all_ports_up = 1;
1547                 for (portid = 0; portid < port_num; portid++) {
1548                         if ((port_mask & (1 << portid)) == 0)
1549                                 continue;
1550                         memset(&link, 0, sizeof(link));
1551                         rte_eth_link_get_nowait(portid, &link);
1552                         /* print link status if flag set */
1553                         if (print_flag == 1) {
1554                                 if (link.link_status)
1555                                         printf("Port %d Link Up - speed %u "
1556                                                 "Mbps - %s\n", (uint8_t)portid,
1557                                                 (unsigned)link.link_speed,
1558                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
1559                                         ("full-duplex") : ("half-duplex\n"));
1560                                 else
1561                                         printf("Port %d Link Down\n",
1562                                                 (uint8_t)portid);
1563                                 continue;
1564                         }
1565                         /* clear all_ports_up flag if any link down */
1566                         if (link.link_status == ETH_LINK_DOWN) {
1567                                 all_ports_up = 0;
1568                                 break;
1569                         }
1570                 }
1571                 /* after finally printing all link status, get out */
1572                 if (print_flag == 1)
1573                         break;
1574
1575                 if (all_ports_up == 0) {
1576                         printf(".");
1577                         fflush(stdout);
1578                         rte_delay_ms(CHECK_INTERVAL);
1579                 }
1580
1581                 /* set the print_flag if all ports up or timeout */
1582                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
1583                         print_flag = 1;
1584                         printf("done\n");
1585                 }
1586         }
1587 }
1588
1589 static int check_ptype(uint8_t portid)
1590 {
1591         int i, ret;
1592         int ptype_l3_ipv4 = 0;
1593 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1594         int ptype_l3_ipv6 = 0;
1595 #endif
1596         uint32_t ptype_mask = RTE_PTYPE_L3_MASK;
1597
1598         ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
1599         if (ret <= 0)
1600                 return 0;
1601
1602         uint32_t ptypes[ret];
1603
1604         ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
1605         for (i = 0; i < ret; ++i) {
1606                 if (ptypes[i] & RTE_PTYPE_L3_IPV4)
1607                         ptype_l3_ipv4 = 1;
1608 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1609                 if (ptypes[i] & RTE_PTYPE_L3_IPV6)
1610                         ptype_l3_ipv6 = 1;
1611 #endif
1612         }
1613
1614         if (ptype_l3_ipv4 == 0)
1615                 printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
1616
1617 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1618         if (ptype_l3_ipv6 == 0)
1619                 printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
1620 #endif
1621
1622 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1623         if (ptype_l3_ipv4)
1624 #else /* APP_LOOKUP_EXACT_MATCH */
1625         if (ptype_l3_ipv4 && ptype_l3_ipv6)
1626 #endif
1627                 return 1;
1628
1629         return 0;
1630
1631 }
1632
/*
 * Application entry point: initialise the EAL, parse l3fwd-power options,
 * configure every enabled port (RX/TX queues, mempools, per-lcore TX
 * buffers), set up the power library and the per-lcore frequency-scaling
 * timers, start the ports, then launch main_loop() on every lcore.
 * Exits via rte_exit() on any fatal setup error; returns 0 on success.
 */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	unsigned nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t portid, nb_rx_queue, queue, socketid;

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init EAL; on success ret is the number of args EAL consumed */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* init RTE timer library to be used late */
	rte_timer_subsystem_init();

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count();

	if (check_port_config(nb_ports) < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid );
		fflush(stdout);

		rte_eth_dev_info_get(portid, &dev_info);
		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		/* RX queue count comes from the --config lcore/port map;
		 * it must not exceed what the device supports */
		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure not existed rxq: "
				"port=%d\n", portid);

		/* one TX queue per lcore, capped at the device maximum */
		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue );
		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory (per-socket mbuf pools) */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers, allocated on the port's
			 * NUMA socket for local access from the PMD */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						(unsigned) portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* lcores beyond the device TX queue count get none */
			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			rte_eth_dev_info_get(portid, &dev_info);
			txconf = &dev_info.default_txconf;
			/* jumbo frames need multi-segment TX, so clear the
			 * driver's default no-multseg txq flags */
			if (port_conf.rxmode.jumbo_frame)
				txconf->txq_flags = 0;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		/* init power management library; failure is logged but
		 * not fatal — initialization continues without it */
		ret = rte_power_init(lcore_id);
		if (ret)
			RTE_LOG(ERR, POWER,
				"Library initialization failed on core %u\n", lcore_id);

		/* init timer structures for each enabled lcore */
		rte_timer_init(&power_timers[lcore_id]);
		hz = rte_get_timer_hz();
		/* single-shot timer firing TIMER_NUMBER_PER_SECOND times/s;
		 * power_timer_cb presumably re-arms it — runs on this lcore */
		rte_timer_reset(&power_timers[lcore_id],
			hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
						power_timer_cb, NULL);

		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
		fflush(stdout);
		/* init RX queues */
		for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
				socketid, NULL,
				pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			/* --parse-ptype: install a SW packet-type parsing
			 * callback on this queue; otherwise require the PMD
			 * itself to report the needed L3 ptypes */
			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						 "Fail to add ptype cb\n");
			} else if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					 "PMD can not provide needed ptypes\n");
		}
	}

	printf("\n");

	/* start ports */
	for (portid = 0; portid < nb_ports; portid++) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
						"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected  ports of the
		 * target machine.
		 */
		if (promiscuous_on)
			rte_eth_promiscuous_enable(portid);
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));
	}

	/* wait (with timeout) for links to come up, printing status */
	check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}