remove trailing whitespaces
[dpdk.git] / examples / l3fwd / main.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <sys/queue.h>
41 #include <stdarg.h>
42 #include <errno.h>
43 #include <getopt.h>
44
45 #include <tmmintrin.h>
46 #include <rte_common.h>
47 #include <rte_byteorder.h>
48 #include <rte_log.h>
49 #include <rte_memory.h>
50 #include <rte_memcpy.h>
51 #include <rte_memzone.h>
52 #include <rte_tailq.h>
53 #include <rte_eal.h>
54 #include <rte_per_lcore.h>
55 #include <rte_launch.h>
56 #include <rte_atomic.h>
57 #include <rte_cycles.h>
58 #include <rte_prefetch.h>
59 #include <rte_lcore.h>
60 #include <rte_per_lcore.h>
61 #include <rte_branch_prediction.h>
62 #include <rte_interrupts.h>
63 #include <rte_pci.h>
64 #include <rte_random.h>
65 #include <rte_debug.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
68 #include <rte_ring.h>
69 #include <rte_mempool.h>
70 #include <rte_mbuf.h>
71 #include <rte_ip.h>
72 #include <rte_tcp.h>
73 #include <rte_udp.h>
74 #include <rte_string_fns.h>
75
76 #include "main.h"
77
78 #define APP_LOOKUP_EXACT_MATCH          0
79 #define APP_LOOKUP_LPM                  1
80 #define DO_RFC_1812_CHECKS
81
82 #ifndef APP_LOOKUP_METHOD
83 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
84 #endif
85
86 #define ENABLE_MULTI_BUFFER_OPTIMIZE    1
87
88 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
89 #include <rte_hash.h>
90 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
91 #include <rte_lpm.h>
92 #include <rte_lpm6.h>
93 #else
94 #error "APP_LOOKUP_METHOD set to incorrect value"
95 #endif
96
97 #ifndef IPv6_BYTES
98 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
99                        "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
100 #define IPv6_BYTES(addr) \
101         addr[0],  addr[1], addr[2],  addr[3], \
102         addr[4],  addr[5], addr[6],  addr[7], \
103         addr[8],  addr[9], addr[10], addr[11],\
104         addr[12], addr[13],addr[14], addr[15]
105 #endif
106
107
108 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
109
110 #define MAX_JUMBO_PKT_LEN  9600
111
112 #define IPV6_ADDR_LEN 16
113
114 #define MEMPOOL_CACHE_SIZE 256
115
116 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
117
118 /*
119  * This expression is used to calculate the number of mbufs needed depending on user input, taking
120  *  into account memory for rx and tx hardware rings, cache per lcore and mtable per port per lcore.
121  *  RTE_MAX is used to ensure that NB_MBUF never goes below a minimum value of 8192
122  */
123
124 #define NB_MBUF RTE_MAX (                                                                                                                                       \
125                                 (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT +                                                        \
126                                 nb_ports*nb_lcores*MAX_PKT_BURST +                                                                                      \
127                                 nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT +                                                          \
128                                 nb_lcores*MEMPOOL_CACHE_SIZE),                                                                                          \
129                                 (unsigned)8192)
130
131 /*
132  * RX and TX Prefetch, Host, and Write-back threshold values should be
133  * carefully set for optimal performance. Consult the network
134  * controller's datasheet and supporting DPDK documentation for guidance
135  * on how these parameters should be set.
136  */
137 #define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
138 #define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
139 #define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
140
141 /*
142  * These default values are optimized for use with the Intel(R) 82599 10 GbE
143  * Controller and the DPDK ixgbe PMD. Consider using other values for other
144  * network controllers and/or network drivers.
145  */
146 #define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
147 #define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
148 #define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
149
150 #define MAX_PKT_BURST     32
151 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
152
153 #define NB_SOCKETS 8
154
155 /* Configure how many packets ahead to prefetch, when reading packets */
156 #define PREFETCH_OFFSET 3
157
158 /*
159  * Configurable number of RX/TX ring descriptors
160  */
161 #define RTE_TEST_RX_DESC_DEFAULT 128
162 #define RTE_TEST_TX_DESC_DEFAULT 512
163 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
164 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
165
166 /* ethernet addresses of ports */
167 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
168
169 /* mask of enabled ports */
170 static uint32_t enabled_port_mask = 0;
171 static int promiscuous_on = 0; /**< Ports set in promiscuous mode off by default. */
172 static int numa_on = 1; /**< NUMA is enabled by default. */
173
174 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
175 static int ipv6 = 0; /**< ipv6 is false by default. */
176 #endif
177
178 struct mbuf_table {
179         uint16_t len;
180         struct rte_mbuf *m_table[MAX_PKT_BURST];
181 };
182
183 struct lcore_rx_queue {
184         uint8_t port_id;
185         uint8_t queue_id;
186 } __rte_cache_aligned;
187
188 #define MAX_RX_QUEUE_PER_LCORE 16
189 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
190 #define MAX_RX_QUEUE_PER_PORT 128
191
192 #define MAX_LCORE_PARAMS 1024
193 struct lcore_params {
194         uint8_t port_id;
195         uint8_t queue_id;
196         uint8_t lcore_id;
197 } __rte_cache_aligned;
198
199 static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
200 static struct lcore_params lcore_params_array_default[] = {
201         {0, 0, 2},
202         {0, 1, 2},
203         {0, 2, 2},
204         {1, 0, 2},
205         {1, 1, 2},
206         {1, 2, 2},
207         {2, 0, 2},
208         {3, 0, 3},
209         {3, 1, 3},
210 };
211
212 static struct lcore_params * lcore_params = lcore_params_array_default;
213 static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
214                                 sizeof(lcore_params_array_default[0]);
215
216 static struct rte_eth_conf port_conf = {
217         .rxmode = {
218                 .mq_mode = ETH_MQ_RX_RSS,
219                 .max_rx_pkt_len = ETHER_MAX_LEN,
220                 .split_hdr_size = 0,
221                 .header_split   = 0, /**< Header Split disabled */
222                 .hw_ip_checksum = 1, /**< IP checksum offload enabled */
223                 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
224                 .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
225                 .hw_strip_crc   = 0, /**< CRC stripped by hardware */
226         },
227         .rx_adv_conf = {
228                 .rss_conf = {
229                         .rss_key = NULL,
230                         .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6,
231                 },
232         },
233         .txmode = {
234                 .mq_mode = ETH_MQ_TX_NONE,
235         },
236 };
237
238 static const struct rte_eth_rxconf rx_conf = {
239         .rx_thresh = {
240                 .pthresh = RX_PTHRESH,
241                 .hthresh = RX_HTHRESH,
242                 .wthresh = RX_WTHRESH,
243         },
244         .rx_free_thresh = 32,
245 };
246
247 static struct rte_eth_txconf tx_conf = {
248         .tx_thresh = {
249                 .pthresh = TX_PTHRESH,
250                 .hthresh = TX_HTHRESH,
251                 .wthresh = TX_WTHRESH,
252         },
253         .tx_free_thresh = 0, /* Use PMD default values */
254         .tx_rs_thresh = 0, /* Use PMD default values */
255         .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS |
256                         ETH_TXQ_FLAGS_NOVLANOFFL |
257                         ETH_TXQ_FLAGS_NOXSUMSCTP |
258                         ETH_TXQ_FLAGS_NOXSUMUDP |
259                         ETH_TXQ_FLAGS_NOXSUMTCP)
260
261 };
262
263 static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
264
265 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
266
267 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
268 #include <rte_hash_crc.h>
269 #define DEFAULT_HASH_FUNC       rte_hash_crc
270 #else
271 #include <rte_jhash.h>
272 #define DEFAULT_HASH_FUNC       rte_jhash
273 #endif
274
275 struct ipv4_5tuple {
276         uint32_t ip_dst;
277         uint32_t ip_src;
278         uint16_t port_dst;
279         uint16_t port_src;
280         uint8_t  proto;
281 } __attribute__((__packed__));
282
283 union ipv4_5tuple_host {
284         struct {
285                 uint8_t  pad0;
286                 uint8_t  proto;
287                 uint16_t pad1;
288                 uint32_t ip_src;
289                 uint32_t ip_dst;
290                 uint16_t port_src;
291                 uint16_t port_dst;
292         };
293         __m128i xmm;
294 };
295
296 #define XMM_NUM_IN_IPV6_5TUPLE 3
297
298 struct ipv6_5tuple {
299         uint8_t  ip_dst[IPV6_ADDR_LEN];
300         uint8_t  ip_src[IPV6_ADDR_LEN];
301         uint16_t port_dst;
302         uint16_t port_src;
303         uint8_t  proto;
304 } __attribute__((__packed__));
305
306 union ipv6_5tuple_host {
307         struct {
308                 uint16_t pad0;
309                 uint8_t  proto;
310                 uint8_t  pad1;
311                 uint8_t  ip_src[IPV6_ADDR_LEN];
312                 uint8_t  ip_dst[IPV6_ADDR_LEN];
313                 uint16_t port_src;
314                 uint16_t port_dst;
315                 uint64_t reserve;
316         };
317         __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
318 };
319
320 struct ipv4_l3fwd_route {
321         struct ipv4_5tuple key;
322         uint8_t if_out;
323 };
324
325 struct ipv6_l3fwd_route {
326         struct ipv6_5tuple key;
327         uint8_t if_out;
328 };
329
330 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
331         {{IPv4(101,0,0,0), IPv4(100,10,0,1),  101, 11, IPPROTO_TCP}, 0},
332         {{IPv4(201,0,0,0), IPv4(200,20,0,1),  102, 12, IPPROTO_TCP}, 1},
333         {{IPv4(111,0,0,0), IPv4(100,30,0,1),  101, 11, IPPROTO_TCP}, 2},
334         {{IPv4(211,0,0,0), IPv4(200,40,0,1),  102, 12, IPPROTO_TCP}, 3},
335 };
336
337 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
338         {{
339         {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
340         {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
341         101, 11, IPPROTO_TCP}, 0},
342
343         {{
344         {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
345         {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
346         102, 12, IPPROTO_TCP}, 1},
347
348         {{
349         {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
350         {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
351         101, 11, IPPROTO_TCP}, 2},
352
353         {{
354         {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
355         {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
356         102, 12, IPPROTO_TCP}, 3},
357 };
358
359 typedef struct rte_hash lookup_struct_t;
360 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
361 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
362
/*
 * Capacity of the exact-match hash tables and of the out-interface arrays
 * indexed by hash lookup result. The expansion is parenthesized so the
 * macro stays a single operand in any expression (e.g. x % L3FWD_HASH_ENTRIES).
 */
#ifdef RTE_ARCH_X86_64
/* default to 4 million hash entries (approx) */
#define L3FWD_HASH_ENTRIES		(1024 * 1024 * 4)
#else
/* 32-bit has less address-space for hugepage memory, limit to 1M entries */
#define L3FWD_HASH_ENTRIES		(1024 * 1024 * 1)
#endif
370 #define HASH_ENTRY_NUMBER_DEFAULT       4
371
372 static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
373
374 static inline uint32_t
375 ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
376         uint32_t init_val)
377 {
378         const union ipv4_5tuple_host *k;
379         uint32_t t;
380         const uint32_t *p;
381
382         k = data;
383         t = k->proto;
384         p = (const uint32_t *)&k->port_src;
385
386 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
387         init_val = rte_hash_crc_4byte(t, init_val);
388         init_val = rte_hash_crc_4byte(k->ip_src, init_val);
389         init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
390         init_val = rte_hash_crc_4byte(*p, init_val);
391 #else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
392         init_val = rte_jhash_1word(t, init_val);
393         init_val = rte_jhash_1word(k->ip_src, init_val);
394         init_val = rte_jhash_1word(k->ip_dst, init_val);
395         init_val = rte_jhash_1word(*p, init_val);
396 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
397         return (init_val);
398 }
399
400 static inline uint32_t
401 ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len, uint32_t init_val)
402 {
403         const union ipv6_5tuple_host *k;
404         uint32_t t;
405         const uint32_t *p;
406 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
407         const uint32_t  *ip_src0, *ip_src1, *ip_src2, *ip_src3;
408         const uint32_t  *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
409 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
410
411         k = data;
412         t = k->proto;
413         p = (const uint32_t *)&k->port_src;
414
415 #ifdef RTE_MACHINE_CPUFLAG_SSE4_2
416         ip_src0 = (const uint32_t *) k->ip_src;
417         ip_src1 = (const uint32_t *)(k->ip_src+4);
418         ip_src2 = (const uint32_t *)(k->ip_src+8);
419         ip_src3 = (const uint32_t *)(k->ip_src+12);
420         ip_dst0 = (const uint32_t *) k->ip_dst;
421         ip_dst1 = (const uint32_t *)(k->ip_dst+4);
422         ip_dst2 = (const uint32_t *)(k->ip_dst+8);
423         ip_dst3 = (const uint32_t *)(k->ip_dst+12);
424         init_val = rte_hash_crc_4byte(t, init_val);
425         init_val = rte_hash_crc_4byte(*ip_src0, init_val);
426         init_val = rte_hash_crc_4byte(*ip_src1, init_val);
427         init_val = rte_hash_crc_4byte(*ip_src2, init_val);
428         init_val = rte_hash_crc_4byte(*ip_src3, init_val);
429         init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
430         init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
431         init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
432         init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
433         init_val = rte_hash_crc_4byte(*p, init_val);
434 #else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
435         init_val = rte_jhash_1word(t, init_val);
436         init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
437         init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
438         init_val = rte_jhash_1word(*p, init_val);
439 #endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
440         return (init_val);
441 }
442
443 #define IPV4_L3FWD_NUM_ROUTES \
444         (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
445
446 #define IPV6_L3FWD_NUM_ROUTES \
447         (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))
448
449 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
450 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
451
452 #endif
453
454 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
455 struct ipv4_l3fwd_route {
456         uint32_t ip;
457         uint8_t  depth;
458         uint8_t  if_out;
459 };
460
461 struct ipv6_l3fwd_route {
462         uint8_t ip[16];
463         uint8_t  depth;
464         uint8_t  if_out;
465 };
466
467 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
468         {IPv4(1,1,1,0), 24, 0},
469         {IPv4(2,1,1,0), 24, 1},
470         {IPv4(3,1,1,0), 24, 2},
471         {IPv4(4,1,1,0), 24, 3},
472         {IPv4(5,1,1,0), 24, 4},
473         {IPv4(6,1,1,0), 24, 5},
474         {IPv4(7,1,1,0), 24, 6},
475         {IPv4(8,1,1,0), 24, 7},
476 };
477
478 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
479         {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
480         {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
481         {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
482         {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
483         {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
484         {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
485         {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
486         {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
487 };
488
489 #define IPV4_L3FWD_NUM_ROUTES \
490         (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
491 #define IPV6_L3FWD_NUM_ROUTES \
492         (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))
493
494 #define IPV4_L3FWD_LPM_MAX_RULES         1024
495 #define IPV6_L3FWD_LPM_MAX_RULES         1024
496 #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
497
498 typedef struct rte_lpm lookup_struct_t;
499 typedef struct rte_lpm6 lookup6_struct_t;
500 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
501 static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
502 #endif
503
504 struct lcore_conf {
505         uint16_t n_rx_queue;
506         struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
507         uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
508         struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
509         lookup_struct_t * ipv4_lookup_struct;
510 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
511         lookup6_struct_t * ipv6_lookup_struct;
512 #else
513         lookup_struct_t * ipv6_lookup_struct;
514 #endif
515 } __rte_cache_aligned;
516
517 static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
518
519 /* Send burst of packets on an output interface */
520 static inline int
521 send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
522 {
523         struct rte_mbuf **m_table;
524         int ret;
525         uint16_t queueid;
526
527         queueid = qconf->tx_queue_id[port];
528         m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
529
530         ret = rte_eth_tx_burst(port, queueid, m_table, n);
531         if (unlikely(ret < n)) {
532                 do {
533                         rte_pktmbuf_free(m_table[ret]);
534                 } while (++ret < n);
535         }
536
537         return 0;
538 }
539
540 /* Enqueue a single packet, and send burst if queue is filled */
541 static inline int
542 send_single_packet(struct rte_mbuf *m, uint8_t port)
543 {
544         uint32_t lcore_id;
545         uint16_t len;
546         struct lcore_conf *qconf;
547
548         lcore_id = rte_lcore_id();
549
550         qconf = &lcore_conf[lcore_id];
551         len = qconf->tx_mbufs[port].len;
552         qconf->tx_mbufs[port].m_table[len] = m;
553         len++;
554
555         /* enough pkts to be sent */
556         if (unlikely(len == MAX_PKT_BURST)) {
557                 send_burst(qconf, MAX_PKT_BURST, port);
558                 len = 0;
559         }
560
561         qconf->tx_mbufs[port].len = len;
562         return 0;
563 }
564
565 #ifdef DO_RFC_1812_CHECKS
566 static inline int
567 is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
568 {
569         /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
570         /*
571          * 1. The packet length reported by the Link Layer must be large
572          * enough to hold the minimum length legal IP datagram (20 bytes).
573          */
574         if (link_len < sizeof(struct ipv4_hdr))
575                 return -1;
576
577         /* 2. The IP checksum must be correct. */
578         /* this is checked in H/W */
579
580         /*
581          * 3. The IP version number must be 4. If the version number is not 4
582          * then the packet may be another version of IP, such as IPng or
583          * ST-II.
584          */
585         if (((pkt->version_ihl) >> 4) != 4)
586                 return -3;
587         /*
588          * 4. The IP header length field must be large enough to hold the
589          * minimum length legal IP datagram (20 bytes = 5 words).
590          */
591         if ((pkt->version_ihl & 0xf) < 5)
592                 return -4;
593
594         /*
595          * 5. The IP total length field must be large enough to hold the IP
596          * datagram header, whose length is specified in the IP header length
597          * field.
598          */
599         if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
600                 return -5;
601
602         return 0;
603 }
604 #endif
605
606 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
607
608 static __m128i mask0;
609 static __m128i mask1;
610 static __m128i mask2;
611 static inline uint8_t
612 get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
613 {
614         int ret = 0;
615         union ipv4_5tuple_host key;
616
617         ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
618         __m128i data = _mm_loadu_si128((__m128i*)(ipv4_hdr));
619         /* Get 5 tuple: dst port, src port, dst IP address, src IP address and protocol */
620         key.xmm = _mm_and_si128(data, mask0);
621         /* Find destination port */
622         ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
623         return (uint8_t)((ret < 0)? portid : ipv4_l3fwd_out_if[ret]);
624 }
625
626 static inline uint8_t
627 get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, lookup_struct_t * ipv6_l3fwd_lookup_struct)
628 {
629         int ret = 0;
630         union ipv6_5tuple_host key;
631
632         ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
633         __m128i data0 = _mm_loadu_si128((__m128i*)(ipv6_hdr));
634         __m128i data1 = _mm_loadu_si128((__m128i*)(((uint8_t*)ipv6_hdr)+sizeof(__m128i)));
635         __m128i data2 = _mm_loadu_si128((__m128i*)(((uint8_t*)ipv6_hdr)+sizeof(__m128i)+sizeof(__m128i)));
636         /* Get part of 5 tuple: src IP address lower 96 bits and protocol */
637         key.xmm[0] = _mm_and_si128(data0, mask1);
638         /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address higher 32 bits */
639         key.xmm[1] = data1;
640         /* Get part of 5 tuple: dst port and src port and dst IP address higher 32 bits */
641         key.xmm[2] = _mm_and_si128(data2, mask2);
642
643         /* Find destination port */
644         ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
645         return (uint8_t)((ret < 0)? portid : ipv6_l3fwd_out_if[ret]);
646 }
647 #endif
648
649 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
650 static inline uint8_t
651 get_ipv4_dst_port(void *ipv4_hdr,  uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
652 {
653         uint8_t next_hop;
654
655         return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
656                         rte_be_to_cpu_32(((struct ipv4_hdr*)ipv4_hdr)->dst_addr), &next_hop) == 0)?
657                         next_hop : portid);
658 }
659
660 static inline uint8_t
661 get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, lookup6_struct_t * ipv6_l3fwd_lookup_struct)
662 {
663         uint8_t next_hop;
664         return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
665                         ((struct ipv6_hdr*)ipv6_hdr)->dst_addr, &next_hop) == 0)?
666                         next_hop : portid);
667 }
668 #endif
669
670 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) & (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)
671 static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf);
672
673 #define MASK_ALL_PKTS    0xf
674 #define EXECLUDE_1ST_PKT 0xe
675 #define EXECLUDE_2ND_PKT 0xd
676 #define EXECLUDE_3RD_PKT 0xb
677 #define EXECLUDE_4TH_PKT 0x7
678
679 static inline void
680 simple_ipv4_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf)
681 {
682         struct ether_hdr *eth_hdr[4];
683         struct ipv4_hdr *ipv4_hdr[4];
684         void *d_addr_bytes[4];
685         uint8_t dst_port[4];
686         int32_t ret[4];
687         union ipv4_5tuple_host key[4];
688         __m128i data[4];
689
690         eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
691         eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
692         eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
693         eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
694
695         /* Handle IPv4 headers.*/
696         ipv4_hdr[0] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[0], unsigned char *) +
697                         sizeof(struct ether_hdr));
698         ipv4_hdr[1] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[1], unsigned char *) +
699                         sizeof(struct ether_hdr));
700         ipv4_hdr[2] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[2], unsigned char *) +
701                         sizeof(struct ether_hdr));
702         ipv4_hdr[3] = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m[3], unsigned char *) +
703                         sizeof(struct ether_hdr));
704
705 #ifdef DO_RFC_1812_CHECKS
706         /* Check to make sure the packet is valid (RFC1812) */
707         uint8_t valid_mask = MASK_ALL_PKTS;
708         if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt.pkt_len) < 0) {
709                 rte_pktmbuf_free(m[0]);
710                 valid_mask &= EXECLUDE_1ST_PKT;
711         }
712         if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt.pkt_len) < 0) {
713                 rte_pktmbuf_free(m[1]);
714                 valid_mask &= EXECLUDE_2ND_PKT;
715         }
716         if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt.pkt_len) < 0) {
717                 rte_pktmbuf_free(m[2]);
718                 valid_mask &= EXECLUDE_3RD_PKT;
719         }
720         if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt.pkt_len) < 0) {
721                 rte_pktmbuf_free(m[3]);
722                 valid_mask &= EXECLUDE_4TH_PKT;
723         }
724         if (unlikely(valid_mask != MASK_ALL_PKTS)) {
725                 if (valid_mask == 0){
726                         return;
727                 } else {
728                         uint8_t i = 0;
729                         for (i = 0; i < 4; i++) {
730                                 if ((0x1 << i) & valid_mask) {
731                                         l3fwd_simple_forward(m[i], portid, qconf);
732                                 }
733                         }
734                         return;
735                 }
736         }
737 #endif // End of #ifdef DO_RFC_1812_CHECKS
738
739         data[0] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[0], unsigned char *) +
740                 sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
741         data[1] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[1], unsigned char *) +
742                 sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
743         data[2] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[2], unsigned char *) +
744                 sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
745         data[3] = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m[3], unsigned char *) +
746                 sizeof(struct ether_hdr) + offsetof(struct ipv4_hdr, time_to_live)));
747
748         key[0].xmm = _mm_and_si128(data[0], mask0);
749         key[1].xmm = _mm_and_si128(data[1], mask0);
750         key[2].xmm = _mm_and_si128(data[2], mask0);
751         key[3].xmm = _mm_and_si128(data[3], mask0);
752
753         const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]};
754         rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 4, ret);
755         dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
756         dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
757         dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
758         dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
759
760         if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0)
761                 dst_port[0] = portid;
762         if (dst_port[1] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[1]) == 0)
763                 dst_port[1] = portid;
764         if (dst_port[2] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[2]) == 0)
765                 dst_port[2] = portid;
766         if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0)
767                 dst_port[3] = portid;
768
769         /* 02:00:00:00:00:xx */
770         d_addr_bytes[0] = &eth_hdr[0]->d_addr.addr_bytes[0];
771         d_addr_bytes[1] = &eth_hdr[1]->d_addr.addr_bytes[0];
772         d_addr_bytes[2] = &eth_hdr[2]->d_addr.addr_bytes[0];
773         d_addr_bytes[3] = &eth_hdr[3]->d_addr.addr_bytes[0];
774         *((uint64_t *)d_addr_bytes[0]) = 0x000000000002 + ((uint64_t)dst_port[0] << 40);
775         *((uint64_t *)d_addr_bytes[1]) = 0x000000000002 + ((uint64_t)dst_port[1] << 40);
776         *((uint64_t *)d_addr_bytes[2]) = 0x000000000002 + ((uint64_t)dst_port[2] << 40);
777         *((uint64_t *)d_addr_bytes[3]) = 0x000000000002 + ((uint64_t)dst_port[3] << 40);
778
779 #ifdef DO_RFC_1812_CHECKS
780         /* Update time to live and header checksum */
781         --(ipv4_hdr[0]->time_to_live);
782         --(ipv4_hdr[1]->time_to_live);
783         --(ipv4_hdr[2]->time_to_live);
784         --(ipv4_hdr[3]->time_to_live);
785         ++(ipv4_hdr[0]->hdr_checksum);
786         ++(ipv4_hdr[1]->hdr_checksum);
787         ++(ipv4_hdr[2]->hdr_checksum);
788         ++(ipv4_hdr[3]->hdr_checksum);
789 #endif
790
791         /* src addr */
792         ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
793         ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
794         ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
795         ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
796
797         send_single_packet(m[0], (uint8_t)dst_port[0]);
798         send_single_packet(m[1], (uint8_t)dst_port[1]);
799         send_single_packet(m[2], (uint8_t)dst_port[2]);
800         send_single_packet(m[3], (uint8_t)dst_port[3]);
801
802 }
803
804 static inline void get_ipv6_5tuple(struct rte_mbuf* m0, __m128i mask0, __m128i mask1,
805                                  union ipv6_5tuple_host * key)
806 {
807         __m128i tmpdata0 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
808                         + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)));
809         __m128i tmpdata1 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
810                         + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)
811                         +  sizeof(__m128i)));
812         __m128i tmpdata2 = _mm_loadu_si128((__m128i*)(rte_pktmbuf_mtod(m0, unsigned char *)
813                         + sizeof(struct ether_hdr) + offsetof(struct ipv6_hdr, payload_len)
814                         + sizeof(__m128i) + sizeof(__m128i)));
815         key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
816         key->xmm[1] = tmpdata1;
817         key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
818         return;
819 }
820
821 static inline void
822 simple_ipv6_fwd_4pkts(struct rte_mbuf* m[4], uint8_t portid, struct lcore_conf *qconf)
823 {
824         struct ether_hdr *eth_hdr[4];
825         __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[4];
826         void *d_addr_bytes[4];
827         uint8_t dst_port[4];
828         int32_t ret[4];
829         union ipv6_5tuple_host key[4];
830
831         eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
832         eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
833         eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
834         eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
835
836         /* Handle IPv6 headers.*/
837         ipv6_hdr[0] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[0], unsigned char *) +
838                         sizeof(struct ether_hdr));
839         ipv6_hdr[1] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[1], unsigned char *) +
840                         sizeof(struct ether_hdr));
841         ipv6_hdr[2] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[2], unsigned char *) +
842                         sizeof(struct ether_hdr));
843         ipv6_hdr[3] = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m[3], unsigned char *) +
844                         sizeof(struct ether_hdr));
845
846         get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
847         get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
848         get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
849         get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
850
851         const void *key_array[4] = {&key[0], &key[1], &key[2],&key[3]};
852         rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 4, ret);
853         dst_port[0] = (uint8_t) ((ret[0] < 0)? portid:ipv6_l3fwd_out_if[ret[0]]);
854         dst_port[1] = (uint8_t) ((ret[1] < 0)? portid:ipv6_l3fwd_out_if[ret[1]]);
855         dst_port[2] = (uint8_t) ((ret[2] < 0)? portid:ipv6_l3fwd_out_if[ret[2]]);
856         dst_port[3] = (uint8_t) ((ret[3] < 0)? portid:ipv6_l3fwd_out_if[ret[3]]);
857
858         if (dst_port[0] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[0]) == 0)
859                 dst_port[0] = portid;
860         if (dst_port[1] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[1]) == 0)
861                 dst_port[1] = portid;
862         if (dst_port[2] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[2]) == 0)
863                 dst_port[2] = portid;
864         if (dst_port[3] >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port[3]) == 0)
865                 dst_port[3] = portid;
866
867         /* 02:00:00:00:00:xx */
868         d_addr_bytes[0] = &eth_hdr[0]->d_addr.addr_bytes[0];
869         d_addr_bytes[1] = &eth_hdr[1]->d_addr.addr_bytes[0];
870         d_addr_bytes[2] = &eth_hdr[2]->d_addr.addr_bytes[0];
871         d_addr_bytes[3] = &eth_hdr[3]->d_addr.addr_bytes[0];
872         *((uint64_t *)d_addr_bytes[0]) = 0x000000000002 + ((uint64_t)dst_port[0] << 40);
873         *((uint64_t *)d_addr_bytes[1]) = 0x000000000002 + ((uint64_t)dst_port[1] << 40);
874         *((uint64_t *)d_addr_bytes[2]) = 0x000000000002 + ((uint64_t)dst_port[2] << 40);
875         *((uint64_t *)d_addr_bytes[3]) = 0x000000000002 + ((uint64_t)dst_port[3] << 40);
876
877         /* src addr */
878         ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
879         ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
880         ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
881         ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
882
883         send_single_packet(m[0], (uint8_t)dst_port[0]);
884         send_single_packet(m[1], (uint8_t)dst_port[1]);
885         send_single_packet(m[2], (uint8_t)dst_port[2]);
886         send_single_packet(m[3], (uint8_t)dst_port[3]);
887
888 }
889 #endif // End of #if(APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)&(ENABLE_MULTI_BUFFER_OPTIMIZE == 1)
890
891 static inline __attribute__((always_inline)) void
892 l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qconf)
893 {
894         struct ether_hdr *eth_hdr;
895         struct ipv4_hdr *ipv4_hdr;
896         void *d_addr_bytes;
897         uint8_t dst_port;
898
899         eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
900
901         if (m->ol_flags & PKT_RX_IPV4_HDR) {
902                 /* Handle IPv4 headers.*/
903                 ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
904                                 sizeof(struct ether_hdr));
905
906 #ifdef DO_RFC_1812_CHECKS
907                 /* Check to make sure the packet is valid (RFC1812) */
908                 if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt.pkt_len) < 0) {
909                         rte_pktmbuf_free(m);
910                         return;
911                 }
912 #endif
913
914                 dst_port = get_ipv4_dst_port(ipv4_hdr, portid, qconf->ipv4_lookup_struct);
915                 if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0)
916                         dst_port = portid;
917
918                 /* 02:00:00:00:00:xx */
919                 d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
920                 *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40);
921
922 #ifdef DO_RFC_1812_CHECKS
923                 /* Update time to live and header checksum */
924                 --(ipv4_hdr->time_to_live);
925                 ++(ipv4_hdr->hdr_checksum);
926 #endif
927
928                 /* src addr */
929                 ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
930
931                 send_single_packet(m, dst_port);
932
933         } else {
934                 /* Handle IPv6 headers.*/
935                 struct ipv6_hdr *ipv6_hdr;
936
937                 ipv6_hdr = (struct ipv6_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
938                                 sizeof(struct ether_hdr));
939
940                 dst_port = get_ipv6_dst_port(ipv6_hdr, portid, qconf->ipv6_lookup_struct);
941
942                 if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0)
943                         dst_port = portid;
944
945                 /* 02:00:00:00:00:xx */
946                 d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
947                 *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40);
948
949                 /* src addr */
950                 ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
951
952                 send_single_packet(m, dst_port);
953         }
954
955 }
956
957 /* main processing loop */
958 static int
959 main_loop(__attribute__((unused)) void *dummy)
960 {
961         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
962         unsigned lcore_id;
963         uint64_t prev_tsc, diff_tsc, cur_tsc;
964         int i, j, nb_rx;
965         uint8_t portid, queueid;
966         struct lcore_conf *qconf;
967         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
968
969         prev_tsc = 0;
970
971         lcore_id = rte_lcore_id();
972         qconf = &lcore_conf[lcore_id];
973
974         if (qconf->n_rx_queue == 0) {
975                 RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
976                 return 0;
977         }
978
979         RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
980
981         for (i = 0; i < qconf->n_rx_queue; i++) {
982
983                 portid = qconf->rx_queue_list[i].port_id;
984                 queueid = qconf->rx_queue_list[i].queue_id;
985                 RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", lcore_id,
986                         portid, queueid);
987         }
988
989         while (1) {
990
991                 cur_tsc = rte_rdtsc();
992
993                 /*
994                  * TX burst queue drain
995                  */
996                 diff_tsc = cur_tsc - prev_tsc;
997                 if (unlikely(diff_tsc > drain_tsc)) {
998
999                         /*
1000                          * This could be optimized (use queueid instead of
1001                          * portid), but it is not called so often
1002                          */
1003                         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
1004                                 if (qconf->tx_mbufs[portid].len == 0)
1005                                         continue;
1006                                 send_burst(&lcore_conf[lcore_id],
1007                                         qconf->tx_mbufs[portid].len,
1008                                         portid);
1009                                 qconf->tx_mbufs[portid].len = 0;
1010                         }
1011
1012                         prev_tsc = cur_tsc;
1013                 }
1014
1015                 /*
1016                  * Read packet from RX queues
1017                  */
1018                 for (i = 0; i < qconf->n_rx_queue; ++i) {
1019                         portid = qconf->rx_queue_list[i].port_id;
1020                         queueid = qconf->rx_queue_list[i].queue_id;
1021                         nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST);
1022 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) & (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)
1023                         {
1024                                 /* Send nb_rx - nb_rx%4 packets in groups of 4.*/
1025                                 int32_t n = RTE_ALIGN_FLOOR(nb_rx, 4);
1026                                 for (j = 0; j < n ; j+=4) {
1027                                         uint32_t ol_flag = pkts_burst[j]->ol_flags
1028                                                         & pkts_burst[j+1]->ol_flags
1029                                                         & pkts_burst[j+2]->ol_flags
1030                                                         & pkts_burst[j+3]->ol_flags;
1031                                         if (ol_flag & PKT_RX_IPV4_HDR ) {
1032                                                 simple_ipv4_fwd_4pkts(&pkts_burst[j],
1033                                                                         portid, qconf);
1034                                         } else if (ol_flag & PKT_RX_IPV6_HDR) {
1035                                                 simple_ipv6_fwd_4pkts(&pkts_burst[j],
1036                                                                         portid, qconf);
1037                                         } else {
1038                                                 l3fwd_simple_forward(pkts_burst[j],
1039                                                                         portid, qconf);
1040                                                 l3fwd_simple_forward(pkts_burst[j+1],
1041                                                                         portid, qconf);
1042                                                 l3fwd_simple_forward(pkts_burst[j+2],
1043                                                                         portid, qconf);
1044                                                 l3fwd_simple_forward(pkts_burst[j+3],
1045                                                                         portid, qconf);
1046                                         }
1047                                 }
1048                                 for (; j < nb_rx ; j++) {
1049                                         l3fwd_simple_forward(pkts_burst[j],
1050                                                                 portid, qconf);
1051                                 }
1052                         }
1053 #else
1054                         /* Prefetch first packets */
1055                         for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
1056                                 rte_prefetch0(rte_pktmbuf_mtod(
1057                                                 pkts_burst[j], void *));
1058                         }
1059
1060                         /* Prefetch and forward already prefetched packets */
1061                         for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
1062                                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
1063                                                 j + PREFETCH_OFFSET], void *));
1064                                 l3fwd_simple_forward(pkts_burst[j], portid, qconf);
1065                         }
1066
1067                         /* Forward remaining prefetched packets */
1068                         for (; j < nb_rx; j++) {
1069                                 l3fwd_simple_forward(pkts_burst[j], portid, qconf);
1070                         }
1071 #endif // End of #if((ENABLE_MULTI_BUFFER_OPTIMIZE == 1)&(APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH))
1072                 }
1073         }
1074 }
1075
1076 static int
1077 check_lcore_params(void)
1078 {
1079         uint8_t queue, lcore;
1080         uint16_t i;
1081         int socketid;
1082
1083         for (i = 0; i < nb_lcore_params; ++i) {
1084                 queue = lcore_params[i].queue_id;
1085                 if (queue >= MAX_RX_QUEUE_PER_PORT) {
1086                         printf("invalid queue number: %hhu\n", queue);
1087                         return -1;
1088                 }
1089                 lcore = lcore_params[i].lcore_id;
1090                 if (!rte_lcore_is_enabled(lcore)) {
1091                         printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
1092                         return -1;
1093                 }
1094                 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) &&
1095                         (numa_on == 0)) {
1096                         printf("warning: lcore %hhu is on socket %d with numa off \n",
1097                                 lcore, socketid);
1098                 }
1099         }
1100         return 0;
1101 }
1102
1103 static int
1104 check_port_config(const unsigned nb_ports)
1105 {
1106         unsigned portid;
1107         uint16_t i;
1108
1109         for (i = 0; i < nb_lcore_params; ++i) {
1110                 portid = lcore_params[i].port_id;
1111                 if ((enabled_port_mask & (1 << portid)) == 0) {
1112                         printf("port %u is not enabled in port mask\n", portid);
1113                         return -1;
1114                 }
1115                 if (portid >= nb_ports) {
1116                         printf("port %u is not present on the board\n", portid);
1117                         return -1;
1118                 }
1119         }
1120         return 0;
1121 }
1122
1123 static uint8_t
1124 get_port_n_rx_queues(const uint8_t port)
1125 {
1126         int queue = -1;
1127         uint16_t i;
1128
1129         for (i = 0; i < nb_lcore_params; ++i) {
1130                 if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
1131                         queue = lcore_params[i].queue_id;
1132         }
1133         return (uint8_t)(++queue);
1134 }
1135
1136 static int
1137 init_lcore_rx_queues(void)
1138 {
1139         uint16_t i, nb_rx_queue;
1140         uint8_t lcore;
1141
1142         for (i = 0; i < nb_lcore_params; ++i) {
1143                 lcore = lcore_params[i].lcore_id;
1144                 nb_rx_queue = lcore_conf[lcore].n_rx_queue;
1145                 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
1146                         printf("error: too many queues (%u) for lcore: %u\n",
1147                                 (unsigned)nb_rx_queue + 1, (unsigned)lcore);
1148                         return -1;
1149                 } else {
1150                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
1151                                 lcore_params[i].port_id;
1152                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
1153                                 lcore_params[i].queue_id;
1154                         lcore_conf[lcore].n_rx_queue++;
1155                 }
1156         }
1157         return 0;
1158 }
1159
/*
 * Print the command-line synopsis and a description of each application
 * option to standard output.
 *
 * prgname: program name shown at the start of the synopsis.
 */
static void
print_usage(const char *prgname)
{
        /* The second "(port,queue,lcore)" group used to lack its closing ')' */
        printf ("%s [EAL options] -- -p PORTMASK -P"
                "  [--config (port,queue,lcore)[,(port,queue,lcore)]]"
                "  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
                "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
                "  -P : enable promiscuous mode\n"
                "  --config (port,queue,lcore): rx queues configuration\n"
                "  --no-numa: optional, disable numa awareness\n"
                "  --ipv6: optional, specify it if running ipv6 packets\n"
                "  --enable-jumbo: enable jumbo frame"
                " which max packet len is PKTLEN in decimal (64-9600)\n"
                "  --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n",
                prgname);
}
1177
/*
 * Parse a maximum packet length given as a decimal string.
 *
 * Returns the parsed value, or -1 when the string is empty, contains
 * anything other than a decimal number, is zero, or is too large to be
 * returned as an int.
 */
static int parse_max_pkt_len(const char *pktlen)
{
        char *end = NULL;
        unsigned long len;

        /* parse decimal string */
        len = strtoul(pktlen, &end, 10);
        if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
                return -1;

        /*
         * Reject values that do not fit in the return type. Converting them
         * anyway would keep only the low bits, turning e.g. 2^32 + 64 into a
         * plausible-looking 64.
         */
        if (len == 0 || len > INT32_MAX)
                return -1;

        return (int)len;
}
1193
/*
 * Parse a port mask given as a hexadecimal string.
 *
 * Returns the mask, or 0 when the string is empty, is not a hexadecimal
 * number, evaluates to zero, or has bits set above bit 31. 0 is used as the
 * failure value because it is never a valid mask, whereas -1 is
 * indistinguishable from the all-ports mask 0xffffffff once the result is
 * stored in an unsigned variable and compared against 0.
 */
static int
parse_portmask(const char *portmask)
{
        char *end = NULL;
        unsigned long pm;

        /* parse hexadecimal string */
        pm = strtoul(portmask, &end, 16);
        if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
                return 0;

        /* bits beyond 32 would be dropped silently by the conversion below */
        if (pm > UINT32_MAX)
                return 0;

        return (int)pm;
}
1210
1211 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
/*
 * Parse the number of hash entries given as a hexadecimal string.
 *
 * Returns the parsed value, or -1 when the string is empty, is not a
 * hexadecimal number, is zero, or is too large to be returned as an int.
 */
static int
parse_hash_entry_number(const char *hash_entry_num)
{
        char *end = NULL;
        unsigned long hash_en;

        /* parse hexadecimal string */
        hash_en = strtoul(hash_entry_num, &end, 16);
        if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
                return -1;

        /*
         * Reject values that do not fit in the return type. Converting them
         * anyway would keep only the low bits, turning e.g. 0x100000004
         * into 4.
         */
        if (hash_en == 0 || hash_en > INT32_MAX)
                return -1;

        return (int)hash_en;
}
1227 #endif
1228
1229 static int
1230 parse_config(const char *q_arg)
1231 {
1232         char s[256];
1233         const char *p, *p0 = q_arg;
1234         char *end;
1235         enum fieldnames {
1236                 FLD_PORT = 0,
1237                 FLD_QUEUE,
1238                 FLD_LCORE,
1239                 _NUM_FLD
1240         };
1241         unsigned long int_fld[_NUM_FLD];
1242         char *str_fld[_NUM_FLD];
1243         int i;
1244         unsigned size;
1245
1246         nb_lcore_params = 0;
1247
1248         while ((p = strchr(p0,'(')) != NULL) {
1249                 ++p;
1250                 if((p0 = strchr(p,')')) == NULL)
1251                         return -1;
1252
1253                 size = p0 - p;
1254                 if(size >= sizeof(s))
1255                         return -1;
1256
1257                 rte_snprintf(s, sizeof(s), "%.*s", size, p);
1258                 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
1259                         return -1;
1260                 for (i = 0; i < _NUM_FLD; i++){
1261                         errno = 0;
1262                         int_fld[i] = strtoul(str_fld[i], &end, 0);
1263                         if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
1264                                 return -1;
1265                 }
1266                 if (nb_lcore_params >= MAX_LCORE_PARAMS) {
1267                         printf("exceeded max number of lcore params: %hu\n",
1268                                 nb_lcore_params);
1269                         return -1;
1270                 }
1271                 lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT];
1272                 lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];
1273                 lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];
1274                 ++nb_lcore_params;
1275         }
1276         lcore_params = lcore_params_array;
1277         return 0;
1278 }
1279
/* Names of the long command-line options recognised by parse_args() */
#define CMD_LINE_OPT_CONFIG "config"
#define CMD_LINE_OPT_NO_NUMA "no-numa"
#define CMD_LINE_OPT_IPV6 "ipv6"
#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
1285
1286 /* Parse the argument given in the command line of the application */
1287 static int
1288 parse_args(int argc, char **argv)
1289 {
1290         int opt, ret;
1291         char **argvopt;
1292         int option_index;
1293         char *prgname = argv[0];
1294         static struct option lgopts[] = {
1295                 {CMD_LINE_OPT_CONFIG, 1, 0, 0},
1296                 {CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
1297                 {CMD_LINE_OPT_IPV6, 0, 0, 0},
1298                 {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
1299                 {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
1300                 {NULL, 0, 0, 0}
1301         };
1302
1303         argvopt = argv;
1304
1305         while ((opt = getopt_long(argc, argvopt, "p:P",
1306                                 lgopts, &option_index)) != EOF) {
1307
1308                 switch (opt) {
1309                 /* portmask */
1310                 case 'p':
1311                         enabled_port_mask = parse_portmask(optarg);
1312                         if (enabled_port_mask == 0) {
1313                                 printf("invalid portmask\n");
1314                                 print_usage(prgname);
1315                                 return -1;
1316                         }
1317                         break;
1318                 case 'P':
1319                         printf("Promiscuous mode selected\n");
1320                         promiscuous_on = 1;
1321                         break;
1322
1323                 /* long options */
1324                 case 0:
1325                         if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_CONFIG,
1326                                 sizeof (CMD_LINE_OPT_CONFIG))) {
1327                                 ret = parse_config(optarg);
1328                                 if (ret) {
1329                                         printf("invalid config\n");
1330                                         print_usage(prgname);
1331                                         return -1;
1332                                 }
1333                         }
1334
1335                         if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA,
1336                                 sizeof(CMD_LINE_OPT_NO_NUMA))) {
1337                                 printf("numa is disabled \n");
1338                                 numa_on = 0;
1339                         }
1340
1341 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1342                         if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6,
1343                                 sizeof(CMD_LINE_OPT_IPV6))) {
1344                                 printf("ipv6 is specified \n");
1345                                 ipv6 = 1;
1346                         }
1347 #endif
1348
1349                         if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
1350                                 sizeof (CMD_LINE_OPT_ENABLE_JUMBO))) {
1351                                 struct option lenopts = {"max-pkt-len", required_argument, 0, 0};
1352
1353                                 printf("jumbo frame is enabled - disabling simple TX path\n");
1354                                 port_conf.rxmode.jumbo_frame = 1;
1355                                 tx_conf.txq_flags = 0;
1356
1357                                 /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
1358                                 if (0 == getopt_long(argc, argvopt, "", &lenopts, &option_index)) {
1359                                         ret = parse_max_pkt_len(optarg);
1360                                         if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)){
1361                                                 printf("invalid packet length\n");
1362                                                 print_usage(prgname);
1363                                                 return -1;
1364                                         }
1365                                         port_conf.rxmode.max_rx_pkt_len = ret;
1366                                 }
1367                                 printf("set jumbo frame max packet length to %u\n",
1368                                                 (unsigned int)port_conf.rxmode.max_rx_pkt_len);
1369                         }
1370 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1371                         if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM,
1372                                 sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {
1373                                 ret = parse_hash_entry_number(optarg);
1374                                 if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
1375                                         hash_entry_number = ret;
1376                                 } else {
1377                                         printf("invalid hash entry number\n");
1378                                         print_usage(prgname);
1379                                         return -1;
1380                                 }
1381                         }
1382 #endif
1383                         break;
1384
1385                 default:
1386                         print_usage(prgname);
1387                         return -1;
1388                 }
1389         }
1390
1391         if (optind >= 0)
1392                 argv[optind-1] = prgname;
1393
1394         ret = optind-1;
1395         optind = 0; /* reset getopt lib */
1396         return ret;
1397 }
1398
1399 static void
1400 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
1401 {
1402         printf ("%s%02X:%02X:%02X:%02X:%02X:%02X", name,
1403                 eth_addr->addr_bytes[0],
1404                 eth_addr->addr_bytes[1],
1405                 eth_addr->addr_bytes[2],
1406                 eth_addr->addr_bytes[3],
1407                 eth_addr->addr_bytes[4],
1408                 eth_addr->addr_bytes[5]);
1409 }
1410
1411 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1412
1413 static void convert_ipv4_5tuple(struct ipv4_5tuple* key1,
1414                 union ipv4_5tuple_host* key2)
1415 {
1416         key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst);
1417         key2->ip_src = rte_cpu_to_be_32(key1->ip_src);
1418         key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
1419         key2->port_src = rte_cpu_to_be_16(key1->port_src);
1420         key2->proto = key1->proto;
1421         key2->pad0 = 0;
1422         key2->pad1 = 0;
1423         return;
1424 }
1425
1426 static void convert_ipv6_5tuple(struct ipv6_5tuple* key1,
1427                 union ipv6_5tuple_host* key2)
1428 {
1429         uint32_t i;
1430         for (i = 0; i < 16; i++)
1431         {
1432                 key2->ip_dst[i] = key1->ip_dst[i];
1433                 key2->ip_src[i] = key1->ip_src[i];
1434         }
1435         key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
1436         key2->port_src = rte_cpu_to_be_16(key1->port_src);
1437         key2->proto = key1->proto;
1438         key2->pad0 = 0;
1439         key2->pad1 = 0;
1440         key2->reserve = 0;
1441         return;
1442 }
1443
/* Number of distinct values a byte can take */
#define BYTE_VALUE_MAX 256
/* 32-bit word with every bit set */
#define ALL_32_BITS 0xffffffff
/* 32-bit word with only bits 8 to 15 set */
#define BIT_8_TO_15 0x0000ff00
1447 static inline void
1448 populate_ipv4_few_flow_into_table(const struct rte_hash* h)
1449 {
1450         uint32_t i;
1451         int32_t ret;
1452         uint32_t array_len = sizeof(ipv4_l3fwd_route_array)/sizeof(ipv4_l3fwd_route_array[0]);
1453
1454         mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
1455         for (i = 0; i < array_len; i++) {
1456                 struct ipv4_l3fwd_route  entry;
1457                 union ipv4_5tuple_host newkey;
1458                 entry = ipv4_l3fwd_route_array[i];
1459                 convert_ipv4_5tuple(&entry.key, &newkey);
1460                 ret = rte_hash_add_key (h,(void *) &newkey);
1461                 if (ret < 0) {
1462                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
1463                                 "l3fwd hash.\n", i);
1464                 }
1465                 ipv4_l3fwd_out_if[ret] = entry.if_out;
1466         }
1467         printf("Hash: Adding 0x%x keys\n", array_len);
1468 }
1469
1470 #define BIT_16_TO_23 0x00ff0000
1471 static inline void
1472 populate_ipv6_few_flow_into_table(const struct rte_hash* h)
1473 {
1474         uint32_t i;
1475         int32_t ret;
1476         uint32_t array_len = sizeof(ipv6_l3fwd_route_array)/sizeof(ipv6_l3fwd_route_array[0]);
1477
1478         mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
1479         mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
1480         for (i = 0; i < array_len; i++) {
1481                 struct ipv6_l3fwd_route entry;
1482                 union ipv6_5tuple_host newkey;
1483                 entry = ipv6_l3fwd_route_array[i];
1484                 convert_ipv6_5tuple(&entry.key, &newkey);
1485                 ret = rte_hash_add_key (h, (void *) &newkey);
1486                 if (ret < 0) {
1487                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
1488                                 "l3fwd hash.\n", i);
1489                 }
1490                 ipv6_l3fwd_out_if[ret] = entry.if_out;
1491         }
1492         printf("Hash: Adding 0x%xkeys\n", array_len);
1493 }
1494
1495 #define NUMBER_PORT_USED 4
1496 static inline void
1497 populate_ipv4_many_flow_into_table(const struct rte_hash* h,
1498                 unsigned int nr_flow)
1499 {
1500         unsigned i;
1501         mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
1502         for (i = 0; i < nr_flow; i++) {
1503                 struct ipv4_l3fwd_route entry;
1504                 union ipv4_5tuple_host newkey;
1505                 uint8_t a = (uint8_t) ((i/NUMBER_PORT_USED)%BYTE_VALUE_MAX);
1506                 uint8_t b = (uint8_t) (((i/NUMBER_PORT_USED)/BYTE_VALUE_MAX)%BYTE_VALUE_MAX);
1507                 uint8_t c = (uint8_t) ((i/NUMBER_PORT_USED)/(BYTE_VALUE_MAX*BYTE_VALUE_MAX));
1508                 /* Create the ipv4 exact match flow */
1509                 memset(&entry, 0, sizeof(entry));
1510                 switch (i & (NUMBER_PORT_USED -1)) {
1511                 case 0:
1512                         entry = ipv4_l3fwd_route_array[0];
1513                         entry.key.ip_dst = IPv4(101,c,b,a);
1514                         break;
1515                 case 1:
1516                         entry = ipv4_l3fwd_route_array[1];
1517                         entry.key.ip_dst = IPv4(201,c,b,a);
1518                         break;
1519                 case 2:
1520                         entry = ipv4_l3fwd_route_array[2];
1521                         entry.key.ip_dst = IPv4(111,c,b,a);
1522                         break;
1523                 case 3:
1524                         entry = ipv4_l3fwd_route_array[3];
1525                         entry.key.ip_dst = IPv4(211,c,b,a);
1526                         break;
1527                 };
1528                 convert_ipv4_5tuple(&entry.key, &newkey);
1529                 int32_t ret = rte_hash_add_key(h,(void *) &newkey);
1530                 if (ret < 0) {
1531                         rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
1532                 }
1533                 ipv4_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
1534
1535         }
1536         printf("Hash: Adding 0x%x keys\n", nr_flow);
1537 }
1538
1539 static inline void
1540 populate_ipv6_many_flow_into_table(const struct rte_hash* h,
1541                 unsigned int nr_flow)
1542 {
1543         unsigned i;
1544         mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
1545         mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
1546         for (i = 0; i < nr_flow; i++) {
1547                 struct ipv6_l3fwd_route entry;
1548                 union ipv6_5tuple_host newkey;
1549                 uint8_t a = (uint8_t) ((i/NUMBER_PORT_USED)%BYTE_VALUE_MAX);
1550                 uint8_t b = (uint8_t) (((i/NUMBER_PORT_USED)/BYTE_VALUE_MAX)%BYTE_VALUE_MAX);
1551                 uint8_t c = (uint8_t) ((i/NUMBER_PORT_USED)/(BYTE_VALUE_MAX*BYTE_VALUE_MAX));
1552                 /* Create the ipv6 exact match flow */
1553                 memset(&entry, 0, sizeof(entry));
1554                 switch (i & (NUMBER_PORT_USED - 1)) {
1555                 case 0: entry = ipv6_l3fwd_route_array[0]; break;
1556                 case 1: entry = ipv6_l3fwd_route_array[1]; break;
1557                 case 2: entry = ipv6_l3fwd_route_array[2]; break;
1558                 case 3: entry = ipv6_l3fwd_route_array[3]; break;
1559                 };
1560                 entry.key.ip_dst[13] = c;
1561                 entry.key.ip_dst[14] = b;
1562                 entry.key.ip_dst[15] = a;
1563                 convert_ipv6_5tuple(&entry.key, &newkey);
1564                 int32_t ret = rte_hash_add_key(h,(void *) &newkey);
1565                 if (ret < 0) {
1566                         rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
1567                 }
1568                 ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
1569
1570         }
1571         printf("Hash: Adding 0x%x keys\n", nr_flow);
1572 }
1573
1574 static void
1575 setup_hash(int socketid)
1576 {
1577     struct rte_hash_parameters ipv4_l3fwd_hash_params = {
1578         .name = NULL,
1579         .entries = L3FWD_HASH_ENTRIES,
1580         .bucket_entries = 4,
1581         .key_len = sizeof(union ipv4_5tuple_host),
1582         .hash_func = ipv4_hash_crc,
1583         .hash_func_init_val = 0,
1584     };
1585
1586     struct rte_hash_parameters ipv6_l3fwd_hash_params = {
1587         .name = NULL,
1588         .entries = L3FWD_HASH_ENTRIES,
1589         .bucket_entries = 4,
1590         .key_len = sizeof(union ipv6_5tuple_host),
1591         .hash_func = ipv6_hash_crc,
1592         .hash_func_init_val = 0,
1593     };
1594
1595     char s[64];
1596
1597         /* create ipv4 hash */
1598         rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
1599         ipv4_l3fwd_hash_params.name = s;
1600         ipv4_l3fwd_hash_params.socket_id = socketid;
1601         ipv4_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv4_l3fwd_hash_params);
1602         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1603                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
1604                                 "socket %d\n", socketid);
1605
1606         /* create ipv6 hash */
1607         rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
1608         ipv6_l3fwd_hash_params.name = s;
1609         ipv6_l3fwd_hash_params.socket_id = socketid;
1610         ipv6_l3fwd_lookup_struct[socketid] = rte_hash_create(&ipv6_l3fwd_hash_params);
1611         if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
1612                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
1613                                 "socket %d\n", socketid);
1614
1615         if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) {
1616                 /* For testing hash matching with a large number of flows we
1617                  * generate millions of IP 5-tuples with an incremented dst
1618                  * address to initialize the hash table. */
1619                 if (ipv6 == 0) {
1620                         /* populate the ipv4 hash */
1621                         populate_ipv4_many_flow_into_table(
1622                                 ipv4_l3fwd_lookup_struct[socketid], hash_entry_number);
1623                 } else {
1624                         /* populate the ipv6 hash */
1625                         populate_ipv6_many_flow_into_table(
1626                                 ipv6_l3fwd_lookup_struct[socketid], hash_entry_number);
1627                 }
1628         } else {
1629                 /* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize the hash table */
1630                 if (ipv6 == 0) {
1631                         /* populate the ipv4 hash */
1632                         populate_ipv4_few_flow_into_table(ipv4_l3fwd_lookup_struct[socketid]);
1633                 } else {
1634                         /* populate the ipv6 hash */
1635                         populate_ipv6_few_flow_into_table(ipv6_l3fwd_lookup_struct[socketid]);
1636                 }
1637         }
1638 }
1639 #endif
1640
1641 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1642 static void
1643 setup_lpm(int socketid)
1644 {
1645         struct rte_lpm6_config config;
1646         unsigned i;
1647         int ret;
1648         char s[64];
1649
1650         /* create the LPM table */
1651         rte_snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
1652         ipv4_l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid,
1653                                 IPV4_L3FWD_LPM_MAX_RULES, 0);
1654         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1655                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
1656                                 " on socket %d\n", socketid);
1657
1658         /* populate the LPM table */
1659         for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
1660                 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
1661                         ipv4_l3fwd_route_array[i].ip,
1662                         ipv4_l3fwd_route_array[i].depth,
1663                         ipv4_l3fwd_route_array[i].if_out);
1664
1665                 if (ret < 0) {
1666                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
1667                                 "l3fwd LPM table on socket %d\n",
1668                                 i, socketid);
1669                 }
1670
1671                 printf("LPM: Adding route 0x%08x / %d (%d)\n",
1672                         (unsigned)ipv4_l3fwd_route_array[i].ip,
1673                         ipv4_l3fwd_route_array[i].depth,
1674                         ipv4_l3fwd_route_array[i].if_out);
1675         }
1676
1677         /* create the LPM6 table */
1678         rte_snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
1679
1680         config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
1681         config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
1682         config.flags = 0;
1683         ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
1684                                 &config);
1685         if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
1686                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
1687                                 " on socket %d\n", socketid);
1688
1689         /* populate the LPM table */
1690         for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
1691                 ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
1692                         ipv6_l3fwd_route_array[i].ip,
1693                         ipv6_l3fwd_route_array[i].depth,
1694                         ipv6_l3fwd_route_array[i].if_out);
1695
1696                 if (ret < 0) {
1697                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
1698                                 "l3fwd LPM table on socket %d\n",
1699                                 i, socketid);
1700                 }
1701
1702                 printf("LPM: Adding route %s / %d (%d)\n",
1703                         "IPV6",
1704                         ipv6_l3fwd_route_array[i].depth,
1705                         ipv6_l3fwd_route_array[i].if_out);
1706         }
1707 }
1708 #endif
1709
1710 static int
1711 init_mem(unsigned nb_mbuf)
1712 {
1713         struct lcore_conf *qconf;
1714         int socketid;
1715         unsigned lcore_id;
1716         char s[64];
1717
1718         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1719                 if (rte_lcore_is_enabled(lcore_id) == 0)
1720                         continue;
1721
1722                 if (numa_on)
1723                         socketid = rte_lcore_to_socket_id(lcore_id);
1724                 else
1725                         socketid = 0;
1726
1727                 if (socketid >= NB_SOCKETS) {
1728                         rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
1729                                 socketid, lcore_id, NB_SOCKETS);
1730                 }
1731                 if (pktmbuf_pool[socketid] == NULL) {
1732                         rte_snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
1733                         pktmbuf_pool[socketid] =
1734                                 rte_mempool_create(s, nb_mbuf, MBUF_SIZE, MEMPOOL_CACHE_SIZE,
1735                                         sizeof(struct rte_pktmbuf_pool_private),
1736                                         rte_pktmbuf_pool_init, NULL,
1737                                         rte_pktmbuf_init, NULL,
1738                                         socketid, 0);
1739                         if (pktmbuf_pool[socketid] == NULL)
1740                                 rte_exit(EXIT_FAILURE,
1741                                                 "Cannot init mbuf pool on socket %d\n", socketid);
1742                         else
1743                                 printf("Allocated mbuf pool on socket %d\n", socketid);
1744
1745 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1746                         setup_lpm(socketid);
1747 #else
1748                         setup_hash(socketid);
1749 #endif
1750                 }
1751                 qconf = &lcore_conf[lcore_id];
1752                 qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
1753                 qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
1754         }
1755         return 0;
1756 }
1757
1758 /* Check the link status of all ports in up to 9s, and print them finally */
1759 static void
1760 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
1761 {
1762 #define CHECK_INTERVAL 100 /* 100ms */
1763 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
1764         uint8_t portid, count, all_ports_up, print_flag = 0;
1765         struct rte_eth_link link;
1766
1767         printf("\nChecking link status");
1768         fflush(stdout);
1769         for (count = 0; count <= MAX_CHECK_TIME; count++) {
1770                 all_ports_up = 1;
1771                 for (portid = 0; portid < port_num; portid++) {
1772                         if ((port_mask & (1 << portid)) == 0)
1773                                 continue;
1774                         memset(&link, 0, sizeof(link));
1775                         rte_eth_link_get_nowait(portid, &link);
1776                         /* print link status if flag set */
1777                         if (print_flag == 1) {
1778                                 if (link.link_status)
1779                                         printf("Port %d Link Up - speed %u "
1780                                                 "Mbps - %s\n", (uint8_t)portid,
1781                                                 (unsigned)link.link_speed,
1782                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
1783                                         ("full-duplex") : ("half-duplex\n"));
1784                                 else
1785                                         printf("Port %d Link Down\n",
1786                                                 (uint8_t)portid);
1787                                 continue;
1788                         }
1789                         /* clear all_ports_up flag if any link down */
1790                         if (link.link_status == 0) {
1791                                 all_ports_up = 0;
1792                                 break;
1793                         }
1794                 }
1795                 /* after finally printing all link status, get out */
1796                 if (print_flag == 1)
1797                         break;
1798
1799                 if (all_ports_up == 0) {
1800                         printf(".");
1801                         fflush(stdout);
1802                         rte_delay_ms(CHECK_INTERVAL);
1803                 }
1804
1805                 /* set the print_flag if all ports up or timeout */
1806                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
1807                         print_flag = 1;
1808                         printf("done\n");
1809                 }
1810         }
1811 }
1812
1813 int
1814 MAIN(int argc, char **argv)
1815 {
1816         struct lcore_conf *qconf;
1817         int ret;
1818         unsigned nb_ports;
1819         uint16_t queueid;
1820         unsigned lcore_id;
1821         uint32_t n_tx_queue, nb_lcores;
1822         uint8_t portid, nb_rx_queue, queue, socketid;
1823
1824         /* init EAL */
1825         ret = rte_eal_init(argc, argv);
1826         if (ret < 0)
1827                 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
1828         argc -= ret;
1829         argv += ret;
1830
1831         /* parse application arguments (after the EAL ones) */
1832         ret = parse_args(argc, argv);
1833         if (ret < 0)
1834                 rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
1835
1836         if (check_lcore_params() < 0)
1837                 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
1838
1839         ret = init_lcore_rx_queues();
1840         if (ret < 0)
1841                 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
1842
1843
1844         if (rte_eal_pci_probe() < 0)
1845                 rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");
1846
1847         nb_ports = rte_eth_dev_count();
1848         if (nb_ports > RTE_MAX_ETHPORTS)
1849                 nb_ports = RTE_MAX_ETHPORTS;
1850
1851         if (check_port_config(nb_ports) < 0)
1852                 rte_exit(EXIT_FAILURE, "check_port_config failed\n");
1853
1854         nb_lcores = rte_lcore_count();
1855
1856         /* initialize all ports */
1857         for (portid = 0; portid < nb_ports; portid++) {
1858                 /* skip ports that are not enabled */
1859                 if ((enabled_port_mask & (1 << portid)) == 0) {
1860                         printf("\nSkipping disabled port %d\n", portid);
1861                         continue;
1862                 }
1863
1864                 /* init port */
1865                 printf("Initializing port %d ... ", portid );
1866                 fflush(stdout);
1867
1868                 nb_rx_queue = get_port_n_rx_queues(portid);
1869                 n_tx_queue = nb_lcores;
1870                 if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
1871                         n_tx_queue = MAX_TX_QUEUE_PER_PORT;
1872                 printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
1873                         nb_rx_queue, (unsigned)n_tx_queue );
1874                 ret = rte_eth_dev_configure(portid, nb_rx_queue,
1875                                         (uint16_t)n_tx_queue, &port_conf);
1876                 if (ret < 0)
1877                         rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
1878                                 ret, portid);
1879
1880                 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
1881                 print_ethaddr(" Address:", &ports_eth_addr[portid]);
1882                 printf(", ");
1883
1884                 /* init memory */
1885                 ret = init_mem(NB_MBUF);
1886                 if (ret < 0)
1887                         rte_exit(EXIT_FAILURE, "init_mem failed\n");
1888
1889                 /* init one TX queue per couple (lcore,port) */
1890                 queueid = 0;
1891                 for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1892                         if (rte_lcore_is_enabled(lcore_id) == 0)
1893                                 continue;
1894
1895                         if (numa_on)
1896                                 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
1897                         else
1898                                 socketid = 0;
1899
1900                         printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
1901                         fflush(stdout);
1902                         ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
1903                                                      socketid, &tx_conf);
1904                         if (ret < 0)
1905                                 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
1906                                         "port=%d\n", ret, portid);
1907
1908                         qconf = &lcore_conf[lcore_id];
1909                         qconf->tx_queue_id[portid] = queueid;
1910                         queueid++;
1911                 }
1912                 printf("\n");
1913         }
1914
1915         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1916                 if (rte_lcore_is_enabled(lcore_id) == 0)
1917                         continue;
1918                 qconf = &lcore_conf[lcore_id];
1919                 printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
1920                 fflush(stdout);
1921                 /* init RX queues */
1922                 for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
1923                         portid = qconf->rx_queue_list[queue].port_id;
1924                         queueid = qconf->rx_queue_list[queue].queue_id;
1925
1926                         if (numa_on)
1927                                 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
1928                         else
1929                                 socketid = 0;
1930
1931                         printf("rxq=%d,%d,%d ", portid, queueid, socketid);
1932                         fflush(stdout);
1933
1934                         ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
1935                                         socketid, &rx_conf, pktmbuf_pool[socketid]);
1936                         if (ret < 0)
1937                                 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d,"
1938                                                 "port=%d\n", ret, portid);
1939                 }
1940         }
1941
1942         printf("\n");
1943
1944         /* start ports */
1945         for (portid = 0; portid < nb_ports; portid++) {
1946                 if ((enabled_port_mask & (1 << portid)) == 0) {
1947                         continue;
1948                 }
1949                 /* Start device */
1950                 ret = rte_eth_dev_start(portid);
1951                 if (ret < 0)
1952                         rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
1953                                 ret, portid);
1954
1955                 /*
1956                  * If enabled, put device in promiscuous mode.
1957                  * This allows IO forwarding mode to forward packets
1958                  * to itself through 2 cross-connected  ports of the
1959                  * target machine.
1960                  */
1961                 if (promiscuous_on)
1962                         rte_eth_promiscuous_enable(portid);
1963         }
1964
1965         check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
1966
1967         /* launch per-lcore init on every lcore */
1968         rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
1969         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1970                 if (rte_eal_wait_lcore(lcore_id) < 0)
1971                         return -1;
1972         }
1973
1974         return 0;
1975 }