mbuf: remove the rte_pktmbuf structure
[dpdk.git] / examples / l3fwd-vf / main.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <sys/queue.h>
41 #include <stdarg.h>
42 #include <errno.h>
43 #include <getopt.h>
44 #include <signal.h>
45
46 #include <rte_common.h>
47 #include <rte_byteorder.h>
48 #include <rte_log.h>
49 #include <rte_memory.h>
50 #include <rte_memcpy.h>
51 #include <rte_memzone.h>
52 #include <rte_tailq.h>
53 #include <rte_eal.h>
54 #include <rte_per_lcore.h>
55 #include <rte_launch.h>
56 #include <rte_atomic.h>
57 #include <rte_cycles.h>
58 #include <rte_prefetch.h>
59 #include <rte_lcore.h>
60 #include <rte_per_lcore.h>
61 #include <rte_branch_prediction.h>
62 #include <rte_interrupts.h>
63 #include <rte_pci.h>
64 #include <rte_random.h>
65 #include <rte_debug.h>
66 #include <rte_ether.h>
67 #include <rte_ethdev.h>
68 #include <rte_ring.h>
69 #include <rte_mempool.h>
70 #include <rte_mbuf.h>
71 #include <rte_ip.h>
72 #include <rte_tcp.h>
73 #include <rte_udp.h>
74 #include <rte_string_fns.h>
75
76 #include "main.h"
77
/* Route lookup back-ends this file can be built with. */
#define APP_LOOKUP_EXACT_MATCH          0
#define APP_LOOKUP_LPM                  1
/* When defined, IPv4 headers are validated and TTL/checksum updated on forward. */
#define DO_RFC_1812_CHECKS

/* Uncomment the next line to build with the exact-match (hash) lookup. */
//#define APP_LOOKUP_METHOD             APP_LOOKUP_EXACT_MATCH
/* LPM is the default when no lookup method was selected. */
#ifndef APP_LOOKUP_METHOD
#define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
#endif

/* Include the library header that matches the selected lookup method. */
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
#include <rte_hash.h>
#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
#include <rte_lpm.h>
#else
#error "APP_LOOKUP_METHOD set to incorrect value"
#endif
94
/* Log type used by the RTE_LOG(..., L3FWD, ...) calls in this file. */
#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

/* Per-lcore mempool cache size; also accounted for in NB_MBUF. */
#define MEMPOOL_CACHE_SIZE 256

/* Size of one pool element: 2048 bytes plus the mbuf header and the headroom. */
#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)

/*
 * Number of mbufs needed for the configuration selected by the user. The sum
 * covers the RX and TX hardware rings, one burst table per port per lcore and
 * the mempool cache of every lcore. RTE_MAX ensures that NB_MBUF never goes
 * below a minimum value of 8192.
 *
 * The expansion reads nb_ports, nb_rx_queue, nb_lcores and n_tx_queue, so
 * those names must be in scope wherever NB_MBUF is used.
 */

#define NB_MBUF RTE_MAX (                                                                                                                                       \
                                (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT +                                                        \
                                nb_ports*nb_lcores*MAX_PKT_BURST +                                                                                      \
                                nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT +                                                          \
                                nb_lcores*MEMPOOL_CACHE_SIZE),                                                                                          \
                                (unsigned)8192)
113
/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */

/* Largest burst read from an RX queue and capacity of each pending TX table. */
#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/* Number of entries in the per-socket arrays (mempools, lookup structures). */
#define NB_SOCKETS 8

/* Socket id used as the initial value of the hash parameters. */
#define SOCKET0 0

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
150
/* Ethernet addresses of ports, indexed by port id; used as source MAC on forward */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/* Mask of enabled ports: bit N set means port N is enabled (set from -p) */
static uint32_t enabled_port_mask = 0;
static int numa_on = 1; /**< NUMA is enabled by default; cleared by --no-numa. */
157
/* Packets waiting to be transmitted on one port, flushed as a single burst. */
struct mbuf_table {
        uint16_t len;                           /* number of valid entries in m_table */
        struct rte_mbuf *m_table[MAX_PKT_BURST];
};
162
/* One (port, queue) pair polled by an lcore. */
struct lcore_rx_queue {
        uint8_t port_id;
        uint8_t queue_id;
} __rte_cache_aligned;
167
/* Capacity of lcore_conf.rx_queue_list. */
#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 1
/* Upper bound (exclusive) on queue ids accepted by check_lcore_params(). */
#define MAX_RX_QUEUE_PER_PORT 1

/* Capacity of the table filled from the --config option. */
#define MAX_LCORE_PARAMS 1024
/* One "(port,queue,lcore)" triple: the lcore that polls the given RX queue. */
struct lcore_params {
        uint8_t port_id;
        uint8_t queue_id;
        uint8_t lcore_id;
} __rte_cache_aligned;
178
/* Storage for the triples parsed from --config. */
static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
/*
 * Built-in mapping used when --config is not given; entries are
 * {port_id, queue_id, lcore_id}.
 * NOTE(review): several entries use queue ids 1 and 2, while
 * check_lcore_params() rejects any queue id >= MAX_RX_QUEUE_PER_PORT (1) --
 * confirm whether these defaults are still meant to be usable.
 */
static struct lcore_params lcore_params_array_default[] = {
        {0, 0, 2},
        {0, 1, 2},
        {0, 2, 2},
        {1, 0, 2},
        {1, 1, 2},
        {1, 2, 2},
        {2, 0, 2},
        {3, 0, 3},
        {3, 1, 3},
};

/* Active mapping and its length; parse_config() redirects both to the parsed table. */
static struct lcore_params * lcore_params = lcore_params_array_default;
static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
                                sizeof(lcore_params_array_default[0]);
195
/* Ethernet device configuration: RSS on RX over IP fields, no TX multi-queue mode. */
static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
                .header_split   = 0, /**< Header Split disabled */
                .hw_ip_checksum = 1, /**< IP checksum offload enabled */
                .hw_vlan_filter = 0, /**< VLAN filtering disabled */
                .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
                .hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
        },
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL, /* no application-supplied RSS key */
                        .rss_hf = ETH_RSS_IP,
                },
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
        },
};
217
/* RX queue configuration built from the RX_*THRESH defaults above. */
static const struct rte_eth_rxconf rx_conf = {
        .rx_thresh = {
                .pthresh = RX_PTHRESH,
                .hthresh = RX_HTHRESH,
                .wthresh = RX_WTHRESH,
        },
        .rx_free_thresh = 32,
};
226
/*
 * TX queue configuration built from the TX_*THRESH defaults above. The
 * txq_flags turn off multi-segment, VLAN offload and SCTP/UDP/TCP checksum
 * support on the TX queues.
 */
static const struct rte_eth_txconf tx_conf = {
        .tx_thresh = {
                .pthresh = TX_PTHRESH,
                .hthresh = TX_HTHRESH,
                .wthresh = TX_WTHRESH,
        },
        .tx_free_thresh = 0, /* Use PMD default values */
        .tx_rs_thresh = 0, /* Use PMD default values */
        .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS |
                      ETH_TXQ_FLAGS_NOVLANOFFL |
                      ETH_TXQ_FLAGS_NOXSUMSCTP |
                      ETH_TXQ_FLAGS_NOXSUMUDP |
                      ETH_TXQ_FLAGS_NOXSUMTCP)
};
241
/* One packet mbuf pool per socket, indexed by socket id. */
static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
243
244
/* Definitions used only when the exact-match (hash) lookup is selected. */
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)

/* Use the CRC hash when the SSE4.2 CPU flag is defined, jhash otherwise. */
#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC       rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC       rte_jhash
#endif

/* Hash key; packed so that sizeof() contains no padding bytes. */
struct ipv4_5tuple {
        uint32_t ip_dst;
        uint32_t ip_src;
        uint16_t port_dst;
        uint16_t port_src;
        uint8_t proto;
} __attribute__((__packed__));

/* One static route: a 5-tuple and the output interface it maps to. */
struct l3fwd_route {
        struct ipv4_5tuple key;
        uint8_t if_out;
};

/* Routes loaded into every hash table by setup_hash(). */
static struct l3fwd_route l3fwd_route_array[] = {
        {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
        {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
        {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
        {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
};

/* One lookup structure per socket, indexed by socket id. */
typedef struct rte_hash lookup_struct_t;
static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS];

#define L3FWD_HASH_ENTRIES      1024
/* Template parameters; setup_hash() overwrites name and socket_id per socket. */
struct rte_hash_parameters l3fwd_hash_params = {
        .name = "l3fwd_hash_0",
        .entries = L3FWD_HASH_ENTRIES,
        .bucket_entries = 4,
        .key_len = sizeof(struct ipv4_5tuple),
        .hash_func = DEFAULT_HASH_FUNC,
        .hash_func_init_val = 0,
        .socket_id = SOCKET0,
};

#define L3FWD_NUM_ROUTES \
        (sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0]))

/* Output interface for each hash entry, indexed by the value the hash library returns. */
static uint8_t l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
#endif
294
/* Definitions used only when the LPM lookup is selected. */
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
/* One static route: IPv4 prefix, prefix length and output interface. */
struct l3fwd_route {
        uint32_t ip;
        uint8_t  depth;
        uint8_t  if_out;
};

/* Routes loaded into every LPM table: N.1.1.0/24 goes to interface N-1. */
static struct l3fwd_route l3fwd_route_array[] = {
        {IPv4(1,1,1,0), 24, 0},
        {IPv4(2,1,1,0), 24, 1},
        {IPv4(3,1,1,0), 24, 2},
        {IPv4(4,1,1,0), 24, 3},
        {IPv4(5,1,1,0), 24, 4},
        {IPv4(6,1,1,0), 24, 5},
        {IPv4(7,1,1,0), 24, 6},
        {IPv4(8,1,1,0), 24, 7},
};

#define L3FWD_NUM_ROUTES \
        (sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0]))

/* Maximum number of rules requested when an LPM table is created. */
#define L3FWD_LPM_MAX_RULES     1024

/* One lookup structure per socket, indexed by socket id. */
typedef struct rte_lpm lookup_struct_t;
static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS];
#endif
321
/* Per-lcore forwarding state. */
struct lcore_conf {
        uint16_t n_rx_queue;            /* number of valid entries in rx_queue_list */
        struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
        uint16_t tx_queue_id;           /* TX queue this lcore uses on every port */
        struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS]; /* pending TX packets per port */
        lookup_struct_t * lookup_struct; /* route lookup structure used by this lcore */
} __rte_cache_aligned;

/* Configuration of every lcore, indexed by lcore id. */
static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
331
332 /* Send burst of packets on an output interface */
333 static inline int
334 send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
335 {
336         struct rte_mbuf **m_table;
337         int ret;
338         uint16_t queueid;
339
340         queueid = qconf->tx_queue_id;
341         m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
342
343         ret = rte_eth_tx_burst(port, queueid, m_table, n);
344         if (unlikely(ret < n)) {
345                 do {
346                         rte_pktmbuf_free(m_table[ret]);
347                 } while (++ret < n);
348         }
349
350         return 0;
351 }
352
353 /* Enqueue a single packet, and send burst if queue is filled */
354 static inline int
355 send_single_packet(struct rte_mbuf *m, uint8_t port)
356 {
357         uint32_t lcore_id;
358         uint16_t len;
359         struct lcore_conf *qconf;
360
361         lcore_id = rte_lcore_id();
362
363         qconf = &lcore_conf[lcore_id];
364         len = qconf->tx_mbufs[port].len;
365         qconf->tx_mbufs[port].m_table[len] = m;
366         len++;
367
368         /* enough pkts to be sent */
369         if (unlikely(len == MAX_PKT_BURST)) {
370                 send_burst(qconf, MAX_PKT_BURST, port);
371                 len = 0;
372         }
373
374         qconf->tx_mbufs[port].len = len;
375         return 0;
376 }
377
378 #ifdef DO_RFC_1812_CHECKS
379 static inline int
380 is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
381 {
382         /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
383         /*
384          * 1. The packet length reported by the Link Layer must be large
385          * enough to hold the minimum length legal IP datagram (20 bytes).
386          */
387         if (link_len < sizeof(struct ipv4_hdr))
388                 return -1;
389
390         /* 2. The IP checksum must be correct. */
391         /* this is checked in H/W */
392
393         /*
394          * 3. The IP version number must be 4. If the version number is not 4
395          * then the packet may be another version of IP, such as IPng or
396          * ST-II.
397          */
398         if (((pkt->version_ihl) >> 4) != 4)
399                 return -3;
400         /*
401          * 4. The IP header length field must be large enough to hold the
402          * minimum length legal IP datagram (20 bytes = 5 words).
403          */
404         if ((pkt->version_ihl & 0xf) < 5)
405                 return -4;
406
407         /*
408          * 5. The IP total length field must be large enough to hold the IP
409          * datagram header, whose length is specified in the IP header length
410          * field.
411          */
412         if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
413                 return -5;
414
415         return 0;
416 }
417 #endif
418
419 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
420 static void
421 print_key(struct ipv4_5tuple key)
422 {
423         printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, proto = %d\n",
424                (unsigned)key.ip_dst, (unsigned)key.ip_src, key.port_dst, key.port_src, key.proto);
425 }
426
427 static inline uint8_t
428 get_dst_port(struct ipv4_hdr *ipv4_hdr,  uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
429 {
430         struct ipv4_5tuple key;
431         struct tcp_hdr *tcp;
432         struct udp_hdr *udp;
433         int ret = 0;
434
435         key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
436         key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
437         key.proto = ipv4_hdr->next_proto_id;
438
439         switch (ipv4_hdr->next_proto_id) {
440         case IPPROTO_TCP:
441                 tcp = (struct tcp_hdr *)((unsigned char *) ipv4_hdr +
442                                         sizeof(struct ipv4_hdr));
443                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
444                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
445                 break;
446
447         case IPPROTO_UDP:
448                 udp = (struct udp_hdr *)((unsigned char *) ipv4_hdr +
449                                         sizeof(struct ipv4_hdr));
450                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
451                 key.port_src = rte_be_to_cpu_16(udp->src_port);
452                 break;
453
454         default:
455                 key.port_dst = 0;
456                 key.port_src = 0;
457         }
458
459         /* Find destination port */
460         ret = rte_hash_lookup(l3fwd_lookup_struct, (const void *)&key);
461         return (uint8_t)((ret < 0)? portid : l3fwd_out_if[ret]);
462 }
463 #endif
464
465 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
466 static inline uint8_t
467 get_dst_port(struct ipv4_hdr *ipv4_hdr,  uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
468 {
469         uint8_t next_hop;
470
471         return (uint8_t) ((rte_lpm_lookup(l3fwd_lookup_struct,
472                         rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
473                         next_hop : portid);
474 }
475 #endif
476
477 static inline void
478 l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
479 {
480         struct ether_hdr *eth_hdr;
481         struct ipv4_hdr *ipv4_hdr;
482         void *tmp;
483         uint8_t dst_port;
484
485         eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
486
487         ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, unsigned char *) +
488                                 sizeof(struct ether_hdr));
489
490 #ifdef DO_RFC_1812_CHECKS
491         /* Check to make sure the packet is valid (RFC1812) */
492         if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
493                 rte_pktmbuf_free(m);
494                 return;
495         }
496 #endif
497
498         dst_port = get_dst_port(ipv4_hdr, portid, l3fwd_lookup_struct);
499         if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0)
500                 dst_port = portid;
501
502         /* 02:00:00:00:00:xx */
503         tmp = &eth_hdr->d_addr.addr_bytes[0];
504         *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
505
506 #ifdef DO_RFC_1812_CHECKS
507         /* Update time to live and header checksum */
508         --(ipv4_hdr->time_to_live);
509         ++(ipv4_hdr->hdr_checksum);
510 #endif
511
512         /* src addr */
513         ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
514
515         send_single_packet(m, dst_port);
516
517 }
518
519 /* main processing loop */
520 static int
521 main_loop(__attribute__((unused)) void *dummy)
522 {
523         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
524         unsigned lcore_id;
525         uint64_t prev_tsc, diff_tsc, cur_tsc;
526         int i, j, nb_rx;
527         uint8_t portid, queueid;
528         struct lcore_conf *qconf;
529         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
530
531         prev_tsc = 0;
532
533         lcore_id = rte_lcore_id();
534         qconf = &lcore_conf[lcore_id];
535
536         if (qconf->n_rx_queue == 0) {
537                 RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
538                 return 0;
539         }
540
541         RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
542
543         for (i = 0; i < qconf->n_rx_queue; i++) {
544
545                 portid = qconf->rx_queue_list[i].port_id;
546                 queueid = qconf->rx_queue_list[i].queue_id;
547                 RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", lcore_id,
548                         portid, queueid);
549         }
550
551         while (1) {
552
553                 cur_tsc = rte_rdtsc();
554
555                 /*
556                  * TX burst queue drain
557                  */
558                 diff_tsc = cur_tsc - prev_tsc;
559                 if (unlikely(diff_tsc > drain_tsc)) {
560
561                         /*
562                          * This could be optimized (use queueid instead of
563                          * portid), but it is not called so often
564                          */
565                         for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
566                                 if (qconf->tx_mbufs[portid].len == 0)
567                                         continue;
568                                 send_burst(&lcore_conf[lcore_id],
569                                         qconf->tx_mbufs[portid].len,
570                                         portid);
571                                 qconf->tx_mbufs[portid].len = 0;
572                         }
573
574                         prev_tsc = cur_tsc;
575                 }
576
577                 /*
578                  * Read packet from RX queues
579                  */
580                 for (i = 0; i < qconf->n_rx_queue; ++i) {
581
582                         portid = qconf->rx_queue_list[i].port_id;
583                         queueid = qconf->rx_queue_list[i].queue_id;
584                         nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST);
585
586                         /* Prefetch first packets */
587                         for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
588                                 rte_prefetch0(rte_pktmbuf_mtod(
589                                                 pkts_burst[j], void *));
590                         }
591
592                         /* Prefetch and forward already prefetched packets */
593                         for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
594                                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
595                                                 j + PREFETCH_OFFSET], void *));
596                                 l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct);
597                         }
598
599                         /* Forward remaining prefetched packets */
600                         for (; j < nb_rx; j++) {
601                                 l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct);
602                         }
603                 }
604         }
605 }
606
607 static int
608 check_lcore_params(void)
609 {
610         uint8_t queue, lcore;
611         uint16_t i;
612         int socketid;
613
614         for (i = 0; i < nb_lcore_params; ++i) {
615                 queue = lcore_params[i].queue_id;
616                 if (queue >= MAX_RX_QUEUE_PER_PORT) {
617                         printf("invalid queue number: %hhu\n", queue);
618                         return -1;
619                 }
620                 lcore = lcore_params[i].lcore_id;
621                 if (!rte_lcore_is_enabled(lcore)) {
622                         printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
623                         return -1;
624                 }
625                 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) &&
626                         (numa_on == 0)) {
627                         printf("warning: lcore %hhu is on socket %d with numa off \n",
628                                 lcore, socketid);
629                 }
630         }
631         return 0;
632 }
633
634 static int
635 check_port_config(const unsigned nb_ports)
636 {
637         unsigned portid;
638         uint16_t i;
639
640         for (i = 0; i < nb_lcore_params; ++i) {
641                 portid = lcore_params[i].port_id;
642                 if ((enabled_port_mask & (1 << portid)) == 0) {
643                         printf("port %u is not enabled in port mask\n", portid);
644                         return -1;
645                 }
646                 if (portid >= nb_ports) {
647                         printf("port %u is not present on the board\n", portid);
648                         return -1;
649                 }
650         }
651         return 0;
652 }
653
654 static uint8_t
655 get_port_n_rx_queues(const uint8_t port)
656 {
657         int queue = -1;
658         uint16_t i;
659
660         for (i = 0; i < nb_lcore_params; ++i) {
661                 if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
662                         queue = lcore_params[i].queue_id;
663         }
664         return (uint8_t)(++queue);
665 }
666
667 static int
668 init_lcore_rx_queues(void)
669 {
670         uint16_t i, nb_rx_queue;
671         uint8_t lcore;
672
673         for (i = 0; i < nb_lcore_params; ++i) {
674                 lcore = lcore_params[i].lcore_id;
675                 nb_rx_queue = lcore_conf[lcore].n_rx_queue;
676                 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
677                         printf("error: too many queues (%u) for lcore: %u\n",
678                                 (unsigned)nb_rx_queue + 1, (unsigned)lcore);
679                         return -1;
680                 } else {
681                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
682                                 lcore_params[i].port_id;
683                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
684                                 lcore_params[i].queue_id;
685                         lcore_conf[lcore].n_rx_queue++;
686                 }
687         }
688         return 0;
689 }
690
/*
 * Print the command-line synopsis of the application to stdout.
 * prgname: program name shown at the start of the synopsis.
 */
static void
print_usage(const char *prgname)
{
        /* synopsis lists every option handled by parse_args() */
        printf ("%s [EAL options] -- -p PORTMASK"
                "  [--config (port,queue,lcore)[,(port,queue,lcore)]]"
                " [--no-numa]\n"
                "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
                "  --config (port,queue,lcore): rx queues configuration\n"
                "  --no-numa: optional, disable numa awareness\n",
                prgname);
}
702
703 /* Custom handling of signals to handle process terminal */
704 static void
705 signal_handler(int signum)
706 {
707         uint8_t portid;
708         uint8_t nb_ports = rte_eth_dev_count();
709
710         /* When we receive a SIGINT signal */
711         if (signum == SIGINT) {
712                 for (portid = 0; portid < nb_ports; portid++) {
713                         /* skip ports that are not enabled */
714                         if ((enabled_port_mask & (1 << portid)) == 0)
715                                 continue;
716                         rte_eth_dev_close(portid);
717                 }
718         }
719         rte_exit(EXIT_SUCCESS, "\n User forced exit\n");
720 }
/*
 * Convert a hexadecimal port mask string to its value.
 * Returns -1 when the string is empty, contains anything that is not part
 * of a hexadecimal number, or evaluates to zero.
 */
static int
parse_portmask(const char *portmask)
{
        char *stop = NULL;
        unsigned long mask;

        /* an empty string is never a valid mask */
        if (portmask[0] == '\0')
                return -1;

        /* parse hexadecimal string; the whole string must be consumed */
        mask = strtoul(portmask, &stop, 16);
        if (stop == NULL || *stop != '\0')
                return -1;

        /* a mask that selects no port is rejected as well */
        if (mask == 0)
                return -1;

        return mask;
}
737
738 static int
739 parse_config(const char *q_arg)
740 {
741         char s[256];
742         const char *p, *p0 = q_arg;
743         char *end;
744         enum fieldnames {
745                 FLD_PORT = 0,
746                 FLD_QUEUE,
747                 FLD_LCORE,
748                 _NUM_FLD
749         };
750         unsigned long int_fld[_NUM_FLD];
751         char *str_fld[_NUM_FLD];
752         int i;
753         unsigned size;
754
755         nb_lcore_params = 0;
756
757         while ((p = strchr(p0,'(')) != NULL) {
758                 ++p;
759                 if((p0 = strchr(p,')')) == NULL)
760                         return -1;
761
762                 size = p0 - p;
763                 if(size >= sizeof(s))
764                         return -1;
765
766                 snprintf(s, sizeof(s), "%.*s", size, p);
767                 if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
768                         return -1;
769                 for (i = 0; i < _NUM_FLD; i++){
770                         errno = 0;
771                         int_fld[i] = strtoul(str_fld[i], &end, 0);
772                         if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
773                                 return -1;
774                 }
775                 if (nb_lcore_params >= MAX_LCORE_PARAMS) {
776                         printf("exceeded max number of lcore params: %hu\n",
777                                 nb_lcore_params);
778                         return -1;
779                 }
780                 lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT];
781                 lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];
782                 lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];
783                 ++nb_lcore_params;
784         }
785         lcore_params = lcore_params_array;
786         return 0;
787 }
788
789 /* Parse the argument given in the command line of the application */
790 static int
791 parse_args(int argc, char **argv)
792 {
793         int opt, ret;
794         char **argvopt;
795         int option_index;
796         char *prgname = argv[0];
797         static struct option lgopts[] = {
798                 {"config", 1, 0, 0},
799                 {"no-numa", 0, 0, 0},
800                 {NULL, 0, 0, 0}
801         };
802
803         argvopt = argv;
804
805         while ((opt = getopt_long(argc, argvopt, "p:",
806                                 lgopts, &option_index)) != EOF) {
807
808                 switch (opt) {
809                 /* portmask */
810                 case 'p':
811                         enabled_port_mask = parse_portmask(optarg);
812                         if (enabled_port_mask == 0) {
813                                 printf("invalid portmask\n");
814                                 print_usage(prgname);
815                                 return -1;
816                         }
817                         break;
818
819                 /* long options */
820                 case 0:
821                         if (!strcmp(lgopts[option_index].name, "config")) {
822                                 ret = parse_config(optarg);
823                                 if (ret) {
824                                         printf("invalid config\n");
825                                         print_usage(prgname);
826                                         return -1;
827                                 }
828                         }
829
830                         if (!strcmp(lgopts[option_index].name, "no-numa")) {
831                                 printf("numa is disabled \n");
832                                 numa_on = 0;
833                         }
834                         break;
835
836                 default:
837                         print_usage(prgname);
838                         return -1;
839                 }
840         }
841
842         if (optind >= 0)
843                 argv[optind-1] = prgname;
844
845         ret = optind-1;
846         optind = 0; /* reset getopt lib */
847         return ret;
848 }
849
850 static void
851 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
852 {
853         printf ("%s%02X:%02X:%02X:%02X:%02X:%02X", name,
854                 eth_addr->addr_bytes[0],
855                 eth_addr->addr_bytes[1],
856                 eth_addr->addr_bytes[2],
857                 eth_addr->addr_bytes[3],
858                 eth_addr->addr_bytes[4],
859                 eth_addr->addr_bytes[5]);
860 }
861
862 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
863 static void
864 setup_hash(int socketid)
865 {
866         unsigned i;
867         int ret;
868         char s[64];
869
870         /* create  hashes */
871         snprintf(s, sizeof(s), "l3fwd_hash_%d", socketid);
872         l3fwd_hash_params.name = s;
873         l3fwd_hash_params.socket_id = socketid;
874         l3fwd_lookup_struct[socketid] = rte_hash_create(&l3fwd_hash_params);
875         if (l3fwd_lookup_struct[socketid] == NULL)
876                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
877                                 "socket %d\n", socketid);
878
879         /* populate the hash */
880         for (i = 0; i < L3FWD_NUM_ROUTES; i++) {
881                 ret = rte_hash_add_key (l3fwd_lookup_struct[socketid],
882                                 (void *) &l3fwd_route_array[i].key);
883                 if (ret < 0) {
884                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
885                                 "l3fwd hash on socket %d\n", i, socketid);
886                 }
887                 l3fwd_out_if[ret] = l3fwd_route_array[i].if_out;
888                 printf("Hash: Adding key\n");
889                 print_key(l3fwd_route_array[i].key);
890         }
891 }
892 #endif
893
894 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
895 static void
896 setup_lpm(int socketid)
897 {
898         unsigned i;
899         int ret;
900         char s[64];
901
902         /* create the LPM table */
903         snprintf(s, sizeof(s), "L3FWD_LPM_%d", socketid);
904         l3fwd_lookup_struct[socketid] = rte_lpm_create(s, socketid,
905                                 L3FWD_LPM_MAX_RULES, 0);
906         if (l3fwd_lookup_struct[socketid] == NULL)
907                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
908                                 " on socket %d\n", socketid);
909
910         /* populate the LPM table */
911         for (i = 0; i < L3FWD_NUM_ROUTES; i++) {
912                 ret = rte_lpm_add(l3fwd_lookup_struct[socketid],
913                         l3fwd_route_array[i].ip,
914                         l3fwd_route_array[i].depth,
915                         l3fwd_route_array[i].if_out);
916
917                 if (ret < 0) {
918                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
919                                 "l3fwd LPM table on socket %d\n",
920                                 i, socketid);
921                 }
922
923                 printf("LPM: Adding route 0x%08x / %d (%d)\n",
924                         (unsigned)l3fwd_route_array[i].ip,
925                         l3fwd_route_array[i].depth,
926                         l3fwd_route_array[i].if_out);
927         }
928 }
929 #endif
930
931 static int
932 init_mem(unsigned nb_mbuf)
933 {
934         struct lcore_conf *qconf;
935         int socketid;
936         unsigned lcore_id;
937         char s[64];
938
939         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
940                 if (rte_lcore_is_enabled(lcore_id) == 0)
941                         continue;
942
943                 if (numa_on)
944                         socketid = rte_lcore_to_socket_id(lcore_id);
945                 else
946                         socketid = 0;
947
948                 if (socketid >= NB_SOCKETS) {
949                         rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
950                                 socketid, lcore_id, NB_SOCKETS);
951                 }
952                 if (pktmbuf_pool[socketid] == NULL) {
953                         snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
954                         pktmbuf_pool[socketid] =
955                                 rte_mempool_create(s, nb_mbuf, MBUF_SIZE,
956                                                    MEMPOOL_CACHE_SIZE,
957                                         sizeof(struct rte_pktmbuf_pool_private),
958                                         rte_pktmbuf_pool_init, NULL,
959                                         rte_pktmbuf_init, NULL,
960                                         socketid, 0);
961                         if (pktmbuf_pool[socketid] == NULL)
962                                 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socketid);
963                         else
964                                 printf("Allocated mbuf pool on socket %d\n", socketid);
965
966 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
967                         setup_lpm(socketid);
968 #else
969                         setup_hash(socketid);
970 #endif
971                 }
972                 qconf = &lcore_conf[lcore_id];
973                 qconf->lookup_struct = l3fwd_lookup_struct[socketid];
974         }
975         return 0;
976 }
977
978 int
979 MAIN(int argc, char **argv)
980 {
981         struct lcore_conf *qconf;
982         int ret;
983         unsigned nb_ports;
984         uint16_t queueid;
985         unsigned lcore_id;
986         uint32_t nb_lcores;
987         uint16_t n_tx_queue;
988         uint8_t portid, nb_rx_queue, queue, socketid;
989
990         signal(SIGINT, signal_handler);
991         /* init EAL */
992         ret = rte_eal_init(argc, argv);
993         if (ret < 0)
994                 rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
995         argc -= ret;
996         argv += ret;
997
998         /* parse application arguments (after the EAL ones) */
999         ret = parse_args(argc, argv);
1000         if (ret < 0)
1001                 rte_exit(EXIT_FAILURE, "Invalid L3FWD-VF parameters\n");
1002
1003         if (check_lcore_params() < 0)
1004                 rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
1005
1006         ret = init_lcore_rx_queues();
1007         if (ret < 0)
1008                 rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
1009
1010         if (rte_eal_pci_probe() < 0)
1011                 rte_exit(EXIT_FAILURE, "Cannot probe PCI\n");
1012
1013         nb_ports = rte_eth_dev_count();
1014         if (nb_ports > RTE_MAX_ETHPORTS)
1015                 nb_ports = RTE_MAX_ETHPORTS;
1016
1017         if (check_port_config(nb_ports) < 0)
1018                 rte_exit(EXIT_FAILURE, "check_port_config failed\n");
1019
1020         nb_lcores = rte_lcore_count();
1021
1022         /* initialize all ports */
1023         for (portid = 0; portid < nb_ports; portid++) {
1024                 /* skip ports that are not enabled */
1025                 if ((enabled_port_mask & (1 << portid)) == 0) {
1026                         printf("\nSkipping disabled port %d\n", portid);
1027                         continue;
1028                 }
1029
1030                 /* init port */
1031                 printf("Initializing port %d ... ", portid );
1032                 fflush(stdout);
1033
1034                 /* must always equal(=1) */
1035                 nb_rx_queue = get_port_n_rx_queues(portid);
1036                 n_tx_queue = MAX_TX_QUEUE_PER_PORT;
1037
1038                 printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
1039                         nb_rx_queue, (unsigned)1 );
1040                 ret = rte_eth_dev_configure(portid, nb_rx_queue, n_tx_queue, &port_conf);
1041                 if (ret < 0)
1042                         rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
1043                                 ret, portid);
1044
1045                 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
1046                 print_ethaddr(" Address:", &ports_eth_addr[portid]);
1047                 printf(", ");
1048
1049                 ret = init_mem(NB_MBUF);
1050                 if (ret < 0)
1051                         rte_exit(EXIT_FAILURE, "init_mem failed\n");
1052
1053                 /* init one TX queue */
1054                 socketid = (uint8_t)rte_lcore_to_socket_id(rte_get_master_lcore());
1055
1056                 printf("txq=%d,%d,%d ", portid, 0, socketid);
1057                 fflush(stdout);
1058                 ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
1059                                                  socketid, &tx_conf);
1060                 if (ret < 0)
1061                         rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
1062                                 "port=%d\n", ret, portid);
1063
1064                 printf("\n");
1065         }
1066
1067         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1068                 if (rte_lcore_is_enabled(lcore_id) == 0)
1069                         continue;
1070                 qconf = &lcore_conf[lcore_id];
1071                 qconf->tx_queue_id = 0;
1072
1073                 printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
1074                 fflush(stdout);
1075                 /* init RX queues */
1076                 for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
1077                         portid = qconf->rx_queue_list[queue].port_id;
1078                         queueid = qconf->rx_queue_list[queue].queue_id;
1079
1080                         if (numa_on)
1081                                 socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
1082                         else
1083                                 socketid = 0;
1084
1085                         printf("rxq=%d,%d,%d ", portid, queueid, socketid);
1086                         fflush(stdout);
1087
1088                         ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
1089                                                 socketid, &rx_conf, pktmbuf_pool[socketid]);
1090                         if (ret < 0)
1091                                 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d,"
1092                                                 "port=%d\n", ret, portid);
1093                 }
1094         }
1095         printf("\n");
1096
1097         /* start ports */
1098         for (portid = 0; portid < nb_ports; portid++) {
1099                 if ((enabled_port_mask & (1 << portid)) == 0) {
1100                         continue;
1101                 }
1102                 /* Start device */
1103                 ret = rte_eth_dev_start(portid);
1104                 if (ret < 0)
1105                         rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
1106                                 ret, portid);
1107
1108                 printf("done: Port %d\n", portid);
1109
1110         }
1111
1112         /* launch per-lcore init on every lcore */
1113         rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
1114         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
1115                 if (rte_eal_wait_lcore(lcore_id) < 0)
1116                         return -1;
1117         }
1118
1119         return 0;
1120 }