/* dpdk.git: examples/load_balancer/init.c */
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2012 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_lpm.h>

#include "main.h"

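/*
 * Static NIC configuration applied to every enabled port: RSS is enabled on
 * IPv4/IPv6 traffic so RX queues can be spread across the I/O lcores, IP
 * checksum offload is turned on, and header split, VLAN filtering, jumbo
 * frames and hardware CRC stripping are left disabled.
 */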
static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
                .header_split   = 0, /**< Header Split disabled */
                .hw_ip_checksum = 1, /**< IP checksum offload enabled */
                .hw_vlan_filter = 0, /**< VLAN filtering disabled */
                .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
                .hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
        },
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL,
                        .rss_hf = ETH_RSS_IPV4 | ETH_RSS_IPV6,
                },
        },
        .txmode = {
                .mq_mode = ETH_DCB_NONE,
        },
};

static struct rte_eth_rxconf rx_conf = {
        .rx_thresh = {
                .pthresh = APP_DEFAULT_NIC_RX_PTHRESH,
                .hthresh = APP_DEFAULT_NIC_RX_HTHRESH,
                .wthresh = APP_DEFAULT_NIC_RX_WTHRESH,
        },
        .rx_free_thresh = APP_DEFAULT_NIC_RX_FREE_THRESH,
        .rx_drop_en = APP_DEFAULT_NIC_RX_DROP_EN,
};

static struct rte_eth_txconf tx_conf = {
        .tx_thresh = {
                .pthresh = APP_DEFAULT_NIC_TX_PTHRESH,
                .hthresh = APP_DEFAULT_NIC_TX_HTHRESH,
                .wthresh = APP_DEFAULT_NIC_TX_WTHRESH,
        },
        .tx_free_thresh = APP_DEFAULT_NIC_TX_FREE_THRESH,
        .tx_rs_thresh = APP_DEFAULT_NIC_TX_RS_THRESH,
};

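/*
 * Give each worker lcore a 0-based worker_id. The TX ring setup below uses
 * this index to address the per-worker ring arrays on the I/O lcores.
 */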
static void
app_assign_worker_ids(void)
{
        uint32_t lcore, worker_id;

        /* Assign ID for each worker */
        worker_id = 0;
        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;

                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                lp_worker->worker_id = worker_id;
                worker_id ++;
        }
}

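/*
 * Create one mbuf pool per CPU socket that has at least one enabled lcore,
 * then point every enabled lcore at the pool of its local socket so that
 * packet buffers stay NUMA-local.
 */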
static void
app_init_mbuf_pools(void)
{
        unsigned socket, lcore;

        /* Init the buffer pools */
        for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
                char name[32];
                if (app_is_socket_used(socket) == 0) {
                        continue;
                }

                rte_snprintf(name, sizeof(name), "mbuf_pool_%u", socket);
                printf("Creating the mbuf pool for socket %u ...\n", socket);
                app.pools[socket] = rte_mempool_create(
                        name,
                        APP_DEFAULT_MEMPOOL_BUFFERS,
                        APP_DEFAULT_MBUF_SIZE,
                        APP_DEFAULT_MEMPOOL_CACHE_SIZE,
                        sizeof(struct rte_pktmbuf_pool_private),
                        rte_pktmbuf_pool_init, NULL,
                        rte_pktmbuf_init, NULL,
                        socket,
                        0);
                if (app.pools[socket] == NULL) {
                        rte_panic("Cannot create mbuf pool on socket %u\n", socket);
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) {
                        continue;
                }

                socket = rte_lcore_to_socket_id(lcore);
                app.lcore_params[lcore].pool = app.pools[socket];
        }
}

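/*
 * Create one LPM table per socket in use and populate it with the
 * application's configured routing rules (app.lpm_rules); each worker lcore
 * is then given a pointer to the table on its local socket.
 */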
static void
app_init_lpm_tables(void)
{
        unsigned socket, lcore;

        /* Init the LPM tables */
        for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
                char name[32];
                uint32_t rule;

                if (app_is_socket_used(socket) == 0) {
                        continue;
                }

                rte_snprintf(name, sizeof(name), "lpm_table_%u", socket);
                printf("Creating the LPM table for socket %u ...\n", socket);
                app.lpm_tables[socket] = rte_lpm_create(
                        name,
                        socket,
                        APP_MAX_LPM_RULES,
                        0);
                if (app.lpm_tables[socket] == NULL) {
                        rte_panic("Unable to create LPM table on socket %u\n", socket);
                }

                for (rule = 0; rule < app.n_lpm_rules; rule ++) {
                        int ret;

                        ret = rte_lpm_add(app.lpm_tables[socket],
                                app.lpm_rules[rule].ip,
                                app.lpm_rules[rule].depth,
                                app.lpm_rules[rule].if_out);

                        if (ret < 0) {
                                rte_panic("Unable to add entry %u (%x/%u => %u) to the LPM table on socket %u (%d)\n",
                                        (unsigned) rule,
                                        (unsigned) app.lpm_rules[rule].ip,
                                        (unsigned) app.lpm_rules[rule].depth,
                                        (unsigned) app.lpm_rules[rule].if_out,
                                        socket,
                                        ret);
                        }
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                socket = rte_lcore_to_socket_id(lcore);
                app.lcore_params[lcore].worker.lpm_table = app.lpm_tables[socket];
        }
}

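/*
 * RX path rings: every I/O lcore that owns at least one NIC RX queue gets
 * one single-producer/single-consumer ring to each worker lcore, allocated
 * on the I/O lcore's socket. The checks at the end verify that the ring
 * counts match the number of worker and I/O RX lcores.
 */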
static void
app_init_rings_rx(void)
{
        unsigned lcore;

        /* Initialize the rings for the RX side */
        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
                unsigned socket_io, lcore_worker;

                if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
                    (lp_io->rx.n_nic_queues == 0)) {
                        continue;
                }

                socket_io = rte_lcore_to_socket_id(lcore);

                for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) {
                        char name[32];
                        struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker;
                        struct rte_ring *ring = NULL;

                        if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) {
                                continue;
                        }

                        printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n",
                                lcore,
                                socket_io,
                                lcore_worker);
                        rte_snprintf(name, sizeof(name), "app_ring_rx_s%u_io%u_w%u",
                                socket_io,
                                lcore,
                                lcore_worker);
                        ring = rte_ring_create(
                                name,
                                app.ring_rx_size,
                                socket_io,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
                        if (ring == NULL) {
                                rte_panic("Cannot create ring to connect I/O core %u with worker core %u\n",
                                        lcore,
                                        lcore_worker);
                        }

                        lp_io->rx.rings[lp_io->rx.n_rings] = ring;
                        lp_io->rx.n_rings ++;

                        lp_worker->rings_in[lp_worker->n_rings_in] = ring;
                        lp_worker->n_rings_in ++;
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;

                if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
                    (lp_io->rx.n_nic_queues == 0)) {
                        continue;
                }

                if (lp_io->rx.n_rings != app_get_lcores_worker()) {
                        rte_panic("Algorithmic error (I/O RX rings)\n");
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;

                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                if (lp_worker->n_rings_in != app_get_lcores_io_rx()) {
                        rte_panic("Algorithmic error (worker input rings)\n");
                }
        }
}

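/*
 * TX path rings: every worker lcore gets one single-producer/single-consumer
 * ring per enabled TX port, connecting it to the I/O lcore that drives that
 * port's TX queue. Each ring is allocated on the I/O lcore's socket and also
 * registered in the I/O lcore's per-port, per-worker ring array.
 */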
static void
app_init_rings_tx(void)
{
        unsigned lcore;

        /* Initialize the rings for the TX side */
        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
                unsigned port;

                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
                        char name[32];
                        struct app_lcore_params_io *lp_io = NULL;
                        struct rte_ring *ring;
                        uint32_t socket_io, lcore_io;

                        if (app.nic_tx_port_mask[port] == 0) {
                                continue;
                        }

                        if (app_get_lcore_for_nic_tx((uint8_t) port, &lcore_io) < 0) {
                                rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n",
                                        port);
                        }

                        lp_io = &app.lcore_params[lcore_io].io;
                        socket_io = rte_lcore_to_socket_id(lcore_io);

                        printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n",
                                lcore, port, (unsigned)lcore_io, (unsigned)socket_io);
                        rte_snprintf(name, sizeof(name), "app_ring_tx_s%u_w%u_p%u", socket_io, lcore, port);
                        ring = rte_ring_create(
                                name,
                                app.ring_tx_size,
                                socket_io,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
                        if (ring == NULL) {
                                rte_panic("Cannot create ring to connect worker core %u with TX port %u\n",
                                        lcore,
                                        port);
                        }

                        lp_worker->rings_out[port] = ring;
                        lp_io->tx.rings[port][lp_worker->worker_id] = ring;
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
                unsigned i;

                if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
                    (lp_io->tx.n_nic_ports == 0)) {
                        continue;
                }

                for (i = 0; i < lp_io->tx.n_nic_ports; i ++) {
                        unsigned port, j;

                        port = lp_io->tx.nic_ports[i];
                        for (j = 0; j < app_get_lcores_worker(); j ++) {
                                if (lp_io->tx.rings[port][j] == NULL) {
                                        rte_panic("Algorithmic error (I/O TX rings)\n");
                                }
                        }
                }
        }
}

/* Poll the link status of all enabled ports for up to 9s, then print the final status of each port */
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
        uint8_t portid, count, all_ports_up, print_flag = 0;
        struct rte_eth_link link;
        uint32_t n_rx_queues, n_tx_queues;

        printf("\nChecking link status");
        fflush(stdout);
        for (count = 0; count <= MAX_CHECK_TIME; count++) {
                all_ports_up = 1;
                for (portid = 0; portid < port_num; portid++) {
                        if ((port_mask & (1 << portid)) == 0)
                                continue;
                        n_rx_queues = app_get_nic_rx_queues_per_port(portid);
                        n_tx_queues = app.nic_tx_port_mask[portid];
                        if ((n_rx_queues == 0) && (n_tx_queues == 0))
                                continue;
                        memset(&link, 0, sizeof(link));
                        rte_eth_link_get_nowait(portid, &link);
                        /* print link status if flag set */
                        if (print_flag == 1) {
                                if (link.link_status)
                                        printf("Port %d Link Up - speed %u "
                                                "Mbps - %s\n", (uint8_t)portid,
                                                (unsigned)link.link_speed,
                                                (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                                                ("full-duplex") : ("half-duplex"));
                                else
                                        printf("Port %d Link Down\n",
                                                (uint8_t)portid);
                                continue;
                        }
                        /* clear all_ports_up flag if any link down */
                        if (link.link_status == 0) {
                                all_ports_up = 0;
                                break;
                        }
                }
                /* after finally printing all link status, get out */
                if (print_flag == 1)
                        break;

                if (all_ports_up == 0) {
                        printf(".");
                        fflush(stdout);
                        rte_delay_ms(CHECK_INTERVAL);
                }

                /* set the print_flag if all ports up or timeout */
                if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
                        print_flag = 1;
                        printf("done\n");
                }
        }
}

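/*
 * Bring up the NIC ports: probe the PMDs and PCI devices, then, for every
 * port with at least one RX queue or TX enabled, configure the port, set up
 * its RX queues (each fed from the mempool of the lcore that services it)
 * and its single TX queue, enable promiscuous mode, start the port, and
 * finally wait for the links to come up.
 */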
static void
app_init_nics(void)
{
        unsigned socket;
        uint32_t lcore;
        uint8_t port, queue;
        int ret;
        uint32_t n_rx_queues, n_tx_queues;

        /* Init driver */
        printf("Initializing the PMD driver ...\n");
        if (rte_pmd_init_all() < 0) {
                rte_panic("Cannot init PMD\n");
        }

        if (rte_eal_pci_probe() < 0) {
                rte_panic("Cannot probe PCI\n");
        }

        /* Init NIC ports and queues, then start the ports */
        for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
                struct rte_mempool *pool;

                n_rx_queues = app_get_nic_rx_queues_per_port(port);
                n_tx_queues = app.nic_tx_port_mask[port];

                if ((n_rx_queues == 0) && (n_tx_queues == 0)) {
                        continue;
                }

                /* Init port */
                printf("Initializing NIC port %u ...\n", (unsigned) port);
                ret = rte_eth_dev_configure(
                        port,
                        (uint8_t) n_rx_queues,
                        (uint8_t) n_tx_queues,
                        &port_conf);
                if (ret < 0) {
                        rte_panic("Cannot init NIC port %u (%d)\n", (unsigned) port, ret);
                }
                rte_eth_promiscuous_enable(port);

                /* Init RX queues */
                for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) {
                        if (app.nic_rx_queue_mask[port][queue] == 0) {
                                continue;
                        }

                        app_get_lcore_for_nic_rx(port, queue, &lcore);
                        socket = rte_lcore_to_socket_id(lcore);
                        pool = app.lcore_params[lcore].pool;

                        printf("Initializing NIC port %u RX queue %u ...\n",
                                (unsigned) port,
                                (unsigned) queue);
                        ret = rte_eth_rx_queue_setup(
                                port,
                                queue,
                                (uint16_t) app.nic_rx_ring_size,
                                socket,
                                &rx_conf,
                                pool);
                        if (ret < 0) {
                                rte_panic("Cannot init RX queue %u for port %u (%d)\n",
                                        (unsigned) queue,
                                        (unsigned) port,
                                        ret);
                        }
                }

                /* Init TX queues */
                if (app.nic_tx_port_mask[port] == 1) {
                        app_get_lcore_for_nic_tx(port, &lcore);
                        socket = rte_lcore_to_socket_id(lcore);
                        printf("Initializing NIC port %u TX queue 0 ...\n",
                                (unsigned) port);
                        ret = rte_eth_tx_queue_setup(
                                port,
                                0,
                                (uint16_t) app.nic_tx_ring_size,
                                socket,
                                &tx_conf);
                        if (ret < 0) {
                                rte_panic("Cannot init TX queue 0 for port %d (%d)\n",
                                        port,
                                        ret);
                        }
                }

                /* Start port */
                ret = rte_eth_dev_start(port);
                if (ret < 0) {
                        rte_panic("Cannot start port %d (%d)\n", port, ret);
                }
        }

        check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0));
}

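/*
 * Top-level initialization sequence. The ordering matters: worker IDs and
 * mempools must exist before the rings and NIC queues that reference them
 * are created.
 */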
void
app_init(void)
{
        app_assign_worker_ids();
        app_init_mbuf_pools();
        app_init_lpm_tables();
        app_init_rings_rx();
        app_init_rings_tx();
        app_init_nics();

        printf("Initialization completed.\n");
}