examples/load_balancer/init.c
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_string_fns.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_lpm.h>

#include "main.h"

static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .split_hdr_size = 0,
                .header_split   = 0, /**< Header Split disabled */
                .hw_ip_checksum = 1, /**< IP checksum offload enabled */
                .hw_vlan_filter = 0, /**< VLAN filtering disabled */
                .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
                .hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
        },
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL,
                        .rss_hf = ETH_RSS_IP,
                },
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
        },
};
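
/*
 * Note: with .mq_mode = ETH_MQ_RX_RSS and .rss_hf = ETH_RSS_IP above, the NIC
 * hashes the IP addresses of each received packet and spreads the traffic
 * across all RX queues configured for the port, so each I/O RX lcore only
 * polls a subset of the flows.
 */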

static void
app_assign_worker_ids(void)
{
        uint32_t lcore, worker_id;

        /* Assign ID for each worker */
        worker_id = 0;
        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;

                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                lp_worker->worker_id = worker_id;
                worker_id ++;
        }
}
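
/*
 * The worker_id values assigned above form a dense index in the range
 * [0, number of worker lcores); app_init_rings_tx() below uses this index to
 * place each worker's TX ring into lp_io->tx.rings[port][worker_id].
 */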

static void
app_init_mbuf_pools(void)
{
        unsigned socket, lcore;

        /* Init the buffer pools */
        for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
                char name[32];
                if (app_is_socket_used(socket) == 0) {
                        continue;
                }

                snprintf(name, sizeof(name), "mbuf_pool_%u", socket);
                printf("Creating the mbuf pool for socket %u ...\n", socket);
                app.pools[socket] = rte_mempool_create(
                        name,
                        APP_DEFAULT_MEMPOOL_BUFFERS,
                        APP_DEFAULT_MBUF_SIZE,
                        APP_DEFAULT_MEMPOOL_CACHE_SIZE,
                        sizeof(struct rte_pktmbuf_pool_private),
                        rte_pktmbuf_pool_init, NULL,
                        rte_pktmbuf_init, NULL,
                        socket,
                        0);
                if (app.pools[socket] == NULL) {
                        rte_panic("Cannot create mbuf pool on socket %u\n", socket);
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) {
                        continue;
                }

                socket = rte_lcore_to_socket_id(lcore);
                app.lcore_params[lcore].pool = app.pools[socket];
        }
}
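
/*
 * One mbuf pool is created per CPU socket in use, and each enabled lcore is
 * pointed at the pool of its own socket, so packet buffers are allocated from
 * memory that is NUMA-local to the lcores touching them.
 */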

static void
app_init_lpm_tables(void)
{
        unsigned socket, lcore;

        /* Init the LPM tables */
        for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
                char name[32];
                uint32_t rule;

                if (app_is_socket_used(socket) == 0) {
                        continue;
                }

                snprintf(name, sizeof(name), "lpm_table_%u", socket);
                printf("Creating the LPM table for socket %u ...\n", socket);
                app.lpm_tables[socket] = rte_lpm_create(
                        name,
                        socket,
                        APP_MAX_LPM_RULES,
                        0);
                if (app.lpm_tables[socket] == NULL) {
                        rte_panic("Unable to create LPM table on socket %u\n", socket);
                }

                for (rule = 0; rule < app.n_lpm_rules; rule ++) {
                        int ret;

                        ret = rte_lpm_add(app.lpm_tables[socket],
                                app.lpm_rules[rule].ip,
                                app.lpm_rules[rule].depth,
                                app.lpm_rules[rule].if_out);

                        if (ret < 0) {
                                rte_panic("Unable to add entry %u (%x/%u => %u) to the LPM table on socket %u (%d)\n",
                                        (unsigned) rule,
                                        (unsigned) app.lpm_rules[rule].ip,
                                        (unsigned) app.lpm_rules[rule].depth,
                                        (unsigned) app.lpm_rules[rule].if_out,
                                        socket,
                                        ret);
                        }
                }

        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                socket = rte_lcore_to_socket_id(lcore);
                app.lcore_params[lcore].worker.lpm_table = app.lpm_tables[socket];
        }
}
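
/*
 * Illustrative sketch (not part of the original example): a worker lcore that
 * owns one of the LPM tables built above would typically resolve the output
 * port of an IPv4 packet roughly as follows, where "ipv4_dst" and "port_out"
 * are hypothetical locals (the next-hop type is uint8_t or uint32_t depending
 * on the DPDK version):
 *
 *     if (rte_lpm_lookup(lp_worker->lpm_table,
 *                        rte_be_to_cpu_32(ipv4_dst), &port_out) == 0) {
 *             // matched: the mbuf is later enqueued to
 *             // lp_worker->rings_out[port_out]
 *     }
 *
 * rte_lpm_lookup() returns 0 on a hit and a negative value when no rule
 * matches the address.
 */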

static void
app_init_rings_rx(void)
{
        unsigned lcore;

        /* Initialize the rings for the RX side */
        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
                unsigned socket_io, lcore_worker;

                if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
                    (lp_io->rx.n_nic_queues == 0)) {
                        continue;
                }

                socket_io = rte_lcore_to_socket_id(lcore);

                for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) {
                        char name[32];
                        struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker;
                        struct rte_ring *ring = NULL;

                        if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) {
                                continue;
                        }

                        printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n",
                                lcore,
                                socket_io,
                                lcore_worker);
                        snprintf(name, sizeof(name), "app_ring_rx_s%u_io%u_w%u",
                                socket_io,
                                lcore,
                                lcore_worker);
                        ring = rte_ring_create(
                                name,
                                app.ring_rx_size,
                                socket_io,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
                        if (ring == NULL) {
                                rte_panic("Cannot create ring to connect I/O core %u with worker core %u\n",
                                        lcore,
                                        lcore_worker);
                        }

                        lp_io->rx.rings[lp_io->rx.n_rings] = ring;
                        lp_io->rx.n_rings ++;

                        lp_worker->rings_in[lp_worker->n_rings_in] = ring;
                        lp_worker->n_rings_in ++;
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;

                if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
                    (lp_io->rx.n_nic_queues == 0)) {
                        continue;
                }

                if (lp_io->rx.n_rings != app_get_lcores_worker()) {
                        rte_panic("Algorithmic error (I/O RX rings)\n");
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;

                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                if (lp_worker->n_rings_in != app_get_lcores_io_rx()) {
                        rte_panic("Algorithmic error (worker input rings)\n");
                }
        }
}
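
/*
 * Each ring created above connects exactly one I/O RX lcore to exactly one
 * worker lcore, which is why the single-producer/single-consumer flags
 * (RING_F_SP_ENQ | RING_F_SC_DEQ) are safe to use. The checks that follow the
 * creation loop verify that every I/O RX lcore ends up with one ring per
 * worker and every worker with one ring per I/O RX lcore.
 */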

static void
app_init_rings_tx(void)
{
        unsigned lcore;

        /* Initialize the rings for the TX side */
        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
                unsigned port;

                if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
                        continue;
                }

                for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
                        char name[32];
                        struct app_lcore_params_io *lp_io = NULL;
                        struct rte_ring *ring;
                        uint32_t socket_io, lcore_io;

                        if (app.nic_tx_port_mask[port] == 0) {
                                continue;
                        }

                        if (app_get_lcore_for_nic_tx((uint8_t) port, &lcore_io) < 0) {
                                rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n",
                                        port);
                        }

                        lp_io = &app.lcore_params[lcore_io].io;
                        socket_io = rte_lcore_to_socket_id(lcore_io);

                        printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n",
                                lcore, port, (unsigned)lcore_io, (unsigned)socket_io);
                        snprintf(name, sizeof(name), "app_ring_tx_s%u_w%u_p%u", socket_io, lcore, port);
                        ring = rte_ring_create(
                                name,
                                app.ring_tx_size,
                                socket_io,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
                        if (ring == NULL) {
                                rte_panic("Cannot create ring to connect worker core %u with TX port %u\n",
                                        lcore,
                                        port);
                        }

                        lp_worker->rings_out[port] = ring;
                        lp_io->tx.rings[port][lp_worker->worker_id] = ring;
                }
        }

        for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
                struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
                unsigned i;

                if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
                    (lp_io->tx.n_nic_ports == 0)) {
                        continue;
                }

                for (i = 0; i < lp_io->tx.n_nic_ports; i ++) {
                        unsigned port, j;

                        port = lp_io->tx.nic_ports[i];
                        for (j = 0; j < app_get_lcores_worker(); j ++) {
                                if (lp_io->tx.rings[port][j] == NULL) {
                                        rte_panic("Algorithmic error (I/O TX rings)\n");
                                }
                        }
                }
        }
}
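
/*
 * On the TX side there is one single-producer/single-consumer ring per
 * (worker lcore, NIC TX port) pair: the worker enqueues into rings_out[port]
 * and the I/O lcore that owns the port drains tx.rings[port][worker_id].
 */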

/* Check the link status of all ports, waiting up to 9 s, and print the final status */
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
        uint8_t portid, count, all_ports_up, print_flag = 0;
        struct rte_eth_link link;
        uint32_t n_rx_queues, n_tx_queues;

        printf("\nChecking link status");
        fflush(stdout);
        for (count = 0; count <= MAX_CHECK_TIME; count++) {
                all_ports_up = 1;
                for (portid = 0; portid < port_num; portid++) {
                        if ((port_mask & (1 << portid)) == 0)
                                continue;
                        n_rx_queues = app_get_nic_rx_queues_per_port(portid);
                        n_tx_queues = app.nic_tx_port_mask[portid];
                        if ((n_rx_queues == 0) && (n_tx_queues == 0))
                                continue;
                        memset(&link, 0, sizeof(link));
                        rte_eth_link_get_nowait(portid, &link);
                        /* print link status if flag set */
                        if (print_flag == 1) {
                                if (link.link_status)
                                        printf("Port %d Link Up - speed %u "
                                                "Mbps - %s\n", (uint8_t)portid,
                                                (unsigned)link.link_speed,
                                                (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                                                ("full-duplex") : ("half-duplex"));
                                else
                                        printf("Port %d Link Down\n",
                                                        (uint8_t)portid);
                                continue;
                        }
                        /* clear all_ports_up flag if any link down */
                        if (link.link_status == 0) {
                                all_ports_up = 0;
                                break;
                        }
                }
                /* after finally printing all link status, get out */
                if (print_flag == 1)
                        break;

                if (all_ports_up == 0) {
                        printf(".");
                        fflush(stdout);
                        rte_delay_ms(CHECK_INTERVAL);
                }

                /* set the print_flag if all ports up or timeout */
                if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
                        print_flag = 1;
                        printf("done\n");
                }
        }
}

static void
app_init_nics(void)
{
        unsigned socket;
        uint32_t lcore;
        uint8_t port, queue;
        int ret;
        uint32_t n_rx_queues, n_tx_queues;

        /* Init NIC ports and queues, then start the ports */
        for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
                struct rte_mempool *pool;

                n_rx_queues = app_get_nic_rx_queues_per_port(port);
                n_tx_queues = app.nic_tx_port_mask[port];

                if ((n_rx_queues == 0) && (n_tx_queues == 0)) {
                        continue;
                }

                /* Init port */
                printf("Initializing NIC port %u ...\n", (unsigned) port);
                ret = rte_eth_dev_configure(
                        port,
                        (uint8_t) n_rx_queues,
                        (uint8_t) n_tx_queues,
                        &port_conf);
                if (ret < 0) {
                        rte_panic("Cannot init NIC port %u (%d)\n", (unsigned) port, ret);
                }
                rte_eth_promiscuous_enable(port);

                /* Init RX queues */
                for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) {
                        if (app.nic_rx_queue_mask[port][queue] == 0) {
                                continue;
                        }

                        app_get_lcore_for_nic_rx(port, queue, &lcore);
                        socket = rte_lcore_to_socket_id(lcore);
                        pool = app.lcore_params[lcore].pool;

                        printf("Initializing NIC port %u RX queue %u ...\n",
                                (unsigned) port,
                                (unsigned) queue);
                        ret = rte_eth_rx_queue_setup(
                                port,
                                queue,
                                (uint16_t) app.nic_rx_ring_size,
                                socket,
                                NULL,
                                pool);
                        if (ret < 0) {
                                rte_panic("Cannot init RX queue %u for port %u (%d)\n",
                                        (unsigned) queue,
                                        (unsigned) port,
                                        ret);
                        }
                }

                /* Init TX queues */
                if (app.nic_tx_port_mask[port] == 1) {
                        app_get_lcore_for_nic_tx(port, &lcore);
                        socket = rte_lcore_to_socket_id(lcore);
                        printf("Initializing NIC port %u TX queue 0 ...\n",
                                (unsigned) port);
                        ret = rte_eth_tx_queue_setup(
                                port,
                                0,
                                (uint16_t) app.nic_tx_ring_size,
                                socket,
                                NULL);
                        if (ret < 0) {
                                rte_panic("Cannot init TX queue 0 for port %d (%d)\n",
                                        port,
                                        ret);
                        }
                }

                /* Start port */
                ret = rte_eth_dev_start(port);
                if (ret < 0) {
                        rte_panic("Cannot start port %d (%d)\n", port, ret);
                }
        }

        check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0));
}
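
/*
 * Note: the NULL rx_conf/tx_conf arguments passed to rte_eth_rx_queue_setup()
 * and rte_eth_tx_queue_setup() above select the default queue configuration
 * advertised by the port's driver.
 */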

void
app_init(void)
{
        app_assign_worker_ids();
        app_init_mbuf_pools();
        app_init_lpm_tables();
        app_init_rings_rx();
        app_init_rings_tx();
        app_init_nics();

        printf("Initialization completed.\n");
}