ethdev: promote API to set packet types
[dpdk.git] / examples / l3fwd / l3fwd_fib.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 #include <sys/socket.h>
9 #include <arpa/inet.h>
10
11 #include <rte_fib.h>
12 #include <rte_fib6.h>
13
14 #include "l3fwd.h"
15 #if defined RTE_ARCH_X86
16 #include "l3fwd_sse.h"
17 #elif defined __ARM_NEON
18 #include "l3fwd_neon.h"
19 #elif defined RTE_ARCH_PPC_64
20 #include "l3fwd_altivec.h"
21 #endif
22 #include "l3fwd_event.h"
23 #include "l3fwd_route.h"
24
25 /* Configure how many packets ahead to prefetch for fib. */
26 #define FIB_PREFETCH_OFFSET 4
27
28 /* A non-existent portid is needed to denote a default hop for fib. */
29 #define FIB_DEFAULT_HOP 999
30
/*
 * If the machine has SSE, NEON or PPC 64, multiple packets can be sent
 * at once; otherwise packets are sent one at a time.
 */
35 #if defined RTE_ARCH_X86 || defined __ARM_NEON \
36                 || defined RTE_ARCH_PPC_64
37 #define FIB_SEND_MULTI
38 #endif
39
/*
 * Per-socket FIB tables, indexed by socket id. Entries are filled in by
 * setup_fib() and handed out by the fib_get_ipv4/ipv6_l3fwd_lookup_struct()
 * accessors.
 */
static struct rte_fib *ipv4_l3fwd_fib_lookup_struct[NB_SOCKETS];
static struct rte_fib6 *ipv6_l3fwd_fib_lookup_struct[NB_SOCKETS];
42
43 /* Parse packet type and ip address. */
44 static inline void
45 fib_parse_packet(struct rte_mbuf *mbuf,
46                 uint32_t *ipv4, uint32_t *ipv4_cnt,
47                 uint8_t ipv6[RTE_FIB6_IPV6_ADDR_SIZE],
48                 uint32_t *ipv6_cnt, uint8_t *ip_type)
49 {
50         struct rte_ether_hdr *eth_hdr;
51         struct rte_ipv4_hdr *ipv4_hdr;
52         struct rte_ipv6_hdr *ipv6_hdr;
53
54         eth_hdr = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
55         /* IPv4 */
56         if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
57                 ipv4_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
58                 *ipv4 = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
59                 /* Store type of packet in type_arr (IPv4=1, IPv6=0). */
60                 *ip_type = 1;
61                 (*ipv4_cnt)++;
62         }
63         /* IPv6 */
64         else {
65                 ipv6_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1);
66                 rte_mov16(ipv6, (const uint8_t *)ipv6_hdr->dst_addr);
67                 *ip_type = 0;
68                 (*ipv6_cnt)++;
69         }
70 }
71
72 /*
73  * If the machine does not have SSE, NEON or PPC 64 then the packets
74  * are sent one at a time using send_single_packet()
75  */
76 #if !defined FIB_SEND_MULTI
77 static inline void
78 fib_send_single(int nb_tx, struct lcore_conf *qconf,
79                 struct rte_mbuf **pkts_burst, uint16_t hops[nb_tx])
80 {
81         int32_t j;
82         struct rte_ether_hdr *eth_hdr;
83
84         for (j = 0; j < nb_tx; j++) {
85                 /* Run rfc1812 if packet is ipv4 and checks enabled. */
86 #if defined DO_RFC_1812_CHECKS
87                 rfc1812_process((struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(
88                                 pkts_burst[j], struct rte_ether_hdr *) + 1),
89                                 &hops[j], pkts_burst[j]->packet_type);
90 #endif
91
92                 /* Set MAC addresses. */
93                 eth_hdr = rte_pktmbuf_mtod(pkts_burst[j],
94                                 struct rte_ether_hdr *);
95                 *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[hops[j]];
96                 rte_ether_addr_copy(&ports_eth_addr[hops[j]],
97                                 &eth_hdr->s_addr);
98
99                 /* Send single packet. */
100                 send_single_packet(qconf, pkts_burst[j], hops[j]);
101         }
102 }
103 #endif
104
105 /* Bulk parse, fib lookup and send. */
106 static inline void
107 fib_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
108                 uint16_t portid, struct lcore_conf *qconf)
109 {
110         uint32_t ipv4_arr[nb_rx];
111         uint8_t ipv6_arr[nb_rx][RTE_FIB6_IPV6_ADDR_SIZE];
112         uint16_t hops[nb_rx];
113         uint64_t hopsv4[nb_rx], hopsv6[nb_rx];
114         uint8_t type_arr[nb_rx];
115         uint32_t ipv4_cnt = 0, ipv6_cnt = 0;
116         uint32_t ipv4_arr_assem = 0, ipv6_arr_assem = 0;
117         uint16_t nh;
118         int32_t i;
119
120         /* Prefetch first packets. */
121         for (i = 0; i < FIB_PREFETCH_OFFSET && i < nb_rx; i++)
122                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *));
123
124         /* Parse packet info and prefetch. */
125         for (i = 0; i < (nb_rx - FIB_PREFETCH_OFFSET); i++) {
126                 /* Prefetch packet. */
127                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
128                                 i + FIB_PREFETCH_OFFSET], void *));
129                 fib_parse_packet(pkts_burst[i],
130                                 &ipv4_arr[ipv4_cnt], &ipv4_cnt,
131                                 ipv6_arr[ipv6_cnt], &ipv6_cnt,
132                                 &type_arr[i]);
133         }
134
135         /* Parse remaining packet info. */
136         for (; i < nb_rx; i++)
137                 fib_parse_packet(pkts_burst[i],
138                                 &ipv4_arr[ipv4_cnt], &ipv4_cnt,
139                                 ipv6_arr[ipv6_cnt], &ipv6_cnt,
140                                 &type_arr[i]);
141
142         /* Lookup IPv4 hops if IPv4 packets are present. */
143         if (likely(ipv4_cnt > 0))
144                 rte_fib_lookup_bulk(qconf->ipv4_lookup_struct,
145                                 ipv4_arr, hopsv4, ipv4_cnt);
146
147         /* Lookup IPv6 hops if IPv6 packets are present. */
148         if (ipv6_cnt > 0)
149                 rte_fib6_lookup_bulk(qconf->ipv6_lookup_struct,
150                                 ipv6_arr, hopsv6, ipv6_cnt);
151
152         /* Add IPv4 and IPv6 hops to one array depending on type. */
153         for (i = 0; i < nb_rx; i++) {
154                 if (type_arr[i])
155                         nh = (uint16_t)hopsv4[ipv4_arr_assem++];
156                 else
157                         nh = (uint16_t)hopsv6[ipv6_arr_assem++];
158                 hops[i] = nh != FIB_DEFAULT_HOP ? nh : portid;
159         }
160
161 #if defined FIB_SEND_MULTI
162         send_packets_multi(qconf, pkts_burst, hops, nb_rx);
163 #else
164         fib_send_single(nb_rx, qconf, pkts_burst, hops);
165 #endif
166 }
167
168 /* Main fib processing loop. */
169 int
170 fib_main_loop(__rte_unused void *dummy)
171 {
172         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
173         unsigned int lcore_id;
174         uint64_t prev_tsc, diff_tsc, cur_tsc;
175         int i, nb_rx;
176         uint16_t portid;
177         uint8_t queueid;
178         struct lcore_conf *qconf;
179         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
180                         US_PER_S * BURST_TX_DRAIN_US;
181
182         lcore_id = rte_lcore_id();
183         qconf = &lcore_conf[lcore_id];
184
185         const uint16_t n_rx_q = qconf->n_rx_queue;
186         const uint16_t n_tx_p = qconf->n_tx_port;
187         if (n_rx_q == 0) {
188                 RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
189                 return 0;
190         }
191
192         RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
193
194         for (i = 0; i < n_rx_q; i++) {
195
196                 portid = qconf->rx_queue_list[i].port_id;
197                 queueid = qconf->rx_queue_list[i].queue_id;
198                 RTE_LOG(INFO, L3FWD,
199                                 " -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
200                                 lcore_id, portid, queueid);
201         }
202
203         cur_tsc = rte_rdtsc();
204         prev_tsc = cur_tsc;
205
206         while (!force_quit) {
207
208                 /* TX burst queue drain. */
209                 diff_tsc = cur_tsc - prev_tsc;
210                 if (unlikely(diff_tsc > drain_tsc)) {
211
212                         for (i = 0; i < n_tx_p; ++i) {
213                                 portid = qconf->tx_port_id[i];
214                                 if (qconf->tx_mbufs[portid].len == 0)
215                                         continue;
216                                 send_burst(qconf,
217                                         qconf->tx_mbufs[portid].len,
218                                         portid);
219                                 qconf->tx_mbufs[portid].len = 0;
220                         }
221
222                         prev_tsc = cur_tsc;
223                 }
224
225                 /* Read packet from RX queues. */
226                 for (i = 0; i < n_rx_q; ++i) {
227                         portid = qconf->rx_queue_list[i].port_id;
228                         queueid = qconf->rx_queue_list[i].queue_id;
229                         nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
230                                         MAX_PKT_BURST);
231                         if (nb_rx == 0)
232                                 continue;
233
234                         /* Use fib to lookup port IDs and transmit them. */
235                         fib_send_packets(nb_rx, pkts_burst,     portid, qconf);
236                 }
237
238                 cur_tsc = rte_rdtsc();
239         }
240
241         return 0;
242 }
243
244 /* One eventdev loop for single and burst using fib. */
245 static __rte_always_inline void
246 fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
247                 const uint8_t flags)
248 {
249         const int event_p_id = l3fwd_get_free_event_port(evt_rsrc);
250         const uint8_t tx_q_id = evt_rsrc->evq.event_q_id[
251                         evt_rsrc->evq.nb_queues - 1];
252         const uint8_t event_d_id = evt_rsrc->event_d_id;
253         const uint16_t deq_len = evt_rsrc->deq_depth;
254         struct rte_event events[MAX_PKT_BURST];
255         struct lcore_conf *lconf;
256         unsigned int lcore_id;
257         int nb_enq, nb_deq, i;
258
259         uint32_t ipv4_arr[MAX_PKT_BURST];
260         uint8_t ipv6_arr[MAX_PKT_BURST][RTE_FIB6_IPV6_ADDR_SIZE];
261         uint64_t hopsv4[MAX_PKT_BURST], hopsv6[MAX_PKT_BURST];
262         uint16_t nh;
263         uint8_t type_arr[MAX_PKT_BURST];
264         uint32_t ipv4_cnt, ipv6_cnt;
265         uint32_t ipv4_arr_assem, ipv6_arr_assem;
266
267         if (event_p_id < 0)
268                 return;
269
270         lcore_id = rte_lcore_id();
271
272         lconf = &lcore_conf[lcore_id];
273
274         RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
275
276         while (!force_quit) {
277                 /* Read events from RX queues. */
278                 nb_deq = rte_event_dequeue_burst(event_d_id, event_p_id,
279                                 events, deq_len, 0);
280                 if (nb_deq == 0) {
281                         rte_pause();
282                         continue;
283                 }
284
285                 /* Reset counters. */
286                 ipv4_cnt = 0;
287                 ipv6_cnt = 0;
288                 ipv4_arr_assem = 0;
289                 ipv6_arr_assem = 0;
290
291                 /* Prefetch first packets. */
292                 for (i = 0; i < FIB_PREFETCH_OFFSET && i < nb_deq; i++)
293                         rte_prefetch0(rte_pktmbuf_mtod(events[i].mbuf, void *));
294
295                 /* Parse packet info and prefetch. */
296                 for (i = 0; i < (nb_deq - FIB_PREFETCH_OFFSET); i++) {
297                         if (flags & L3FWD_EVENT_TX_ENQ) {
298                                 events[i].queue_id = tx_q_id;
299                                 events[i].op = RTE_EVENT_OP_FORWARD;
300                         }
301
302                         if (flags & L3FWD_EVENT_TX_DIRECT)
303                                 rte_event_eth_tx_adapter_txq_set(events[i].mbuf,
304                                                 0);
305
306                         /* Prefetch packet. */
307                         rte_prefetch0(rte_pktmbuf_mtod(events[
308                                         i + FIB_PREFETCH_OFFSET].mbuf,
309                                         void *));
310
311                         fib_parse_packet(events[i].mbuf,
312                                         &ipv4_arr[ipv4_cnt], &ipv4_cnt,
313                                         ipv6_arr[ipv6_cnt], &ipv6_cnt,
314                                         &type_arr[i]);
315                 }
316
317                 /* Parse remaining packet info. */
318                 for (; i < nb_deq; i++) {
319                         if (flags & L3FWD_EVENT_TX_ENQ) {
320                                 events[i].queue_id = tx_q_id;
321                                 events[i].op = RTE_EVENT_OP_FORWARD;
322                         }
323
324                         if (flags & L3FWD_EVENT_TX_DIRECT)
325                                 rte_event_eth_tx_adapter_txq_set(events[i].mbuf,
326                                                 0);
327
328                         fib_parse_packet(events[i].mbuf,
329                                         &ipv4_arr[ipv4_cnt], &ipv4_cnt,
330                                         ipv6_arr[ipv6_cnt], &ipv6_cnt,
331                                         &type_arr[i]);
332                 }
333
334                 /* Lookup IPv4 hops if IPv4 packets are present. */
335                 if (likely(ipv4_cnt > 0))
336                         rte_fib_lookup_bulk(lconf->ipv4_lookup_struct,
337                                         ipv4_arr, hopsv4, ipv4_cnt);
338
339                 /* Lookup IPv6 hops if IPv6 packets are present. */
340                 if (ipv6_cnt > 0)
341                         rte_fib6_lookup_bulk(lconf->ipv6_lookup_struct,
342                                         ipv6_arr, hopsv6, ipv6_cnt);
343
344                 /* Assign ports looked up in fib depending on IPv4 or IPv6 */
345                 for (i = 0; i < nb_deq; i++) {
346                         if (type_arr[i])
347                                 nh = (uint16_t)hopsv4[ipv4_arr_assem++];
348                         else
349                                 nh = (uint16_t)hopsv6[ipv6_arr_assem++];
350                         if (nh != FIB_DEFAULT_HOP)
351                                 events[i].mbuf->port = nh;
352                 }
353
354                 if (flags & L3FWD_EVENT_TX_ENQ) {
355                         nb_enq = rte_event_enqueue_burst(event_d_id, event_p_id,
356                                         events, nb_deq);
357                         while (nb_enq < nb_deq && !force_quit)
358                                 nb_enq += rte_event_enqueue_burst(event_d_id,
359                                                 event_p_id, events + nb_enq,
360                                                 nb_deq - nb_enq);
361                 }
362
363                 if (flags & L3FWD_EVENT_TX_DIRECT) {
364                         nb_enq = rte_event_eth_tx_adapter_enqueue(event_d_id,
365                                         event_p_id, events, nb_deq, 0);
366                         while (nb_enq < nb_deq && !force_quit)
367                                 nb_enq += rte_event_eth_tx_adapter_enqueue(
368                                                 event_d_id, event_p_id,
369                                                 events + nb_enq,
370                                                 nb_deq - nb_enq, 0);
371                 }
372         }
373 }
374
375 int __rte_noinline
376 fib_event_main_loop_tx_d(__rte_unused void *dummy)
377 {
378         struct l3fwd_event_resources *evt_rsrc =
379                         l3fwd_get_eventdev_rsrc();
380
381         fib_event_loop(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
382         return 0;
383 }
384
385 int __rte_noinline
386 fib_event_main_loop_tx_d_burst(__rte_unused void *dummy)
387 {
388         struct l3fwd_event_resources *evt_rsrc =
389                         l3fwd_get_eventdev_rsrc();
390
391         fib_event_loop(evt_rsrc, L3FWD_EVENT_TX_DIRECT);
392         return 0;
393 }
394
395 int __rte_noinline
396 fib_event_main_loop_tx_q(__rte_unused void *dummy)
397 {
398         struct l3fwd_event_resources *evt_rsrc =
399                         l3fwd_get_eventdev_rsrc();
400
401         fib_event_loop(evt_rsrc, L3FWD_EVENT_TX_ENQ);
402         return 0;
403 }
404
405 int __rte_noinline
406 fib_event_main_loop_tx_q_burst(__rte_unused void *dummy)
407 {
408         struct l3fwd_event_resources *evt_rsrc =
409                         l3fwd_get_eventdev_rsrc();
410
411         fib_event_loop(evt_rsrc, L3FWD_EVENT_TX_ENQ);
412         return 0;
413 }
414
415 /* Function to setup fib. 8< */
416 void
417 setup_fib(const int socketid)
418 {
419         struct rte_fib6_conf config;
420         struct rte_fib_conf config_ipv4;
421         unsigned int i;
422         int ret;
423         char s[64];
424         char abuf[INET6_ADDRSTRLEN];
425
426         /* Create the fib IPv4 table. */
427         config_ipv4.type = RTE_FIB_DIR24_8;
428         config_ipv4.max_routes = (1 << 16);
429         config_ipv4.default_nh = FIB_DEFAULT_HOP;
430         config_ipv4.dir24_8.nh_sz = RTE_FIB_DIR24_8_4B;
431         config_ipv4.dir24_8.num_tbl8 = (1 << 15);
432         snprintf(s, sizeof(s), "IPV4_L3FWD_FIB_%d", socketid);
433         ipv4_l3fwd_fib_lookup_struct[socketid] =
434                         rte_fib_create(s, socketid, &config_ipv4);
435         if (ipv4_l3fwd_fib_lookup_struct[socketid] == NULL)
436                 rte_exit(EXIT_FAILURE,
437                         "Unable to create the l3fwd FIB table on socket %d\n",
438                         socketid);
439
440         /* Populate the fib ipv4 table. */
441         for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) {
442                 struct in_addr in;
443
444                 /* Skip unused ports. */
445                 if ((1 << ipv4_l3fwd_route_array[i].if_out &
446                                 enabled_port_mask) == 0)
447                         continue;
448
449                 ret = rte_fib_add(ipv4_l3fwd_fib_lookup_struct[socketid],
450                         ipv4_l3fwd_route_array[i].ip,
451                         ipv4_l3fwd_route_array[i].depth,
452                         ipv4_l3fwd_route_array[i].if_out);
453
454                 if (ret < 0) {
455                         rte_exit(EXIT_FAILURE,
456                                         "Unable to add entry %u to the l3fwd FIB table on socket %d\n",
457                                         i, socketid);
458                 }
459
460                 in.s_addr = htonl(ipv4_l3fwd_route_array[i].ip);
461                 if (inet_ntop(AF_INET, &in, abuf, sizeof(abuf)) != NULL) {
462                         printf("FIB: Adding route %s / %d (%d)\n",
463                                 abuf,
464                                 ipv4_l3fwd_route_array[i].depth,
465                                 ipv4_l3fwd_route_array[i].if_out);
466                 } else {
467                         printf("FIB: IPv4 route added to port %d\n",
468                                 ipv4_l3fwd_route_array[i].if_out);
469                 }
470         }
471         /* >8 End of setup fib. */
472
473         /* Create the fib IPv6 table. */
474         snprintf(s, sizeof(s), "IPV6_L3FWD_FIB_%d", socketid);
475
476         config.type = RTE_FIB6_TRIE;
477         config.max_routes = (1 << 16) - 1;
478         config.default_nh = FIB_DEFAULT_HOP;
479         config.trie.nh_sz = RTE_FIB6_TRIE_4B;
480         config.trie.num_tbl8 = (1 << 15);
481         ipv6_l3fwd_fib_lookup_struct[socketid] = rte_fib6_create(s, socketid,
482                         &config);
483         if (ipv6_l3fwd_fib_lookup_struct[socketid] == NULL)
484                 rte_exit(EXIT_FAILURE,
485                                 "Unable to create the l3fwd FIB table on socket %d\n",
486                                 socketid);
487
488         /* Populate the fib IPv6 table. */
489         for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) {
490
491                 /* Skip unused ports. */
492                 if ((1 << ipv6_l3fwd_route_array[i].if_out &
493                                 enabled_port_mask) == 0)
494                         continue;
495
496                 ret = rte_fib6_add(ipv6_l3fwd_fib_lookup_struct[socketid],
497                         ipv6_l3fwd_route_array[i].ip,
498                         ipv6_l3fwd_route_array[i].depth,
499                         ipv6_l3fwd_route_array[i].if_out);
500
501                 if (ret < 0) {
502                         rte_exit(EXIT_FAILURE,
503                                         "Unable to add entry %u to the l3fwd FIB table on socket %d\n",
504                                         i, socketid);
505                 }
506
507                 if (inet_ntop(AF_INET6, ipv6_l3fwd_route_array[i].ip,
508                                 abuf, sizeof(abuf)) != NULL) {
509                         printf("FIB: Adding route %s / %d (%d)\n",
510                                 abuf,
511                                 ipv6_l3fwd_route_array[i].depth,
512                                 ipv6_l3fwd_route_array[i].if_out);
513                 } else {
514                         printf("FIB: IPv6 route added to port %d\n",
515                                 ipv6_l3fwd_route_array[i].if_out);
516                 }
517         }
518 }
519
520 /* Return ipv4 fib lookup struct. */
521 void *
522 fib_get_ipv4_l3fwd_lookup_struct(const int socketid)
523 {
524         return ipv4_l3fwd_fib_lookup_struct[socketid];
525 }
526
527 /* Return ipv6 fib lookup struct. */
528 void *
529 fib_get_ipv6_l3fwd_lookup_struct(const int socketid)
530 {
531         return ipv6_l3fwd_fib_lookup_struct[socketid];
532 }