node: add IPv4 rewrite
author Kiran Kumar K <kirankumark@marvell.com>
Sat, 11 Apr 2020 14:14:21 +0000 (19:44 +0530)
committer Thomas Monjalon <thomas@monjalon.net>
Tue, 5 May 2020 21:41:11 +0000 (23:41 +0200)
Add the IPv4 rewrite process function for the ip4_rewrite
rte_node. For every packet received by this node, the packet
header is overwritten with new data before the packet is
forwarded to the next node. The header data to overwrite with
is identified by the next hop id that the previous node passes
in the mbuf private data.

Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
lib/librte_node/Makefile
lib/librte_node/ip4_rewrite.c [new file with mode: 0644]
lib/librte_node/ip4_rewrite_priv.h [new file with mode: 0644]
lib/librte_node/meson.build
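
An illustrative hand-off sketch (not part of the patch): how the previous
node (e.g. ip4_lookup) is expected to fill the mbuf private data that
ip4_rewrite consumes, assuming the node_mbuf_priv1() accessor from
node_private.h. The field names (nh, ttl, cksum) match their use in
ip4_rewrite_node_process() below; the helper name is hypothetical.

#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>

#include "node_private.h"

static inline void
ip4_rewrite_hand_off(struct rte_mbuf *mbuf, uint16_t next_hop_id)
{
	struct rte_ipv4_hdr *ip;

	ip = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
				     sizeof(struct rte_ether_hdr));
	node_mbuf_priv1(mbuf)->nh = next_hop_id;         /* index into the nh[] table */
	node_mbuf_priv1(mbuf)->ttl = ip->time_to_live;   /* ip4_rewrite decrements this */
	node_mbuf_priv1(mbuf)->cksum = ip->hdr_checksum; /* updated incrementally */
}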

index f5ac147..1377f5f 100644
@@ -22,6 +22,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_rx.c
 SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_tx.c
 SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_ctrl.c
 SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ip4_lookup.c
+SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ip4_rewrite.c
 
 # install header files
 SYMLINK-$(CONFIG_RTE_LIBRTE_NODE)-include += rte_node_ip4_api.h
diff --git a/lib/librte_node/ip4_rewrite.c b/lib/librte_node/ip4_rewrite.c
new file mode 100644
index 0000000..333e347
--- /dev/null
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <rte_debug.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_graph.h>
+#include <rte_graph_worker.h>
+#include <rte_ip.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_vect.h>
+
+#include "rte_node_ip4_api.h"
+
+#include "ip4_rewrite_priv.h"
+#include "node_private.h"
+
+static struct ip4_rewrite_node_main *ip4_rewrite_nm;
+
+static uint16_t
+ip4_rewrite_node_process(struct rte_graph *graph, struct rte_node *node,
+                        void **objs, uint16_t nb_objs)
+{
+       struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
+       struct ip4_rewrite_nh_header *nh = ip4_rewrite_nm->nh;
+       uint16_t next0, next1, next2, next3, next_index;
+       struct rte_ipv4_hdr *ip0, *ip1, *ip2, *ip3;
+       uint16_t n_left_from, held = 0, last_spec = 0;
+       void *d0, *d1, *d2, *d3;
+       void **to_next, **from;
+       rte_xmm_t priv01;
+       rte_xmm_t priv23;
+       int i;
+
+       /* Speculative next as last next */
+       next_index = *(uint16_t *)node->ctx;
+       rte_prefetch0(nh);
+
+       pkts = (struct rte_mbuf **)objs;
+       from = objs;
+       n_left_from = nb_objs;
+
+       for (i = 0; i < 4 && i < n_left_from; i++)
+               rte_prefetch0(pkts[i]);
+
+       /* Get stream for the speculated next node */
+       to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+       /* Update Ethernet header of pkts */
+       while (n_left_from >= 4) {
+               if (likely(n_left_from > 7)) {
+                       /* Prefetch only next-mbuf struct and priv area.
+                        * Data need not be prefetched as we only write.
+                        */
+                       rte_prefetch0(pkts[4]);
+                       rte_prefetch0(pkts[5]);
+                       rte_prefetch0(pkts[6]);
+                       rte_prefetch0(pkts[7]);
+               }
+
+               mbuf0 = pkts[0];
+               mbuf1 = pkts[1];
+               mbuf2 = pkts[2];
+               mbuf3 = pkts[3];
+
+               pkts += 4;
+               n_left_from -= 4;
+               priv01.u64[0] = node_mbuf_priv1(mbuf0)->u;
+               priv01.u64[1] = node_mbuf_priv1(mbuf1)->u;
+               priv23.u64[0] = node_mbuf_priv1(mbuf2)->u;
+               priv23.u64[1] = node_mbuf_priv1(mbuf3)->u;
+
+               /* Adjust checksums for the TTL decrement (incremental update). */
+               priv01.u32[1] += rte_cpu_to_be_16(0x0100);
+               priv01.u32[3] += rte_cpu_to_be_16(0x0100);
+               priv23.u32[1] += rte_cpu_to_be_16(0x0100);
+               priv23.u32[3] += rte_cpu_to_be_16(0x0100);
+
+               /* Rewrite ethernet hdr, then update ttl and cksum on mbuf0 */
+               d0 = rte_pktmbuf_mtod(mbuf0, void *);
+               rte_memcpy(d0, nh[priv01.u16[0]].rewrite_data,
+                          nh[priv01.u16[0]].rewrite_len);
+
+               next0 = nh[priv01.u16[0]].tx_node;
+               ip0 = (struct rte_ipv4_hdr *)((uint8_t *)d0 +
+                                             sizeof(struct rte_ether_hdr));
+               ip0->time_to_live = priv01.u16[1] - 1;
+               ip0->hdr_checksum = priv01.u16[2] + priv01.u16[3];
+
+               /* Rewrite ethernet hdr, then update ttl and cksum on mbuf1 */
+               d1 = rte_pktmbuf_mtod(mbuf1, void *);
+               rte_memcpy(d1, nh[priv01.u16[4]].rewrite_data,
+                          nh[priv01.u16[4]].rewrite_len);
+
+               next1 = nh[priv01.u16[4]].tx_node;
+               ip1 = (struct rte_ipv4_hdr *)((uint8_t *)d1 +
+                                             sizeof(struct rte_ether_hdr));
+               ip1->time_to_live = priv01.u16[5] - 1;
+               ip1->hdr_checksum = priv01.u16[6] + priv01.u16[7];
+
+               /* Rewrite ethernet hdr, then update ttl and cksum on mbuf2 */
+               d2 = rte_pktmbuf_mtod(mbuf2, void *);
+               rte_memcpy(d2, nh[priv23.u16[0]].rewrite_data,
+                          nh[priv23.u16[0]].rewrite_len);
+               next2 = nh[priv23.u16[0]].tx_node;
+               ip2 = (struct rte_ipv4_hdr *)((uint8_t *)d2 +
+                                             sizeof(struct rte_ether_hdr));
+               ip2->time_to_live = priv23.u16[1] - 1;
+               ip2->hdr_checksum = priv23.u16[2] + priv23.u16[3];
+
+               /* Rewrite ethernet hdr, then update ttl and cksum on mbuf3 */
+               d3 = rte_pktmbuf_mtod(mbuf3, void *);
+               rte_memcpy(d3, nh[priv23.u16[4]].rewrite_data,
+                          nh[priv23.u16[4]].rewrite_len);
+
+               next3 = nh[priv23.u16[4]].tx_node;
+               ip3 = (struct rte_ipv4_hdr *)((uint8_t *)d3 +
+                                             sizeof(struct rte_ether_hdr));
+               ip3->time_to_live = priv23.u16[5] - 1;
+               ip3->hdr_checksum = priv23.u16[6] + priv23.u16[7];
+
+               /* Enqueue four to next node */
+               rte_edge_t fix_spec =
+                       ((next_index == next0) && (next0 == next1) &&
+                        (next1 == next2) && (next2 == next3));
+
+               if (unlikely(fix_spec == 0)) {
+                       /* Copy things successfully speculated till now */
+                       rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+                       from += last_spec;
+                       to_next += last_spec;
+                       held += last_spec;
+                       last_spec = 0;
+
+                       /* next0 */
+                       if (next_index == next0) {
+                               to_next[0] = from[0];
+                               to_next++;
+                               held++;
+                       } else {
+                               rte_node_enqueue_x1(graph, node, next0,
+                                                   from[0]);
+                       }
+
+                       /* next1 */
+                       if (next_index == next1) {
+                               to_next[0] = from[1];
+                               to_next++;
+                               held++;
+                       } else {
+                               rte_node_enqueue_x1(graph, node, next1,
+                                                   from[1]);
+                       }
+
+                       /* next2 */
+                       if (next_index == next2) {
+                               to_next[0] = from[2];
+                               to_next++;
+                               held++;
+                       } else {
+                               rte_node_enqueue_x1(graph, node, next2,
+                                                   from[2]);
+                       }
+
+                       /* next3 */
+                       if (next_index == next3) {
+                               to_next[0] = from[3];
+                               to_next++;
+                               held++;
+                       } else {
+                               rte_node_enqueue_x1(graph, node, next3,
+                                                   from[3]);
+                       }
+
+                       from += 4;
+
+                       /* Change speculation if last two are same */
+                       if ((next_index != next3) && (next2 == next3)) {
+                               /* Put the current speculated node */
+                               rte_node_next_stream_put(graph, node,
+                                                        next_index, held);
+                               held = 0;
+
+                               /* Get next speculated stream */
+                               next_index = next3;
+                               to_next = rte_node_next_stream_get(
+                                       graph, node, next_index, nb_objs);
+                       }
+               } else {
+                       last_spec += 4;
+               }
+       }
+
+       while (n_left_from > 0) {
+               uint16_t chksum;
+
+               mbuf0 = pkts[0];
+
+               pkts += 1;
+               n_left_from -= 1;
+
+               d0 = rte_pktmbuf_mtod(mbuf0, void *);
+               rte_memcpy(d0, nh[node_mbuf_priv1(mbuf0)->nh].rewrite_data,
+                          nh[node_mbuf_priv1(mbuf0)->nh].rewrite_len);
+
+               next0 = nh[node_mbuf_priv1(mbuf0)->nh].tx_node;
+               ip0 = (struct rte_ipv4_hdr *)((uint8_t *)d0 +
+                                             sizeof(struct rte_ether_hdr));
+               chksum = node_mbuf_priv1(mbuf0)->cksum +
+                        rte_cpu_to_be_16(0x0100);
+               chksum += chksum >= 0xffff;
+               ip0->hdr_checksum = chksum;
+               ip0->time_to_live = node_mbuf_priv1(mbuf0)->ttl - 1;
+
+               if (unlikely(next_index ^ next0)) {
+                       /* Copy things successfully speculated till now */
+                       rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+                       from += last_spec;
+                       to_next += last_spec;
+                       held += last_spec;
+                       last_spec = 0;
+
+                       rte_node_enqueue_x1(graph, node, next0, from[0]);
+                       from += 1;
+               } else {
+                       last_spec += 1;
+               }
+       }
+
+       /* Home run: every packet went to the speculated next node */
+       if (likely(last_spec == nb_objs)) {
+               rte_node_next_stream_move(graph, node, next_index);
+               return nb_objs;
+       }
+
+       held += last_spec;
+       rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+       rte_node_next_stream_put(graph, node, next_index, held);
+       /* Save the last next used */
+       *(uint16_t *)node->ctx = next_index;
+
+       return nb_objs;
+}
+
+static int
+ip4_rewrite_node_init(const struct rte_graph *graph, struct rte_node *node)
+{
+
+       RTE_SET_USED(graph);
+       RTE_SET_USED(node);
+       node_dbg("ip4_rewrite", "Initialized ip4_rewrite node");
+
+       return 0;
+}
+
+static struct rte_node_register ip4_rewrite_node = {
+       .process = ip4_rewrite_node_process,
+       .name = "ip4_rewrite",
+       /* Default edge i.e. '0' is pkt drop */
+       .nb_edges = 1,
+       .next_nodes = {
+               [0] = "pkt_drop",
+       },
+       .init = ip4_rewrite_node_init,
+};
+
+RTE_NODE_REGISTER(ip4_rewrite_node);
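
A quick self-check sketch (illustrative only, not part of the patch) of the
incremental checksum update used above: adding rte_cpu_to_be_16(0x0100) to
the stored checksum and folding the carry (u16[2] + u16[3] in the vector
path, 'chksum += chksum >= 0xffff' in the scalar path) accounts for the TTL
decrement, in the spirit of the RFC 1624 incremental update. The helper
below, whose name is hypothetical, compares that against a full
recomputation for an option-less header.

#include <rte_byteorder.h>
#include <rte_ip.h>

static int
ip4_rewrite_cksum_selfcheck(struct rte_ipv4_hdr *ip)
{
	uint32_t sum = ip->hdr_checksum;            /* old checksum, as stored */
	uint16_t incremental, full;

	sum += rte_cpu_to_be_16(0x0100);            /* one's-complement add for TTL - 1 */
	incremental = (sum & 0xffff) + (sum >> 16); /* fold the carry */

	ip->time_to_live--;
	ip->hdr_checksum = 0;
	full = rte_ipv4_cksum(ip);                  /* full recomputation for reference */
	ip->hdr_checksum = incremental;

	return incremental == full;                 /* expected to hold for valid headers */
}
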
diff --git a/lib/librte_node/ip4_rewrite_priv.h b/lib/librte_node/ip4_rewrite_priv.h
new file mode 100644
index 0000000..420996a
--- /dev/null
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+#ifndef __INCLUDE_IP4_REWRITE_PRIV_H__
+#define __INCLUDE_IP4_REWRITE_PRIV_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+#define RTE_GRAPH_IP4_REWRITE_MAX_NH 64
+#define RTE_GRAPH_IP4_REWRITE_MAX_LEN 56
+
+/**
+ * @internal
+ *
+ * IPv4 rewrite next hop header data structure. Used to store next hop
+ * specific rewrite data.
+ */
+struct ip4_rewrite_nh_header {
+       uint16_t rewrite_len; /**< Header rewrite length. */
+       uint16_t tx_node;     /**< Tx node next index identifier. */
+       uint16_t enabled;     /**< NH enable flag */
+       uint16_t rsvd;
+       union {
+               struct {
+                       struct rte_ether_addr dst;
+                       /**< Destination mac address. */
+                       struct rte_ether_addr src;
+                       /**< Source mac address. */
+               };
+               uint8_t rewrite_data[RTE_GRAPH_IP4_REWRITE_MAX_LEN];
+               /**< Generic rewrite data */
+       };
+};
+
+/**
+ * @internal
+ *
+ * IPv4 rewrite node main data structure.
+ */
+struct ip4_rewrite_node_main {
+       struct ip4_rewrite_nh_header nh[RTE_GRAPH_IP4_REWRITE_MAX_NH];
+       /**< Array of next hop header data */
+       uint16_t next_index[RTE_MAX_ETHPORTS];
+       /**< Next index of each configured port. */
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_IP4_REWRITE_PRIV_H__ */
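
A control-path sketch (hypothetical, not added by this patch) of how a next
hop entry could be populated with the Ethernet header that ip4_rewrite
writes over the packet's existing header. It uses only the struct fields
declared above plus standard rte_ether definitions; the helper name and
tx_edge parameter are illustrative assumptions.

#include <rte_byteorder.h>
#include <rte_ether.h>

#include "ip4_rewrite_priv.h"

static void
ip4_rewrite_fill_nh(struct ip4_rewrite_nh_header *nh, uint16_t tx_edge,
		    const struct rte_ether_addr *dmac,
		    const struct rte_ether_addr *smac)
{
	struct rte_ether_hdr *eth = (struct rte_ether_hdr *)nh->rewrite_data;

	rte_ether_addr_copy(dmac, &eth->d_addr);        /* next hop MAC */
	rte_ether_addr_copy(smac, &eth->s_addr);        /* egress port MAC */
	eth->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);

	nh->rewrite_len = sizeof(struct rte_ether_hdr); /* 14 bytes rewritten */
	nh->tx_node = tx_edge;                          /* edge towards the Tx node */
	nh->enabled = 1;
}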
index e2e1b03..ad3e53a 100644
@@ -2,7 +2,7 @@
 # Copyright(C) 2020 Marvell International Ltd.
 
 sources = files('null.c', 'log.c', 'ethdev_rx.c', 'ethdev_tx.c', 'ip4_lookup.c',
-               'ethdev_ctrl.c')
+               'ip4_rewrite.c', 'ethdev_ctrl.c')
 headers = files('rte_node_ip4_api.h', 'rte_node_eth_api.h')
 # Strict-aliasing rules are violated by uint8_t[] to context size casts.
 cflags += '-fno-strict-aliasing'