other than IPv4 or IPv6, the mbuf was not released, and caused
a memory leak.
+* **l3fwd: Fixed using packet type blindly.**
+
+ l3fwd made use of packet type information without even querying whether devices
+ or PMDs really set it. For those that don't set ptypes, an option was added to
+ parse the packet type in software.
+
* **examples/vhost: Fixed frequent mbuf allocation failure.**
vhost-switch often fails to allocate mbuf when dequeue from vring because it
.. code-block:: console
- ./build/l3fwd [EAL options] -- -p PORTMASK [-P] --config(port,queue,lcore)[,(port,queue,lcore)] [--enable-jumbo [--max-pkt-len PKTLEN]] [--no-numa][--hash-entry-num][--ipv6]
+ ./build/l3fwd [EAL options] -- -p PORTMASK [-P] --config(port,queue,lcore)[,(port,queue,lcore)] [--enable-jumbo [--max-pkt-len PKTLEN]] [--no-numa][--hash-entry-num][--ipv6] [--parse-ptype]
where,
* --ipv6: optional, set it if running ipv6 packets
+* --parse-ptype: optional, set it to analyze the packet type in software
+
For example, consider a dual processor socket platform where cores 0-7 and 16-23 appear on socket 0, while cores 8-15 and 24-31 appear on socket 1.
Let's say that the programmer wants to use memory from both NUMA nodes, the platform has only two ports, one connected to each NUMA node,
and the programmer wants to use two cores from each processor socket to do the packet processing.
The simple_ipv6_fwd_4pkts() function is similar to the simple_ipv4_fwd_4pkts() function.
+Known issue: IP packets with extensions, or IP packets that are not TCP/UDP, do not work well in this mode.
+
Packet Forwarding for LPM-based Lookups
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
void
setup_hash(const int socketid);
+int
+em_check_ptype(int portid);
+
+int
+lpm_check_ptype(int portid);
+
+uint16_t
+em_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
+ uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
+
+uint16_t
+lpm_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
+ uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
+
int
em_main_loop(__attribute__((unused)) void *dummy);
#include <errno.h>
#include <getopt.h>
#include <stdbool.h>
+#include <netinet/in.h>
#include <rte_debug.h>
#include <rte_ether.h>
printf("Hash: Adding 0x%x keys\n", nr_flow);
}
+/*
+ * Ask the port which packet types it reports and decide whether that is
+ * enough for the exact-match path.
+ *
+ * Requirements:
+ * 1. IP packets without extension;
+ * 2. L4 payload should be either TCP or UDP.
+ *
+ * Returns 1 when RTE_PTYPE_L3_IPV4_EXT, RTE_PTYPE_L3_IPV6_EXT,
+ * RTE_PTYPE_L4_TCP and RTE_PTYPE_L4_UDP are all reported, 0 otherwise.
+ * Every missing type is printed before returning.
+ */
+int
+em_check_ptype(int portid)
+{
+	const uint32_t mask = RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK;
+	int has_ipv4_ext = 0, has_ipv6_ext = 0;
+	int has_tcp = 0, has_udp = 0;
+	int idx, nb;
+
+	/* Query without an output buffer; the result sizes the array below. */
+	nb = rte_eth_dev_get_supported_ptypes(portid, mask, NULL, 0);
+	if (nb <= 0)
+		return 0;
+
+	uint32_t supported[nb];
+
+	nb = rte_eth_dev_get_supported_ptypes(portid, mask, supported, nb);
+	for (idx = 0; idx < nb; ++idx) {
+		const uint32_t pt = supported[idx];
+
+		if (pt == RTE_PTYPE_L3_IPV4_EXT)
+			has_ipv4_ext = 1;
+		else if (pt == RTE_PTYPE_L3_IPV6_EXT)
+			has_ipv6_ext = 1;
+		else if (pt == RTE_PTYPE_L4_TCP)
+			has_tcp = 1;
+		else if (pt == RTE_PTYPE_L4_UDP)
+			has_udp = 1;
+	}
+
+	/* L3 capabilities are reported first; L4 is only checked if L3 is OK. */
+	if (!has_ipv4_ext)
+		printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid);
+	if (!has_ipv6_ext)
+		printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid);
+	if (!(has_ipv4_ext && has_ipv6_ext))
+		return 0;
+
+	if (!has_tcp)
+		printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid);
+	if (!has_udp)
+		printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid);
+
+	return (has_tcp && has_udp) ? 1 : 0;
+}
+
+/*
+ * Classify one packet in software for the exact-match path and store the
+ * result in m->packet_type.
+ *
+ * - IPv4 whose header length equals sizeof(struct ipv4_hdr):
+ *   RTE_PTYPE_L3_IPV4, plus RTE_PTYPE_L4_TCP or RTE_PTYPE_L4_UDP when
+ *   next_proto_id matches.
+ * - IPv4 with any other header length: RTE_PTYPE_L3_IPV4_EXT.
+ * - IPv6 whose proto field is TCP or UDP: RTE_PTYPE_L3_IPV6 plus the
+ *   matching L4 type.
+ * - Any other IPv6 packet: RTE_PTYPE_L3_IPV6_EXT_UNKNOWN.
+ * - Any other ether type: RTE_PTYPE_UNKNOWN.
+ */
+static inline void
+em_parse_ptype(struct rte_mbuf *m)
+{
+	struct ether_hdr *eth_hdr;
+	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
+	uint16_t ether_type;
+	void *l3;
+	int hdr_len;
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	/* ether_type stays in network byte order; constants are swapped. */
+	ether_type = eth_hdr->ether_type;
+	l3 = (uint8_t *)eth_hdr + sizeof(struct ether_hdr);
+	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+		ipv4_hdr = (struct ipv4_hdr *)l3;
+		hdr_len = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+			  IPV4_IHL_MULTIPLIER;
+		if (hdr_len == sizeof(struct ipv4_hdr)) {
+			packet_type |= RTE_PTYPE_L3_IPV4;
+			if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
+				packet_type |= RTE_PTYPE_L4_TCP;
+			else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
+				packet_type |= RTE_PTYPE_L4_UDP;
+		} else {
+			packet_type |= RTE_PTYPE_L3_IPV4_EXT;
+		}
+	} else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+		/*
+		 * This branch must test the IPv6 ether type; testing IPv4
+		 * again would leave every IPv6 packet as RTE_PTYPE_UNKNOWN.
+		 */
+		ipv6_hdr = (struct ipv6_hdr *)l3;
+		if (ipv6_hdr->proto == IPPROTO_TCP)
+			packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP;
+		else if (ipv6_hdr->proto == IPPROTO_UDP)
+			packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP;
+		else
+			packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+	}
+
+	m->packet_type = packet_type;
+}
+
+/*
+ * RX callback for the exact-match path: runs em_parse_ptype() on each of
+ * the nb_pkts mbufs in pkts[] and returns nb_pkts unchanged. The port,
+ * queue, max_pkts and user_param arguments are not used.
+ */
+uint16_t
+em_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
+		  struct rte_mbuf *pkts[], uint16_t nb_pkts,
+		  uint16_t max_pkts __rte_unused,
+		  void *user_param __rte_unused)
+{
+	struct rte_mbuf **cur = pkts;
+	struct rte_mbuf **const end = pkts + nb_pkts;
+
+	while (cur != end)
+		em_parse_ptype(*cur++);
+
+	return nb_pkts;
+}
+
/* main processing loop */
int
em_main_loop(__attribute__((unused)) void *dummy)
struct ether_hdr *eth_hdr;
struct ipv4_hdr *ipv4_hdr;
uint8_t dst_port;
+ uint32_t tcp_or_udp;
+ uint32_t l3_ptypes;
eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+ l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK;
- if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
/* Handle IPv4 headers.*/
ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
sizeof(struct ether_hdr));
return;
}
#endif
- dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
+ dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
qconf->ipv4_lookup_struct);
if (dst_port >= RTE_MAX_ETHPORTS ||
ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
send_single_packet(qconf, m, dst_port);
- } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+ } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
/* Handle IPv6 headers.*/
struct ipv6_hdr *ipv6_hdr;
uint8_t next_hop;
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
+ uint32_t tcp_or_udp;
+ uint32_t l3_ptypes;
- if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+ tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+ l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK;
+
+ if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
/* Handle IPv4 headers.*/
ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
return next_hop;
- } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+ } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
/* Handle IPv6 headers.*/
ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *,
pkts_burst[j+6]->packet_type &
pkts_burst[j+7]->packet_type;
- if (pkt_type & RTE_PTYPE_L3_IPV4) {
+ uint32_t l3_type = pkt_type & RTE_PTYPE_L3_MASK;
+ uint32_t tcp_or_udp = pkt_type &
+ (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+
+ if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
em_get_dst_port_ipv4x8(qconf, &pkts_burst[j], portid, &dst_port[j]);
- } else if (pkt_type & RTE_PTYPE_L3_IPV6) {
+ } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
em_get_dst_port_ipv6x8(qconf, &pkts_burst[j], portid, &dst_port[j]);
uint8_t next_hop;
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
+ uint32_t tcp_or_udp;
+ uint32_t l3_ptypes;
- if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+ tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+ l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK;
+
+ if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
/* Handle IPv4 headers.*/
ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
return next_hop;
- } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+ } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
/* Handle IPv6 headers.*/
ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *,
}
}
+/*
+ * Ask the port which L3 packet types it reports and decide whether that
+ * is enough for the LPM path.
+ *
+ * Returns 1 when at least one reported type has the RTE_PTYPE_L3_IPV4
+ * bits set and at least one has the RTE_PTYPE_L3_IPV6 bits set, 0
+ * otherwise. Each missing family is printed before returning.
+ */
+int
+lpm_check_ptype(int portid)
+{
+	const uint32_t mask = RTE_PTYPE_L3_MASK;
+	int has_ipv4 = 0;
+	int has_ipv6 = 0;
+	int idx, nb;
+
+	/* Query without an output buffer; the result sizes the array below. */
+	nb = rte_eth_dev_get_supported_ptypes(portid, mask, NULL, 0);
+	if (nb <= 0)
+		return 0;
+
+	uint32_t supported[nb];
+
+	nb = rte_eth_dev_get_supported_ptypes(portid, mask, supported, nb);
+	for (idx = 0; idx < nb; ++idx) {
+		const uint32_t pt = supported[idx];
+
+		if ((pt & RTE_PTYPE_L3_IPV4) != 0)
+			has_ipv4 = 1;
+		if ((pt & RTE_PTYPE_L3_IPV6) != 0)
+			has_ipv6 = 1;
+	}
+
+	if (!has_ipv4)
+		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
+	if (!has_ipv6)
+		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
+
+	return (has_ipv4 && has_ipv6) ? 1 : 0;
+}
+
+/*
+ * Classify one packet in software for the LPM path using only the
+ * Ethernet type, and store the result in m->packet_type:
+ * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN for IPv4, RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
+ * for IPv6, RTE_PTYPE_UNKNOWN for anything else.
+ */
+static inline void
+lpm_parse_ptype(struct rte_mbuf *m)
+{
+	const struct ether_hdr *eth;
+	uint32_t ptype;
+	uint16_t etype;
+
+	eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+	/* etype stays in network byte order; constants are swapped. */
+	etype = eth->ether_type;
+
+	if (etype == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+		ptype = RTE_PTYPE_UNKNOWN | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+	else if (etype == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
+		ptype = RTE_PTYPE_UNKNOWN | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+	else
+		ptype = RTE_PTYPE_UNKNOWN;
+
+	m->packet_type = ptype;
+}
+
+/*
+ * RX callback for the LPM path: runs lpm_parse_ptype() on each of the
+ * nb_pkts mbufs in pkts[] and returns nb_pkts unchanged. The port, queue,
+ * max_pkts and user_param arguments are not used.
+ */
+uint16_t
+lpm_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
+		   struct rte_mbuf *pkts[], uint16_t nb_pkts,
+		   uint16_t max_pkts __rte_unused,
+		   void *user_param __rte_unused)
+{
+	struct rte_mbuf **cur = pkts;
+	struct rte_mbuf **const end = pkts + nb_pkts;
+
+	while (cur != end)
+		lpm_parse_ptype(*cur++);
+
+	return nb_pkts;
+}
+
/* Return ipv4/ipv6 lpm fwd lookup struct. */
void *
lpm_get_ipv4_l3fwd_lookup_struct(const int socketid)
static int l3fwd_em_on;
static int numa_on = 1; /**< NUMA is enabled by default. */
+static int parse_ptype; /**< Parse packet type using rx callback, and */
+ /**< disabled by default */
/* Global variables. */
struct l3fwd_lkp_mode {
void (*setup)(int);
+ int (*check_ptype)(int);
+ rte_rx_callback_fn cb_parse_ptype;
int (*main_loop)(void *);
void* (*get_ipv4_lookup_struct)(int);
void* (*get_ipv6_lookup_struct)(int);
static struct l3fwd_lkp_mode l3fwd_em_lkp = {
.setup = setup_hash,
+ .check_ptype = em_check_ptype,
+ .cb_parse_ptype = em_cb_parse_ptype,
.main_loop = em_main_loop,
.get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct,
.get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct,
static struct l3fwd_lkp_mode l3fwd_lpm_lkp = {
.setup = setup_lpm,
+ .check_ptype = lpm_check_ptype,
+ .cb_parse_ptype = lpm_cb_parse_ptype,
.main_loop = lpm_main_loop,
.get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct,
.get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct,
#define CMD_LINE_OPT_IPV6 "ipv6"
#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
+#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
/*
* This expression is used to calculate the number of mbufs needed
{CMD_LINE_OPT_IPV6, 0, 0, 0},
{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
+ {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
{NULL, 0, 0, 0}
};
return -1;
}
}
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_PARSE_PTYPE,
+ sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
+ printf("soft parse-ptype is enabled\n");
+ parse_ptype = 1;
+ }
+
break;
default:
}
}
+/*
+ * Make sure packet type information will be available on one RX queue.
+ *
+ * When parse_ptype is set, the lookup mode's software parser is installed
+ * as an RX callback on (portid, queueid). Otherwise the lookup mode's
+ * check_ptype hook is called for the port.
+ *
+ * Returns 1 on success, 0 on failure (a message is printed).
+ */
+static int
+prepare_ptype_parser(uint8_t portid, uint16_t queueid)
+{
+	if (!parse_ptype) {
+		if (l3fwd_lkp.check_ptype(portid))
+			return 1;
+
+		printf("port %d cannot parse packet type, please add --%s\n",
+		       portid, CMD_LINE_OPT_PARSE_PTYPE);
+		return 0;
+	}
+
+	printf("Port %d: softly parse packet type info\n", portid);
+	/* A zero/NULL result from the registration is treated as failure. */
+	if (!rte_eth_add_rx_callback(portid, queueid,
+				     l3fwd_lkp.cb_parse_ptype, NULL)) {
+		printf("Failed to add rx callback: port=%d\n", portid);
+		return 0;
+	}
+
+	return 1;
+}
+
int
main(int argc, char **argv)
{
rte_eth_promiscuous_enable(portid);
}
+ printf("\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+ qconf = &lcore_conf[lcore_id];
+ for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+ if (prepare_ptype_parser(portid, queueid) == 0)
+ rte_exit(EXIT_FAILURE, "ptype check fails\n");
+ }
+ }
+
+
check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
ret = 0;