From b6ea6408fbc784f174626aa6ea2fa49610b1f432 Mon Sep 17 00:00:00 2001 From: Intel Date: Mon, 3 Jun 2013 00:00:00 +0000 Subject: [PATCH] ethdev: store numa_node per device Signed-off-by: Intel --- app/test-pmd/config.c | 13 +++ app/test-pmd/parameters.c | 165 +++++++++++++++++++++++++++++++++- app/test-pmd/testpmd.c | 113 +++++++++++++++++------ app/test-pmd/testpmd.h | 25 ++++++ examples/kni/main.c | 12 ++- lib/librte_ether/rte_ethdev.c | 8 ++ lib/librte_ether/rte_ethdev.h | 13 +++ 7 files changed, 316 insertions(+), 33 deletions(-) diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index 6d636fffa6..2d1821227e 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -237,6 +237,8 @@ port_infos_display(portid_t port_id) struct rte_port *port; struct rte_eth_link link; int vlan_offload; + int socket_id; + struct rte_mempool * mp; static const char *info_border = "*********************"; if (port_id >= nb_ports) { @@ -245,9 +247,20 @@ port_infos_display(portid_t port_id) } port = &ports[port_id]; rte_eth_link_get_nowait(port_id, &link); + socket_id = rte_eth_dev_socket_id(port_id); printf("\n%s Infos for port %-2d %s\n", info_border, port_id, info_border); print_ethaddr("MAC address: ", &port->eth_addr); + printf("\nConnect to socket: %d",socket_id); + + if (port_numa[port_id] != NUMA_NO_CONFIG) { + mp = mbuf_pool_find(port_numa[port_id]); + if (mp) + printf("\nmemory allocation on the socket: %d", + port_numa[port_id]); + } else + printf("\nmemory allocation on the socket: %d",socket_id); + printf("\nLink status: %s\n", (link.link_status) ? ("up") : ("down")); printf("Link speed: %u Mbps\n", (unsigned) link.link_speed); printf("Link duplex: %s\n", (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c index b7f8969179..a541f20f6c 100644 --- a/app/test-pmd/parameters.c +++ b/app/test-pmd/parameters.c @@ -106,6 +106,15 @@ usage(char* progname) "by the packet forwarding test.\n"); printf(" --numa: enable NUMA-aware allocation of RX/TX rings and of " "RX memory buffers (mbufs).\n"); + printf(" --port-numa-config=(port,socket)[,(port,socket)]: " + "specify the socket on which the memory pool " + "used by the port will be allocated.\n"); + printf(" --ring-numa-config=(port,flag,socket)[,(port,flag,socket)]: " + "specify the socket on which the TX/RX rings for " + "the port will be allocated " + "(flag: 1 for RX; 2 for TX; 3 for RX and TX).\n"); + printf(" --socket-num=N: set socket from which all memory is allocated " + "in UMA mode (when --numa is not set).\n"); printf(" --mbuf-size=N: set the data size of mbuf to N bytes.\n"); printf(" --total-num-mbufs=N: set the number of mbufs to be allocated " "in mbuf pools.\n"); @@ -319,6 +328,135 @@ parse_queue_stats_mapping_config(const char *q_arg, int is_rx) return 0; } +static int +parse_portnuma_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + uint8_t i,port_id,socket_id; + unsigned size; + enum fieldnames { + FLD_PORT = 0, + FLD_SOCKET, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + + /* reset from value set at definition */ + while ((p = strchr(p0,'(')) != NULL) { + ++p; + if((p0 = strchr(p,')')) == NULL) + return -1; + + size = p0 - p; + if(size >= sizeof(s)) + return -1; + + rte_snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + port_id = (uint8_t)int_fld[FLD_PORT]; + if (port_id >= nb_ports) { + printf("Invalid port, range is [0, %d]\n", nb_ports - 1); + return 
-1; + } + socket_id = (uint8_t)int_fld[FLD_SOCKET]; + if(socket_id >= MAX_SOCKET) { + printf("Invalid socket id, range is [0, %d]\n", + MAX_SOCKET - 1); + return -1; + } + port_numa[port_id] = socket_id; + } + + return 0; +} + +static int +parse_ringnuma_config(const char *q_arg) +{ + char s[256]; + const char *p, *p0 = q_arg; + char *end; + uint8_t i,port_id,ring_flag,socket_id; + unsigned size; + enum fieldnames { + FLD_PORT = 0, + FLD_FLAG, + FLD_SOCKET, + _NUM_FLD + }; + unsigned long int_fld[_NUM_FLD]; + char *str_fld[_NUM_FLD]; + #define RX_RING_ONLY 0x1 + #define TX_RING_ONLY 0x2 + #define RXTX_RING 0x3 + + /* reset from value set at definition */ + while ((p = strchr(p0,'(')) != NULL) { + ++p; + if((p0 = strchr(p,')')) == NULL) + return -1; + + size = p0 - p; + if(size >= sizeof(s)) + return -1; + + rte_snprintf(s, sizeof(s), "%.*s", size, p); + if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD) + return -1; + for (i = 0; i < _NUM_FLD; i++) { + errno = 0; + int_fld[i] = strtoul(str_fld[i], &end, 0); + if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) + return -1; + } + port_id = (uint8_t)int_fld[FLD_PORT]; + if (port_id >= nb_ports) { + printf("Invalid port, range is [0, %d]\n", nb_ports - 1); + return -1; + } + socket_id = (uint8_t)int_fld[FLD_SOCKET]; + if (socket_id >= MAX_SOCKET) { + printf("Invalid socket id, range is [0, %d]\n", + MAX_SOCKET - 1); + return -1; + } + ring_flag = (uint8_t)int_fld[FLD_FLAG]; + if ((ring_flag < RX_RING_ONLY) || (ring_flag > RXTX_RING)) { + printf("Invalid ring-flag=%d config for port =%d\n", + ring_flag,port_id); + return -1; + } + + switch (ring_flag & RXTX_RING) { + case RX_RING_ONLY: + rxring_numa[port_id] = socket_id; + break; + case TX_RING_ONLY: + txring_numa[port_id] = socket_id; + break; + case RXTX_RING: + rxring_numa[port_id] = socket_id; + txring_numa[port_id] = socket_id; + break; + default: + printf("Invalid ring-flag=%d config for port=%d\n", + ring_flag,port_id); + break; + } + } + + 
return 0; +} void launch_args_parse(int argc, char** argv) @@ -339,6 +477,9 @@ launch_args_parse(int argc, char** argv) { "coremask", 1, 0, 0 }, { "portmask", 1, 0, 0 }, { "numa", 0, 0, 0 }, + { "port-numa-config", 1, 0, 0 }, + { "ring-numa-config", 1, 0, 0 }, + { "socket-num", 1, 0, 0 }, { "mbuf-size", 1, 0, 0 }, { "total-num-mbufs", 1, 0, 0 }, { "max-pkt-len", 1, 0, 0 }, @@ -445,8 +586,30 @@ launch_args_parse(int argc, char** argv) parse_fwd_coremask(optarg); if (!strcmp(lgopts[opt_idx].name, "portmask")) parse_fwd_portmask(optarg); - if (!strcmp(lgopts[opt_idx].name, "numa")) + if (!strcmp(lgopts[opt_idx].name, "numa")) { numa_support = 1; + memset(port_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS); + memset(rxring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS); + memset(txring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS); + } + if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) { + if (parse_portnuma_config(optarg)) + rte_exit(EXIT_FAILURE, + "invalid port-numa configuration\n"); + } + if (!strcmp(lgopts[opt_idx].name, "ring-numa-config")) + if (parse_ringnuma_config(optarg)) + rte_exit(EXIT_FAILURE, + "invalid ring-numa configuration\n"); + if (!strcmp(lgopts[opt_idx].name, "socket-num")) { + n = atoi(optarg); + if(n >= 0 && n < MAX_SOCKET) + socket_num = (uint8_t)n; + else + rte_exit(EXIT_FAILURE, + "The socket number should be < %d\n", + MAX_SOCKET); + } if (!strcmp(lgopts[opt_idx].name, "mbuf-size")) { n = atoi(optarg); if (n > 0 && n <= 0xFFFF) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 58833acdbb..cb9c14e349 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -90,6 +90,12 @@ uint8_t interactive = 0; */ uint8_t numa_support = 0; /**< No numa support by default */ +/* + * In UMA mode, all memory is allocated from socket 0 if --socket-num is + * not configured. + */ +uint8_t socket_num = UMA_NO_CONFIG; + /* * Record the Ethernet address of peer target ports to which packets are * forwarded. 
@@ -429,7 +435,9 @@ init_config(void) struct rte_mempool *mbp; unsigned int nb_mbuf_per_pool; lcoreid_t lc_id; + uint8_t port_per_socket[MAX_SOCKET]; + memset(port_per_socket,0,MAX_SOCKET); /* Configuration of logical cores. */ fwd_lcores = rte_zmalloc("testpmd: fwd_lcores", sizeof(struct fwd_lcore *) * nb_lcores, @@ -452,27 +460,29 @@ init_config(void) /* * Create pools of mbuf. * If NUMA support is disabled, create a single pool of mbuf in - * socket 0 memory. + * socket 0 memory by default. * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1. * * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and * nb_txd can be configured at run time. */ - if (param_total_num_mbufs) + if (param_total_num_mbufs) nb_mbuf_per_pool = param_total_num_mbufs; else { nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX + (nb_lcores * mb_mempool_cache) + RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST; - nb_mbuf_per_pool = (nb_mbuf_per_pool * nb_ports); - } - if (numa_support) { - nb_mbuf_per_pool /= 2; - mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 0); - mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 1); - } else { - mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 0); + + if (!numa_support) + nb_mbuf_per_pool = (nb_mbuf_per_pool * nb_ports); } + if (!numa_support) { + if (socket_num == UMA_NO_CONFIG) + mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 0); + else + mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, + socket_num); + } /* * Records which Mbuf pool to use by each logical core, if needed. 
*/ @@ -491,18 +501,41 @@ init_config(void) rte_exit(EXIT_FAILURE, "rte_zmalloc(%d struct rte_port) " "failed\n", nb_ports); } - + for (pid = 0; pid < nb_ports; pid++) { port = &ports[pid]; rte_eth_dev_info_get(pid, &port->dev_info); + if (numa_support) { + if (port_numa[pid] != NUMA_NO_CONFIG) + port_per_socket[port_numa[pid]]++; + else { + uint32_t socket_id = rte_eth_dev_socket_id(pid); + port_per_socket[socket_id]++; + } + } + /* set flag to initialize port/queue */ port->need_reconfig = 1; port->need_reconfig_queues = 1; } + if (numa_support) { + uint8_t i; + unsigned int nb_mbuf; + + if (param_total_num_mbufs) + nb_mbuf_per_pool = nb_mbuf_per_pool/nb_ports; + + for (i = 0; i < MAX_SOCKET; i++) { + nb_mbuf = (nb_mbuf_per_pool * + port_per_socket[i]); + if (nb_mbuf) + mbuf_pool_create(mbuf_data_size, + nb_mbuf,i); + } + } init_port_config(); - /* Configuration of packet forwarding streams. */ if (init_fwd_streams() < 0) rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n"); @@ -530,10 +563,14 @@ init_fwd_streams(void) port->dev_info.max_tx_queues); return -1; } - if (numa_support) - port->socket_id = (pid < (nb_ports >> 1)) ? 
0 : 1; - else - port->socket_id = 0; + if (numa_support) + port->socket_id = rte_eth_dev_socket_id(pid); + else { + if (socket_num == UMA_NO_CONFIG) + port->socket_id = 0; + else + port->socket_id = socket_num; + } } nb_fwd_streams_new = (streamid_t)(nb_ports * nb_rxq); @@ -1102,7 +1139,8 @@ start_port(portid_t pid) if (port->need_reconfig > 0) { port->need_reconfig = 0; - printf("Configuring Port %d\n", pi); + printf("Configuring Port %d (socket %d)\n", pi, + rte_eth_dev_socket_id(pi)); /* configure port */ diag = rte_eth_dev_configure(pi, nb_rxq, nb_txq, &(port->dev_conf)); @@ -1117,14 +1155,20 @@ start_port(portid_t pid) return; } } - if (port->need_reconfig_queues > 0) { port->need_reconfig_queues = 0; - /* setup tx queues */ for (qi = 0; qi < nb_txq; qi++) { - diag = rte_eth_tx_queue_setup(pi, qi, nb_txd, - port->socket_id, &(port->tx_conf)); + if ((numa_support) && + (txring_numa[pi] != NUMA_NO_CONFIG)) + diag = rte_eth_tx_queue_setup(pi, qi, + nb_txd,txring_numa[pi], + &(port->tx_conf)); + else + diag = rte_eth_tx_queue_setup(pi, qi, + nb_txd,port->socket_id, + &(port->tx_conf)); + if (diag == 0) continue; @@ -1141,12 +1185,32 @@ start_port(portid_t pid) } /* setup rx queues */ for (qi = 0; qi < nb_rxq; qi++) { - diag = rte_eth_rx_queue_setup(pi, qi, nb_rxd, - port->socket_id, &(port->rx_conf), - mbuf_pool_find(port->socket_id)); + if ((numa_support) && + (rxring_numa[pi] != NUMA_NO_CONFIG)) { + struct rte_mempool * mp = + mbuf_pool_find(rxring_numa[pi]); + if (mp == NULL) { + printf("Failed to setup RX queue: " + "No mempool allocation " + "on the socket %d\n", + rxring_numa[pi]); + return; + } + + diag = rte_eth_rx_queue_setup(pi, qi, + nb_rxd,rxring_numa[pi], + &(port->rx_conf),mp); + } + else + diag = rte_eth_rx_queue_setup(pi, qi, + nb_rxd,port->socket_id, + &(port->rx_conf), + mbuf_pool_find(port->socket_id)); + if (diag == 0) continue; + /* Fail to setup rx queue, return */ if (rte_atomic16_cmpset(&(port->port_status), RTE_PORT_HANDLING, @@ -1159,7 
+1223,6 @@ start_port(portid_t pid) return; } } - /* start port */ if (rte_eth_dev_start(pi) < 0) { printf("Fail to start port %d\n", pi); diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index cd3edb7051..0f05b2d2cc 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -69,6 +69,9 @@ int main(int argc, char **argv); #define CACHE_LINE_SIZE_ROUNDUP(size) \ (CACHE_LINE_SIZE * ((size + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE)) +#define NUMA_NO_CONFIG 0xFF +#define UMA_NO_CONFIG 0xFF + typedef uint8_t lcoreid_t; typedef uint8_t portid_t; typedef uint16_t queueid_t; @@ -268,6 +271,28 @@ extern uint8_t interactive; extern uint8_t numa_support; /**< set by "--numa" parameter */ extern uint16_t port_topology; /**< set by "--port-topology" parameter */ +#define MAX_SOCKET 2 /*MAX SOCKET:currently, it is 2 */ + +/* + * Store specified sockets on which memory pool to be used by ports + * is allocated. + */ +uint8_t port_numa[RTE_MAX_ETHPORTS]; + +/* + * Store specified sockets on which RX ring to be used by ports + * is allocated. + */ +uint8_t rxring_numa[RTE_MAX_ETHPORTS]; + +/* + * Store specified sockets on which TX ring to be used by ports + * is allocated. 
+ */ +uint8_t txring_numa[RTE_MAX_ETHPORTS]; + +extern uint8_t socket_num; + /* * Configuration of logical cores: * nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores diff --git a/examples/kni/main.c b/examples/kni/main.c index 4a1a07a454..34a6f4aa8b 100644 --- a/examples/kni/main.c +++ b/examples/kni/main.c @@ -79,9 +79,6 @@ /* Macros for printing using RTE_LOG */ #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 -/* NUMA socket to allocate mbuf pool on */ -#define SOCKET 0 - /* Max size of a single packet */ #define MAX_PACKET_SZ 2048 @@ -550,13 +547,14 @@ init_port(uint8_t port) rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)", (unsigned)port, ret); - ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, SOCKET, &rx_conf, - pktmbuf_pool); + ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port), + &rx_conf, pktmbuf_pool); if (ret < 0) rte_exit(EXIT_FAILURE, "Could not setup up RX queue for " "port%u (%d)", (unsigned)port, ret); - ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, SOCKET, &tx_conf); + ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port), + &tx_conf); if (ret < 0) rte_exit(EXIT_FAILURE, "Could not setup up TX queue for " "port%u (%d)", (unsigned)port, ret); @@ -720,7 +718,7 @@ main(int argc, char** argv) MEMPOOL_CACHE_SZ, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL, - SOCKET, 0); + rte_socket_id(), 0); if (pktmbuf_pool == NULL) { rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool"); return -1; diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 06b5817f92..2fd84980e8 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -241,6 +241,14 @@ rte_eth_driver_register(struct eth_driver *eth_drv) rte_eal_pci_register(&eth_drv->pci_drv); } +int +rte_eth_dev_socket_id(uint8_t port_id) +{ + if (port_id >= nb_ports) + return -1; + return rte_eth_devices[port_id].pci_dev->numa_node; +} + uint8_t rte_eth_dev_count(void) { diff --git 
a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 1ac95505d9..6c006d5a77 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -1234,6 +1234,19 @@ extern int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); +/* + * Return the NUMA socket to which an Ethernet device is connected + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * The NUMA socket id to which the Ethernet device is connected or + * a default of zero if the socket could not be determined. + * -1 is returned if the port_id value is out of range. + */ +extern int rte_eth_dev_socket_id(uint8_t port_id); + + /** * Start an Ethernet device. * -- 2.20.1