ethdev: store numa_node per device
author Intel <intel.com>
Mon, 3 Jun 2013 00:00:00 +0000 (00:00 +0000)
committer Thomas Monjalon <thomas.monjalon@6wind.com>
Tue, 17 Sep 2013 12:16:07 +0000 (14:16 +0200)
Signed-off-by: Intel
app/test-pmd/config.c
app/test-pmd/parameters.c
app/test-pmd/testpmd.c
app/test-pmd/testpmd.h
examples/kni/main.c
lib/librte_ether/rte_ethdev.c
lib/librte_ether/rte_ethdev.h

index 6d636ff..2d18212 100644 (file)
@@ -237,6 +237,8 @@ port_infos_display(portid_t port_id)
        struct rte_port *port;
        struct rte_eth_link link;
        int vlan_offload;
+       int socket_id;
+       struct rte_mempool * mp;
        static const char *info_border = "*********************";
 
        if (port_id >= nb_ports) {
@@ -245,9 +247,20 @@ port_infos_display(portid_t port_id)
        }
        port = &ports[port_id];
        rte_eth_link_get_nowait(port_id, &link);
+       socket_id = rte_eth_dev_socket_id(port_id);
        printf("\n%s Infos for port %-2d %s\n",
               info_border, port_id, info_border);
        print_ethaddr("MAC address: ", &port->eth_addr);
+       printf("\nConnect to socket: %d",socket_id);
+
+       if (port_numa[port_id] != NUMA_NO_CONFIG) {
+               mp = mbuf_pool_find(port_numa[port_id]);
+               if (mp)
+                       printf("\nmemory allocation on the socket: %d",
+                                                       port_numa[port_id]);
+       } else
+               printf("\nmemory allocation on the socket: %d",socket_id);
+
        printf("\nLink status: %s\n", (link.link_status) ? ("up") : ("down"));
        printf("Link speed: %u Mbps\n", (unsigned) link.link_speed);
        printf("Link duplex: %s\n", (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
index b7f8969..a541f20 100644 (file)
@@ -106,6 +106,15 @@ usage(char* progname)
               "by the packet forwarding test.\n");
        printf("  --numa: enable NUMA-aware allocation of RX/TX rings and of "
               "RX memory buffers (mbufs).\n");
+       printf("  --port-numa-config=(port,socket)[,(port,socket)]: "
+              "specify the socket on which the memory pool "
+              "used by the port will be allocated.\n");
+       printf("  --ring-numa-config=(port,flag,socket)[,(port,flag,socket)]: "
+              "specify the socket on which the TX/RX rings for "
+              "the port will be allocated "
+              "(flag: 1 for RX; 2 for TX; 3 for RX and TX).\n");
+       printf(" --socket-num=N: set socket from which all memory is allocated "
+              "in NUMA mode.\n");
        printf("  --mbuf-size=N: set the data size of mbuf to N bytes.\n");
        printf("  --total-num-mbufs=N: set the number of mbufs to be allocated "
               "in mbuf pools.\n");
@@ -319,6 +328,135 @@ parse_queue_stats_mapping_config(const char *q_arg, int is_rx)
        return 0;
 }
 
+static int
+parse_portnuma_config(const char *q_arg) /* q_arg: "(port,socket)[,(port,socket)]" */
+{ /* returns 0 on success, -1 on the first malformed or out-of-range tuple */
+       char s[256];
+       const char *p, *p0 = q_arg;
+       char *end;
+       uint8_t i,port_id,socket_id;
+       unsigned size;
+       enum fieldnames {
+               FLD_PORT = 0,
+               FLD_SOCKET,
+               _NUM_FLD
+       };
+       unsigned long int_fld[_NUM_FLD];
+       char *str_fld[_NUM_FLD];        
+
+       /* walk every "(...)" group in q_arg; p0 marks where the next search starts */
+       while ((p = strchr(p0,'(')) != NULL) {
+               ++p;
+               if((p0 = strchr(p,')')) == NULL)
+                       return -1;
+
+               size = p0 - p; /* length of the text between the parentheses */
+               if(size >= sizeof(s))
+                       return -1;
+
+               rte_snprintf(s, sizeof(s), "%.*s", size, p);
+               if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+                       return -1;
+               for (i = 0; i < _NUM_FLD; i++) {
+                       errno = 0;
+                       int_fld[i] = strtoul(str_fld[i], &end, 0); /* base 0: decimal, 0x hex or 0-prefixed octal */
+                       if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) /* NOTE(review): text after the digits is not rejected */
+                               return -1;
+               }
+               port_id = (uint8_t)int_fld[FLD_PORT];
+               if (port_id >= nb_ports) {
+                       printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+                       return -1;
+               }
+               socket_id = (uint8_t)int_fld[FLD_SOCKET];
+               if(socket_id >= MAX_SOCKET) {
+                       printf("Invalid socket id, range is [0, %d]\n",
+                                MAX_SOCKET - 1);
+                       return -1;
+               }
+               port_numa[port_id] = socket_id; /* stays applied even if a later tuple fails */
+       }
+
+       return 0;
+}
+
+static int
+parse_ringnuma_config(const char *q_arg) /* q_arg: "(port,flag,socket)[,(port,flag,socket)]" */
+{ /* returns 0 on success, -1 on the first malformed or out-of-range tuple */
+       char s[256];
+       const char *p, *p0 = q_arg;
+       char *end;
+       uint8_t i,port_id,ring_flag,socket_id;
+       unsigned size;
+       enum fieldnames {
+               FLD_PORT = 0,
+               FLD_FLAG,
+               FLD_SOCKET,
+               _NUM_FLD
+       };
+       unsigned long int_fld[_NUM_FLD];
+       char *str_fld[_NUM_FLD];        
+       #define RX_RING_ONLY 0x1 /* accepted FLD_FLAG values; NOTE(review): never #undef'd */
+       #define TX_RING_ONLY 0x2
+       #define RXTX_RING    0x3
+
+       /* walk every "(...)" group in q_arg; p0 marks where the next search starts */
+       while ((p = strchr(p0,'(')) != NULL) {
+               ++p;
+               if((p0 = strchr(p,')')) == NULL)
+                       return -1;
+
+               size = p0 - p; /* length of the text between the parentheses */
+               if(size >= sizeof(s))
+                       return -1;
+
+               rte_snprintf(s, sizeof(s), "%.*s", size, p);
+               if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+                       return -1;
+               for (i = 0; i < _NUM_FLD; i++) {
+                       errno = 0;
+                       int_fld[i] = strtoul(str_fld[i], &end, 0); /* base 0: decimal, 0x hex or 0-prefixed octal */
+                       if (errno != 0 || end == str_fld[i] || int_fld[i] > 255) /* NOTE(review): text after the digits is not rejected */
+                               return -1;
+               }
+               port_id = (uint8_t)int_fld[FLD_PORT];
+               if (port_id >= nb_ports) {
+                       printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+                       return -1;
+               }
+               socket_id = (uint8_t)int_fld[FLD_SOCKET];
+               if (socket_id >= MAX_SOCKET) {
+                       printf("Invalid socket id, range is [0, %d]\n",
+                               MAX_SOCKET - 1);
+                       return -1;
+               }
+               ring_flag = (uint8_t)int_fld[FLD_FLAG];
+               if ((ring_flag < RX_RING_ONLY) || (ring_flag > RXTX_RING)) {
+                       printf("Invalid ring-flag=%d config for port =%d\n",
+                               ring_flag,port_id);
+                       return -1;
+               }
+
+               switch (ring_flag & RXTX_RING) { /* ring_flag is already known to be 1, 2 or 3 here */
+               case RX_RING_ONLY:
+                       rxring_numa[port_id] = socket_id;
+                       break;
+               case TX_RING_ONLY:
+                       txring_numa[port_id] = socket_id;
+                       break;
+               case RXTX_RING:
+                       rxring_numa[port_id] = socket_id;
+                       txring_numa[port_id] = socket_id;
+                       break;
+               default: /* not reachable after the range check above */
+                       printf("Invalid ring-flag=%d config for port=%d\n",
+                               ring_flag,port_id);
+                       break;
+               }
+       }       
+       
+       return 0;
+}
 
 void
 launch_args_parse(int argc, char** argv)
@@ -339,6 +477,9 @@ launch_args_parse(int argc, char** argv)
                { "coremask",                   1, 0, 0 },
                { "portmask",                   1, 0, 0 },
                { "numa",                       0, 0, 0 },
+               { "port-numa-config",           1, 0, 0 },
+               { "ring-numa-config",           1, 0, 0 },
+               { "socket-num",                 1, 0, 0 },      
                { "mbuf-size",                  1, 0, 0 },
                { "total-num-mbufs",            1, 0, 0 },
                { "max-pkt-len",                1, 0, 0 },
@@ -445,8 +586,30 @@ launch_args_parse(int argc, char** argv)
                                parse_fwd_coremask(optarg);
                        if (!strcmp(lgopts[opt_idx].name, "portmask"))
                                parse_fwd_portmask(optarg);
-                       if (!strcmp(lgopts[opt_idx].name, "numa"))
+                       if (!strcmp(lgopts[opt_idx].name, "numa")) {
                                numa_support = 1;
+                               memset(port_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
+                               memset(rxring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
+                               memset(txring_numa,NUMA_NO_CONFIG,RTE_MAX_ETHPORTS);
+                       }
+                       if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) {
+                               if (parse_portnuma_config(optarg))
+                                       rte_exit(EXIT_FAILURE,
+                                          "invalid port-numa configuration\n");
+                       }
+                       if (!strcmp(lgopts[opt_idx].name, "ring-numa-config"))
+                               if (parse_ringnuma_config(optarg))
+                                       rte_exit(EXIT_FAILURE,
+                                          "invalid ring-numa configuration\n");
+                       if (!strcmp(lgopts[opt_idx].name, "socket-num")) {
+                               n = atoi(optarg);
+                               if(n < MAX_SOCKET)
+                                       socket_num = (uint8_t)n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                               "The socket number should be < %d\n",
+                                               MAX_SOCKET);
+                       }
                        if (!strcmp(lgopts[opt_idx].name, "mbuf-size")) {
                                n = atoi(optarg);
                                if (n > 0 && n <= 0xFFFF)
index 58833ac..cb9c14e 100644 (file)
@@ -90,6 +90,12 @@ uint8_t interactive = 0;
  */
 uint8_t numa_support = 0; /**< No numa support by default */
 
+/*
+ * In UMA mode, all memory is allocated from socket 0 if --socket-num is
+ * not configured.
+ */
+uint8_t socket_num = UMA_NO_CONFIG; 
+
 /*
  * Record the Ethernet address of peer target ports to which packets are
  * forwarded.
@@ -429,7 +435,9 @@ init_config(void)
        struct rte_mempool *mbp;
        unsigned int nb_mbuf_per_pool;
        lcoreid_t  lc_id;
+       uint8_t port_per_socket[MAX_SOCKET];
 
+       memset(port_per_socket,0,MAX_SOCKET);
        /* Configuration of logical cores. */
        fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
                                sizeof(struct fwd_lcore *) * nb_lcores,
@@ -452,27 +460,29 @@ init_config(void)
        /*
         * Create pools of mbuf.
         * If NUMA support is disabled, create a single pool of mbuf in
-        * socket 0 memory.
+        * socket 0 memory by default.
         * Otherwise, create a pool of mbuf in the memory of sockets 0 and 1.
         *
         * Use the maximum value of nb_rxd and nb_txd here, then nb_rxd and
         * nb_txd can be configured at run time.
         */
-       if (param_total_num_mbufs)
+       if (param_total_num_mbufs) 
                nb_mbuf_per_pool = param_total_num_mbufs;
        else {
                nb_mbuf_per_pool = RTE_TEST_RX_DESC_MAX + (nb_lcores * mb_mempool_cache)
                                + RTE_TEST_TX_DESC_MAX + MAX_PKT_BURST;
-               nb_mbuf_per_pool = (nb_mbuf_per_pool * nb_ports);
-       }
-       if (numa_support) {
-               nb_mbuf_per_pool /= 2;
-               mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 0);
-               mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 1);
-       } else {
-               mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 0);
+               
+               if (!numa_support) 
+                       nb_mbuf_per_pool = (nb_mbuf_per_pool * nb_ports);
        }
 
+       if (!numa_support) {
+               if (socket_num == UMA_NO_CONFIG)
+                       mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool, 0);
+               else
+                       mbuf_pool_create(mbuf_data_size, nb_mbuf_per_pool,
+                                                socket_num);
+       }
        /*
         * Records which Mbuf pool to use by each logical core, if needed.
         */
@@ -491,18 +501,41 @@ init_config(void)
                rte_exit(EXIT_FAILURE, "rte_zmalloc(%d struct rte_port) "
                                                        "failed\n", nb_ports);
        }
-
+       
        for (pid = 0; pid < nb_ports; pid++) {
                port = &ports[pid];
                rte_eth_dev_info_get(pid, &port->dev_info);
 
+               if (numa_support) {
+                       if (port_numa[pid] != NUMA_NO_CONFIG) 
+                               port_per_socket[port_numa[pid]]++;
+                       else {
+                               uint32_t socket_id = rte_eth_dev_socket_id(pid);
+                               port_per_socket[socket_id]++; 
+                       }
+               }
+
                /* set flag to initialize port/queue */
                port->need_reconfig = 1;
                port->need_reconfig_queues = 1;
        }
 
+       if (numa_support) {
+               uint8_t i;
+               unsigned int nb_mbuf;
+
+               if (param_total_num_mbufs)
+                       nb_mbuf_per_pool = nb_mbuf_per_pool/nb_ports;
+
+               for (i = 0; i < MAX_SOCKET; i++) {
+                       nb_mbuf = (nb_mbuf_per_pool * 
+                                               port_per_socket[i]);
+                       if (nb_mbuf) 
+                               mbuf_pool_create(mbuf_data_size,
+                                               nb_mbuf,i);
+               }
+       }
        init_port_config();
-
        /* Configuration of packet forwarding streams. */
        if (init_fwd_streams() < 0)
                rte_exit(EXIT_FAILURE, "FAIL from init_fwd_streams()\n");
@@ -530,10 +563,14 @@ init_fwd_streams(void)
                                port->dev_info.max_tx_queues);
                        return -1;
                }
-               if (numa_support)
-                       port->socket_id = (pid < (nb_ports >> 1)) ? 0 : 1;
-               else
-                       port->socket_id = 0;
+               if (numa_support) 
+                       port->socket_id = rte_eth_dev_socket_id(pid);
+               else {
+                       if (socket_num == UMA_NO_CONFIG)         
+                               port->socket_id = 0;
+                       else 
+                               port->socket_id = socket_num;   
+               }
        }
 
        nb_fwd_streams_new = (streamid_t)(nb_ports * nb_rxq);
@@ -1102,7 +1139,8 @@ start_port(portid_t pid)
                if (port->need_reconfig > 0) {
                        port->need_reconfig = 0;
 
-                       printf("Configuring Port %d\n", pi);
+                       printf("Configuring Port %d (socket %d)\n", pi,
+                                       rte_eth_dev_socket_id(pi));
                        /* configure port */
                        diag = rte_eth_dev_configure(pi, nb_rxq, nb_txq,
                                                &(port->dev_conf));
@@ -1117,14 +1155,20 @@ start_port(portid_t pid)
                                return;
                        }
                }
-
                if (port->need_reconfig_queues > 0) {
                        port->need_reconfig_queues = 0;
-
                        /* setup tx queues */
                        for (qi = 0; qi < nb_txq; qi++) {
-                               diag = rte_eth_tx_queue_setup(pi, qi, nb_txd,
-                                       port->socket_id, &(port->tx_conf));
+                               if ((numa_support) &&
+                                       (txring_numa[pi] != NUMA_NO_CONFIG)) 
+                                       diag = rte_eth_tx_queue_setup(pi, qi,
+                                               nb_txd,txring_numa[pi],
+                                               &(port->tx_conf));
+                               else
+                                       diag = rte_eth_tx_queue_setup(pi, qi, 
+                                               nb_txd,port->socket_id,
+                                               &(port->tx_conf));
+                                       
                                if (diag == 0)
                                        continue;
 
@@ -1141,12 +1185,32 @@ start_port(portid_t pid)
                        }
                        /* setup rx queues */
                        for (qi = 0; qi < nb_rxq; qi++) {
-                               diag = rte_eth_rx_queue_setup(pi, qi, nb_rxd,
-                                       port->socket_id, &(port->rx_conf),
-                                       mbuf_pool_find(port->socket_id));
+                               if ((numa_support) && 
+                                       (rxring_numa[pi] != NUMA_NO_CONFIG)) {
+                                       struct rte_mempool * mp = 
+                                               mbuf_pool_find(rxring_numa[pi]);
+                                       if (mp == NULL) {
+                                               printf("Failed to setup RX queue:"
+                                                       "No mempool allocation"
+                                                       "on the socket %d\n",
+                                                       rxring_numa[pi]);
+                                               return;
+                                       }
+                                       
+                                       diag = rte_eth_rx_queue_setup(pi, qi,
+                                            nb_rxd,rxring_numa[pi],
+                                            &(port->rx_conf),mp);
+                               }
+                               else
+                                       diag = rte_eth_rx_queue_setup(pi, qi, 
+                                            nb_rxd,port->socket_id,
+                                            &(port->rx_conf),
+                                            mbuf_pool_find(port->socket_id));
+
                                if (diag == 0)
                                        continue;
 
+
                                /* Fail to setup rx queue, return */
                                if (rte_atomic16_cmpset(&(port->port_status),
                                                        RTE_PORT_HANDLING,
@@ -1159,7 +1223,6 @@ start_port(portid_t pid)
                                return;
                        }
                }
-
                /* start port */
                if (rte_eth_dev_start(pi) < 0) {
                        printf("Fail to start port %d\n", pi);
index cd3edb7..0f05b2d 100644 (file)
@@ -69,6 +69,9 @@ int main(int argc, char **argv);
 #define CACHE_LINE_SIZE_ROUNDUP(size) \
        (CACHE_LINE_SIZE * ((size + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE))
 
+#define NUMA_NO_CONFIG 0xFF
+#define UMA_NO_CONFIG  0xFF
+
 typedef uint8_t  lcoreid_t;
 typedef uint8_t  portid_t;
 typedef uint16_t queueid_t;
@@ -268,6 +271,28 @@ extern uint8_t  interactive;
 extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 
+#define MAX_SOCKET 2 /*MAX SOCKET:currently, it is 2 */
+
+/*
+ * Stores, for each port, the specified socket on which the memory pool
+ * used by that port is allocated.
+ */
+uint8_t port_numa[RTE_MAX_ETHPORTS];
+
+/*
+ * Stores, for each port, the specified socket on which the RX ring
+ * used by that port is allocated.
+ */
+uint8_t rxring_numa[RTE_MAX_ETHPORTS];
+
+/*
+ * Stores, for each port, the specified socket on which the TX ring
+ * used by that port is allocated.
+ */
+uint8_t txring_numa[RTE_MAX_ETHPORTS];
+
+extern uint8_t socket_num;
+
 /*
  * Configuration of logical cores:
  * nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
index 4a1a07a..34a6f4a 100644 (file)
@@ -79,9 +79,6 @@
 /* Macros for printing using RTE_LOG */
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
 
-/* NUMA socket to allocate mbuf pool on */
-#define SOCKET                  0
-
 /* Max size of a single packet */
 #define MAX_PACKET_SZ           2048
 
@@ -550,13 +547,14 @@ init_port(uint8_t port)
                rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)",
                            (unsigned)port, ret);
 
-       ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, SOCKET, &rx_conf,
-                                    pktmbuf_pool);
+       ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port),
+                                 &rx_conf, pktmbuf_pool);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "Could not setup up RX queue for "
                                        "port%u (%d)", (unsigned)port, ret);
 
-       ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, SOCKET, &tx_conf);
+       ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port),
+                                 &tx_conf);
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "Could not setup up TX queue for "
                                        "port%u (%d)", (unsigned)port, ret);
@@ -720,7 +718,7 @@ main(int argc, char** argv)
                        MEMPOOL_CACHE_SZ,
                        sizeof(struct rte_pktmbuf_pool_private),
                        rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
-                       SOCKET, 0);
+                       rte_socket_id(), 0);
        if (pktmbuf_pool == NULL) {
                rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool");
                return -1;
index 06b5817..2fd8498 100644 (file)
@@ -241,6 +241,14 @@ rte_eth_driver_register(struct eth_driver *eth_drv)
        rte_eal_pci_register(&eth_drv->pci_drv);
 }
 
+int
+rte_eth_dev_socket_id(uint8_t port_id) /* NUMA node of the port's PCI device, or -1 */
+{
+       if (port_id >= nb_ports)
+               return -1; /* port_id is out of range */
+       return rte_eth_devices[port_id].pci_dev->numa_node; /* NOTE(review): assumes pci_dev is non-NULL for every port < nb_ports */
+}
+
 uint8_t
 rte_eth_dev_count(void)
 {
index 1ac9550..6c006d5 100644 (file)
@@ -1234,6 +1234,19 @@ extern int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id,
                                  uint16_t nb_tx_desc, unsigned int socket_id,
                                  const struct rte_eth_txconf *tx_conf);
 
+/**
+ * Return the NUMA socket to which an Ethernet device is connected.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device
+ * @return
+ *   The NUMA socket id to which the Ethernet device is connected or
+ *   a default of zero if the socket could not be determined.
+ *   -1 is returned if the port_id value is out of range.
+ */
+extern int rte_eth_dev_socket_id(uint8_t port_id);
+
+
 /**
  * Start an Ethernet device.
  *