app/test/test_mempool_perf.c

   1 /*-
   2  *   BSD LICENSE
   3  *
   4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
   5  *   All rights reserved.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following conditions
   9  *   are met:
  10  *
  11  *     * Redistributions of source code must retain the above copyright
  12  *       notice, this list of conditions and the following disclaimer.
  13  *     * Redistributions in binary form must reproduce the above copyright
  14  *       notice, this list of conditions and the following disclaimer in
  15  *       the documentation and/or other materials provided with the
  16  *       distribution.
  17  *     * Neither the name of Intel Corporation nor the names of its
  18  *       contributors may be used to endorse or promote products derived
  19  *       from this software without specific prior written permission.
  20  *
  21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32  */
  33
  34 #include <string.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <stdint.h>
  38 #include <inttypes.h>
  39 #include <stdarg.h>
  40 #include <errno.h>
  41 #include <sys/queue.h>
  42
  43 #include <rte_common.h>
  44 #include <rte_log.h>
  45 #include <rte_debug.h>
  46 #include <rte_memory.h>
  47 #include <rte_memzone.h>
  48 #include <rte_launch.h>
  49 #include <rte_cycles.h>
  50 #include <rte_eal.h>
  51 #include <rte_per_lcore.h>
  52 #include <rte_lcore.h>
  53 #include <rte_atomic.h>
  54 #include <rte_branch_prediction.h>
  55 #include <rte_ring.h>
  56 #include <rte_mempool.h>
  57 #include <rte_spinlock.h>
  58 #include <rte_malloc.h>
  59
  60 #include "test.h"
  61
/*
 * Mempool performance
 * =======
 *
 *    Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then,
 *    the objects are put back in the pool per bulk of *n_put_bulk*.
 *
 *    This sequence is repeated for TIME_S seconds.
 *
 *    This test is done on the following configurations:
 *
 *    - Cores configuration (*cores*)
 *
 *      - One core with cache
 *      - Two cores with cache
 *      - Max. cores with cache
 *      - One core without cache
 *      - Two cores without cache
 *      - Max. cores without cache
 *
 *    - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *      - Bulk get from 1 to 32
 *      - Bulk put from 1 to 32
 *
 *    - Number of kept objects (*n_keep*)
 *
 *      - 32
 *      - 128
 */
  92
  93 #define N 65536
  94 #define TIME_S 5
  95 #define MEMPOOL_ELT_SIZE 2048
  96 #define MAX_KEEP 128
  97 #define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)
  98
  99 static struct rte_mempool *mp;
 100 static struct rte_mempool *mp_cache, *mp_nocache;
 101
 102 static rte_atomic32_t synchro;
 103
 104 /* number of objects in one bulk operation (get or put) */
 105 static unsigned n_get_bulk;
 106 static unsigned n_put_bulk;
 107
 108 /* number of objects retrived from mempool before putting them back */
 109 static unsigned n_keep;
 110
 111 /* number of enqueues / dequeues */
 112 struct mempool_test_stats {
 113         uint64_t enq_count;
 114 } __rte_cache_aligned;
 115
 116 static struct mempool_test_stats stats[RTE_MAX_LCORE];
 117
 118 /*
 119  * save the object number in the first 4 bytes of object data. All
 120  * other bytes are set to 0.
 121  */
 122 static void
 123 my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg,
 124             void *obj, unsigned i)
 125 {
 126         uint32_t *objnum = obj;
 127         memset(obj, 0, mp->elt_size);
 128         *objnum = i;
 129 }
 130
 131 static int
 132 per_lcore_mempool_test(__attribute__((unused)) void *arg)
 133 {
 134         void *obj_table[MAX_KEEP];
 135         unsigned i, idx;
 136         unsigned lcore_id = rte_lcore_id();
 137         int ret;
 138         uint64_t start_cycles, end_cycles;
 139         uint64_t time_diff = 0, hz = rte_get_timer_hz();
 140
 141         /* n_get_bulk and n_put_bulk must be divisors of n_keep */
 142         if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
 143                 return -1;
 144         if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
 145                 return -1;
 146
 147         stats[lcore_id].enq_count = 0;
 148
 149         /* wait synchro for slaves */
 150         if (lcore_id != rte_get_master_lcore())
 151                 while (rte_atomic32_read(&synchro) == 0);
 152
 153         start_cycles = rte_get_timer_cycles();
 154
 155         while (time_diff/hz < TIME_S) {
 156                 for (i = 0; likely(i < (N/n_keep)); i++) {
 157                         /* get n_keep objects by bulk of n_bulk */
 158                         idx = 0;
 159                         while (idx < n_keep) {
 160                                 ret = rte_mempool_get_bulk(mp, &obj_table[idx],
 161                                                            n_get_bulk);
 162                                 if (unlikely(ret < 0)) {
 163                                         rte_mempool_dump(stdout, mp);
 164                                         /* in this case, objects are lost... */
 165                                         return -1;
 166                                 }
 167                                 idx += n_get_bulk;
 168                         }
 169
 170                         /* put the objects back */
 171                         idx = 0;
 172                         while (idx < n_keep) {
 173                                 rte_mempool_put_bulk(mp, &obj_table[idx],
 174                                                      n_put_bulk);
 175                                 idx += n_put_bulk;
 176                         }
 177                 }
 178                 end_cycles = rte_get_timer_cycles();
 179                 time_diff = end_cycles - start_cycles;
 180                 stats[lcore_id].enq_count += N;
 181         }
 182
 183         return 0;
 184 }
 185
 186 /* launch all the per-lcore test, and display the result */
 187 static int
 188 launch_cores(unsigned cores)
 189 {
 190         unsigned lcore_id;
 191         uint64_t rate;
 192         int ret;
 193         unsigned cores_save = cores;
 194
 195         rte_atomic32_set(&synchro, 0);
 196
 197         /* reset stats */
 198         memset(stats, 0, sizeof(stats));
 199
 200         printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
 201                "n_put_bulk=%u n_keep=%u ",
 202                (unsigned) mp->cache_size, cores, n_get_bulk, n_put_bulk, n_keep);
 203
 204         if (rte_mempool_count(mp) != MEMPOOL_SIZE) {
 205                 printf("mempool is not full\n");
 206                 return -1;
 207         }
 208
 209         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 210                 if (cores == 1)
 211                         break;
 212                 cores--;
 213                 rte_eal_remote_launch(per_lcore_mempool_test,
 214                                       NULL, lcore_id);
 215         }
 216
 217         /* start synchro and launch test on master */
 218         rte_atomic32_set(&synchro, 1);
 219
 220         ret = per_lcore_mempool_test(NULL);
 221
 222         cores = cores_save;
 223         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 224                 if (cores == 1)
 225                         break;
 226                 cores--;
 227                 if (rte_eal_wait_lcore(lcore_id) < 0)
 228                         ret = -1;
 229         }
 230
 231         if (ret < 0) {
 232                 printf("per-lcore test returned -1\n");
 233                 return -1;
 234         }
 235
 236         rate = 0;
 237         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
 238                 rate += (stats[lcore_id].enq_count / TIME_S);
 239
 240         printf("rate_persec=%" PRIu64 "\n", rate);
 241
 242         return 0;
 243 }
 244
 245 /* for a given number of core, launch all test cases */
 246 static int
 247 do_one_mempool_test(unsigned cores)
 248 {
 249         unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
 250         unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
 251         unsigned keep_tab[] = { 32, 128, 0 };
 252         unsigned *get_bulk_ptr;
 253         unsigned *put_bulk_ptr;
 254         unsigned *keep_ptr;
 255         int ret;
 256
 257         for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
 258                 for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
 259                         for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
 260
 261                                 n_get_bulk = *get_bulk_ptr;
 262                                 n_put_bulk = *put_bulk_ptr;
 263                                 n_keep = *keep_ptr;
 264                                 ret = launch_cores(cores);
 265
 266                                 if (ret < 0)
 267                                         return -1;
 268                         }
 269                 }
 270         }
 271         return 0;
 272 }
 273
 274 static int
 275 test_mempool_perf(void)
 276 {
 277         rte_atomic32_init(&synchro);
 278
 279         /* create a mempool (without cache) */
 280         if (mp_nocache == NULL)
 281                 mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
 282                                                 MEMPOOL_ELT_SIZE, 0, 0,
 283                                                 NULL, NULL,
 284                                                 my_obj_init, NULL,
 285                                                 SOCKET_ID_ANY, 0);
 286         if (mp_nocache == NULL)
 287                 return -1;
 288
 289         /* create a mempool (with cache) */
 290         if (mp_cache == NULL)
 291                 mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
 292                                               MEMPOOL_ELT_SIZE,
 293                                               RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
 294                                               NULL, NULL,
 295                                               my_obj_init, NULL,
 296                                               SOCKET_ID_ANY, 0);
 297         if (mp_cache == NULL)
 298                 return -1;
 299
 300         /* performance test with 1, 2 and max cores */
 301         printf("start performance test (without cache)\n");
 302         mp = mp_nocache;
 303
 304         if (do_one_mempool_test(1) < 0)
 305                 return -1;
 306
 307         if (do_one_mempool_test(2) < 0)
 308                 return -1;
 309
 310         if (do_one_mempool_test(rte_lcore_count()) < 0)
 311                 return -1;
 312
 313         /* performance test with 1, 2 and max cores */
 314         printf("start performance test (with cache)\n");
 315         mp = mp_cache;
 316
 317         if (do_one_mempool_test(1) < 0)
 318                 return -1;
 319
 320         if (do_one_mempool_test(2) < 0)
 321                 return -1;
 322
 323         if (do_one_mempool_test(rte_lcore_count()) < 0)
 324                 return -1;
 325
 326         rte_mempool_list_dump(stdout);
 327
 328         return 0;
 329 }
 330
 331 static struct test_command mempool_perf_cmd = {
 332         .command = "mempool_perf_autotest",
 333         .callback = test_mempool_perf,
 334 };
 335 REGISTER_TEST_COMMAND(mempool_perf_cmd);