/*-
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_debug.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_cycles.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>

#include "test.h"

/*
 * Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then,
 * the objects are put back in the pool per bulk of *n_put_bulk*.
 *
 * This sequence is repeated for TIME_S seconds.
 *
 * The test is run on the following configurations:
 *
 * - Cores configuration (*cores*)
 *
 *   - One core with cache
 *   - Two cores with cache
 *   - Max. cores with cache
 *   - One core without cache
 *   - Two cores without cache
 *   - Max. cores without cache
 *
 * - Bulk size (*n_get_bulk*, *n_put_bulk*)
 *
 *   - Bulk get from 1 to 32
 *   - Bulk put from 1 to 32
 *
 * - Number of kept objects (*n_keep*)
 *
 *   - 32
 *   - 128
 */

#define N 65536
#define TIME_S 5
#define MEMPOOL_ELT_SIZE 2048
#define MAX_KEEP 128
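/*
 * Sized from the assumed worst case: every lcore may hold up to
 * MAX_KEEP objects plus a completely filled per-lcore cache at any
 * moment.
 */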
#define MEMPOOL_SIZE ((RTE_MAX_LCORE*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)

static struct rte_mempool *mp;
static struct rte_mempool *mp_cache, *mp_nocache;
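
/* start flag: slave lcores spin on this until the master sets it to 1 */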
static rte_atomic32_t synchro;

/* number of objects in one bulk operation (get or put) */
static unsigned n_get_bulk;
static unsigned n_put_bulk;

/* number of objects retrieved from mempool before putting them back */
static unsigned n_keep;

/* number of enqueues / dequeues */
struct mempool_test_stats {
	unsigned enq_count;
} __rte_cache_aligned;
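
/*
 * stats are indexed by lcore_id; the cache alignment above gives each
 * lcore its own cache line and avoids false sharing between writers.
 */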
static struct mempool_test_stats stats[RTE_MAX_LCORE];

/*
 * save the object number in the first 4 bytes of object data. All
 * other bytes are set to 0.
 */
static void
my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg,
	    void *obj, unsigned i)
{
	uint32_t *objnum = obj;

	memset(obj, 0, mp->elt_size);
	*objnum = i;
}
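
/*
 * Per-lcore test body: get n_keep objects in bursts of n_get_bulk,
 * put them back in bursts of n_put_bulk, and repeat until TIME_S
 * seconds have elapsed, counting the objects cycled.
 */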
static int
per_lcore_mempool_test(__attribute__((unused)) void *arg)
{
	void *obj_table[MAX_KEEP];
	unsigned i, idx;
	unsigned lcore_id = rte_lcore_id();
	int ret;
	uint64_t start_cycles, end_cycles;
	uint64_t time_diff = 0, hz = rte_get_timer_hz();

	/* n_get_bulk and n_put_bulk must be divisors of n_keep */
	if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
		return -1;
	if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
		return -1;

	stats[lcore_id].enq_count = 0;

	/* slave lcores wait here until the master signals the start */
	if (lcore_id != rte_get_master_lcore())
		while (rte_atomic32_read(&synchro) == 0)
			;

	start_cycles = rte_get_timer_cycles();

	while (time_diff/hz < TIME_S) {
		for (i = 0; likely(i < (N/n_keep)); i++) {
			/* get n_keep objects by bulk of n_get_bulk */
			idx = 0;
			while (idx < n_keep) {
				ret = rte_mempool_get_bulk(mp, &obj_table[idx],
							   n_get_bulk);
				if (unlikely(ret < 0)) {
					rte_mempool_dump(stdout, mp);
					rte_ring_dump(stdout, mp->ring);
					/* in this case, objects are lost... */
					return -1;
				}
				idx += n_get_bulk;
			}

			/* put the objects back */
			idx = 0;
			while (idx < n_keep) {
				rte_mempool_put_bulk(mp, &obj_table[idx],
						     n_put_bulk);
				idx += n_put_bulk;
			}
		}
		end_cycles = rte_get_timer_cycles();
		time_diff = end_cycles - start_cycles;
		stats[lcore_id].enq_count += N;
	}

	return 0;
}

/* launch all the per-lcore tests and display the results */
static int
launch_cores(unsigned cores)
{
	unsigned lcore_id;
	unsigned rate;
	int ret;
	unsigned cores_save = cores;

	rte_atomic32_set(&synchro, 0);

	memset(stats, 0, sizeof(stats));

	printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
	       "n_put_bulk=%u n_keep=%u ",
	       (unsigned) mp->cache_size, cores, n_get_bulk, n_put_bulk, n_keep);
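
	/* a previous run must have returned every object to the pool */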
	if (rte_mempool_count(mp) != MEMPOOL_SIZE) {
		printf("mempool is not full\n");
		return -1;
	}

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		rte_eal_remote_launch(per_lcore_mempool_test,
				      NULL, lcore_id);
	}

	/* start synchro and launch test on master */
	rte_atomic32_set(&synchro, 1);

	ret = per_lcore_mempool_test(NULL);

	cores = cores_save;
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (cores == 1)
			break;
		cores--;
		if (rte_eal_wait_lcore(lcore_id) < 0)
			ret = -1;
	}

	if (ret < 0) {
		printf("per-lcore test returned -1\n");
		return -1;
	}
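
	/* aggregate the per-lcore counters into one objects-per-second rate */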
	rate = 0;
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		rate += (stats[lcore_id].enq_count / TIME_S);

	printf("rate_persec=%u\n", rate);

	return 0;
}

/* for a given number of cores, launch all test cases */
static int
do_one_mempool_test(unsigned cores)
{
	unsigned bulk_tab_get[] = { 1, 4, 32, 0 };
	unsigned bulk_tab_put[] = { 1, 4, 32, 0 };
	unsigned keep_tab[] = { 32, 128, 0 };
	unsigned *get_bulk_ptr;
	unsigned *put_bulk_ptr;
	unsigned *keep_ptr;
	int ret;
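
	/* the tables are zero-terminated: the nested loops below sweep
	 * every (get bulk, put bulk, keep) combination */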
	for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
		for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
			for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {

				n_get_bulk = *get_bulk_ptr;
				n_put_bulk = *put_bulk_ptr;
				n_keep = *keep_ptr;
				ret = launch_cores(cores);

				if (ret < 0)
					return -1;
			}
		}
	}
	return 0;
}

static int
test_mempool_perf(void)
{
	rte_atomic32_init(&synchro);

	/* create a mempool (without cache) */
	if (mp_nocache == NULL)
		mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE,
						MEMPOOL_ELT_SIZE, 0, 0,
						NULL, NULL,
						my_obj_init, NULL,
						SOCKET_ID_ANY, 0);
	if (mp_nocache == NULL)
		return -1;

	/* create a mempool (with cache) */
	if (mp_cache == NULL)
		mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
					      MEMPOOL_ELT_SIZE,
					      RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
					      NULL, NULL,
					      my_obj_init, NULL,
					      SOCKET_ID_ANY, 0);
	if (mp_cache == NULL)
		return -1;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (without cache)\n");
	mp = mp_nocache;

	if (do_one_mempool_test(1) < 0)
		return -1;

	if (do_one_mempool_test(2) < 0)
		return -1;

	if (do_one_mempool_test(rte_lcore_count()) < 0)
		return -1;

	/* performance test with 1, 2 and max cores */
	printf("start performance test (with cache)\n");
	mp = mp_cache;

	if (do_one_mempool_test(1) < 0)
		return -1;

	if (do_one_mempool_test(2) < 0)
		return -1;

	if (do_one_mempool_test(rte_lcore_count()) < 0)
		return -1;

	rte_mempool_list_dump(stdout);

	return 0;
}

static struct test_command mempool_perf_cmd = {
	.command = "mempool_perf_autotest",
	.callback = test_mempool_perf,
};
REGISTER_TEST_COMMAND(mempool_perf_cmd);
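
/*
 * Usage sketch (assuming the stock DPDK test application): run the
 * command from the interactive test prompt, e.g.
 *
 *   RTE>> mempool_perf_autotest
 *
 * Each configuration prints one result line of the form
 *
 *   mempool_autotest cache=<n> cores=<n> n_get_bulk=<n> n_put_bulk=<n> n_keep=<n> rate_persec=<n>
 */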