usertools: add huge page setup script
author Stephen Hemminger <stephen@networkplumber.org>
Wed, 9 Sep 2020 18:51:01 +0000 (11:51 -0700)
committer Thomas Monjalon <thomas@monjalon.net>
Sun, 22 Nov 2020 21:38:03 +0000 (22:38 +0100)
This is an improved version of the setup of huge pages
based on earlier DPDK setup.

Differences are:
   * autodetects NUMA vs non NUMA
   * allows setting different page sizes
     recent kernels support multiple sizes.
   * accepts a parameter in bytes (not pages).
   * can display current hugepage settings.

Most users will just use --setup argument but if necessary
the steps of clearing old settings and mounting/umounting
can be done individually.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Ferruh Yigit <ferruh.yigit@intel.com>
doc/guides/tools/hugepages.rst [new file with mode: 0644]
doc/guides/tools/index.rst
usertools/dpdk-hugepages.py [new file with mode: 0755]
usertools/meson.build

diff --git a/doc/guides/tools/hugepages.rst b/doc/guides/tools/hugepages.rst
new file mode 100644 (file)
index 0000000..6d3f410
--- /dev/null
@@ -0,0 +1,78 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright (c) 2020 Microsoft Corporation
+
+dpdk-hugepages Application
+==========================
+
+The ``dpdk-hugepages`` tool is a Data Plane Development Kit (DPDK) utility
+that helps in reserving hugepages,
+as well as checking the current settings.
+
+
+Running the Application
+-----------------------
+
+The tool has a number of command line options:
+
+.. code-block:: console
+
+   dpdk-hugepages [options]
+
+
+Options
+-------
+
+* ``-h, --help``
+
+    Display usage information and quit
+
+* ``-s, --show``
+
+    Print the current huge page configuration
+
+* ``-c, --clear``
+
+    Clear existing huge page reservation
+
+* ``-m, --mount``
+
+    Mount the huge page filesystem
+
+* ``-u, --unmount``
+
+    Unmount the huge page filesystem
+
+* ``-n NODE, --node=NODE``
+
+    Set NUMA node to reserve pages on
+
+* ``-p SIZE, --pagesize=SIZE``
+
+    Select hugepage size to use.
+       If not specified the default system huge page size is used.
+
+* ``-r SIZE, --reserve=SIZE``
+
+    Reserve huge pages.
+       Size is in bytes with K, M or G suffix.
+
+* ``--setup SIZE``
+
+    Short cut to clear, unmount, reserve and mount.
+
+.. warning::
+
+   While any user can run the ``dpdk-hugepages.py`` script to view the
+   status of huge pages, modifying the setup requires root privileges.
+
+
+Examples
+--------
+
+To display current huge page settings::
+
+   dpdk-hugepages.py -s
+
+To do a complete setup with 2 Gigabytes of 1G huge pages::
+
+   dpdk-hugepages.py -p 1G --setup 2G
index c721943..93dde41 100644 (file)
@@ -11,6 +11,7 @@ DPDK Tools User Guides
     proc_info
     pdump
     pmdinfo
+    hugepages
     devbind
     flow-perf
     testbbdev
diff --git a/usertools/dpdk-hugepages.py b/usertools/dpdk-hugepages.py
new file mode 100755 (executable)
index 0000000..1be100c
--- /dev/null
@@ -0,0 +1,270 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 Microsoft Corporation
+"""Script to query and setup huge pages for DPDK applications."""
+
+import argparse
+import glob
+import os
+import re
+import sys
+from math import log2
+
+# Standard binary prefix, in ascending order.  The position of a letter is
+# the power of 1024 applied to a value expressed in kB (K=0, M=1, G=2).
+BINARY_PREFIX = "KMG"
+
+# systemd mount point for huge pages
+HUGE_MOUNT = "/dev/hugepages"
+
+
+def fmt_memsize(kb):
+    '''Format memory size in kB into conventional format.
+
+    The value is scaled to the largest unit of BINARY_PREFIX that does not
+    exceed it and truncated to a whole number, e.g. 2048 becomes '2Mb'.
+    Sizes beyond the last known unit are expressed in that last unit
+    instead of failing.
+
+    kb: size in kilobytes; must not be negative.
+    Returns the formatted string.
+    '''
+    if kb == 0:
+        # log2(0) is undefined; zero needs no scaling anyway
+        return '0{}b'.format(BINARY_PREFIX[0])
+    # Clamp the exponent so that a total of 1 TB or more does not index
+    # past the end of BINARY_PREFIX
+    logk = min(int(log2(kb) / 10), len(BINARY_PREFIX) - 1)
+    suffix = BINARY_PREFIX[logk]
+    unit = 2**(logk * 10)
+    return '{}{}b'.format(int(kb / unit), suffix)
+
+
+def get_memsize(arg):
+    '''Convert memory size with suffix to kB.
+
+    arg: decimal digits optionally followed by one letter of BINARY_PREFIX
+    (case insensitive).  Without a letter the number is a byte count.
+    Exits the script when arg does not have that form.
+    '''
+    parsed = re.match(r'(\d+)([' + BINARY_PREFIX + r']?)$', arg.upper())
+    if parsed is None:
+        sys.exit('{} is not a valid page size'.format(arg))
+    digits, unit = parsed.groups()
+    value = float(digits)
+    if not unit:
+        # a bare number is in bytes
+        return int(value / 1024)
+    # K is already kB; each following letter is another factor of 1024
+    exponent = 10 * BINARY_PREFIX.find(unit)
+    return int(value * (2**exponent))
+
+
+def is_numa():
+    '''Tell whether the per-node directory tree is present in sysfs'''
+    node_root = '/sys/devices/system/node'
+    return os.path.exists(node_root)
+
+
+def get_hugepages(path):
+    '''Read number of reserved pages.
+
+    path: a hugepages-NNNkB directory containing an nr_hugepages file.
+    Returns the content of that file as an integer.
+    '''
+    with open(path + '/nr_hugepages') as nr_hugepages:
+        return int(nr_hugepages.read())
+
+
+def set_hugepages(path, pages):
+    '''Store the requested page count in nr_hugepages below path.
+
+    Exits the script with a message when the file cannot be written for
+    lack of permission or because it does not exist.
+    '''
+    target = path + '/nr_hugepages'
+    try:
+        with open(target, 'w') as sysfs_file:
+            sysfs_file.write(str(pages) + '\n')
+    except PermissionError:
+        sys.exit('Permission denied: need to be root!')
+    except FileNotFoundError:
+        # the directory is named hugepages-NNNkB; report only the NNNkB part
+        size = os.path.basename(path)[10:]
+        sys.exit('{} is not a valid system huge page size'.format(size))
+
+
+def show_numa_pages():
+    '''Show huge page reservations on Numa system.
+
+    Prints a header and then one line for every node / page size pair
+    that has at least one page reserved.  Nodes without a hugepages
+    directory in sysfs are skipped.
+    '''
+    print('Node Pages Size Total')
+    prefix = '/sys/devices/system/node/node'
+    for numa_path in glob.glob(prefix + '*'):
+        node = numa_path[len(prefix):]  # what follows the prefix
+        path = numa_path + '/hugepages'
+        if not os.path.isdir(path):
+            # presumably a node without memory; there is nothing to
+            # report and os.listdir() would raise FileNotFoundError
+            continue
+        for hdir in os.listdir(path):
+            pages = get_hugepages(path + '/' + hdir)
+            if pages > 0:
+                kb = int(hdir[10:-2])  # slice out of hugepages-NNNkB
+                print('{:<4} {:<5} {:<6} {}'.format(node, pages,
+                                                    fmt_memsize(kb),
+                                                    fmt_memsize(pages * kb)))
+
+
+def show_non_numa_pages():
+    '''Print the system wide huge page reservations.
+
+    One line is printed per page size that has at least one page reserved.
+    '''
+    print('Pages Size Total')
+    base = '/sys/kernel/mm/hugepages'
+    for entry in os.listdir(base):
+        count = get_hugepages(base + '/' + entry)
+        if count <= 0:
+            continue
+        size_kb = int(entry[10:-2])  # the NNN of hugepages-NNNkB
+        page_size = fmt_memsize(size_kb)
+        total = fmt_memsize(count * size_kb)
+        print('{:<5} {:<6} {}'.format(count, page_size, total))
+
+
+def show_pages():
+    '''Print the huge page reservations in the layout matching the system'''
+    display = show_numa_pages if is_numa() else show_non_numa_pages
+    display()
+
+
+def clear_pages():
+    '''Set the reservation of every huge page size back to zero'''
+    if is_numa():
+        pattern = '/sys/devices/system/node/node*/hugepages/hugepages-*'
+    else:
+        pattern = '/sys/kernel/mm/hugepages/hugepages-*'
+
+    for hugedir in glob.glob(pattern):
+        set_hugepages(hugedir, 0)
+
+
+def default_pagesize():
+    '''Return the Hugepagesize value of /proc/meminfo as an integer.
+
+    None is returned when the file has no such line.
+    '''
+    with open('/proc/meminfo') as meminfo:
+        sizes = (int(entry.split()[1]) for entry in meminfo
+                 if entry.startswith('Hugepagesize:'))
+        # only the first matching line is consumed
+        return next(sizes, None)
+
+
+def set_numa_pages(pages, hugepgsz, node=None):
+    '''Set huge page reservation on Numa system.
+
+    pages: number of huge pages to reserve on each selected node.
+    hugepgsz: huge page size in kB.
+    node: NUMA node number to reserve on; None or an empty string selects
+    every node.
+
+    Exits the script if the requested node is not present.
+    '''
+    # Do not rely on truthiness: node 0 passed as an integer is a valid
+    # selection and must not fall through to "all nodes"
+    if node is None or node == '':
+        nodes = glob.glob('/sys/devices/system/node/node*/hugepages')
+    else:
+        node_dir = '/sys/devices/system/node/node{}'.format(node)
+        if not os.path.isdir(node_dir):
+            # report the real problem here; otherwise the failure is
+            # reported later as an invalid huge page size
+            sys.exit('NUMA node {} does not exist'.format(node))
+        nodes = [node_dir + '/hugepages']
+
+    for node_path in nodes:
+        huge_path = '{}/hugepages-{}kB'.format(node_path, hugepgsz)
+        set_hugepages(huge_path, pages)
+
+
+def set_non_numa_pages(pages, hugepgsz):
+    '''Reserve pages of the given size (in kB) in the system wide pool'''
+    set_hugepages(
+        '/sys/kernel/mm/hugepages/hugepages-{}kB'.format(hugepgsz), pages)
+
+
+def reserve_pages(pages, hugepgsz, node=None):
+    '''Reserve huge pages through the per-node or the system wide path.
+
+    The per-node path is taken when a node is given or the system is NUMA.
+    '''
+    if not node and not is_numa():
+        set_non_numa_pages(pages, hugepgsz)
+        return
+    set_numa_pages(pages, hugepgsz, node=node)
+
+
+def get_mountpoints():
+    '''Return the mount points of every hugetlbfs entry in /proc/mounts'''
+    with open('/proc/mounts') as mounts:
+        entries = [line.split() for line in mounts]
+    # field 1 is the mount point, field 2 the filesystem type
+    return [fields[1] for fields in entries if fields[2] == 'hugetlbfs']
+
+
+def mount_huge(pagesize, mountpoint):
+    '''Mount hugetlbfs on mountpoint unless it is already mounted there.
+
+    pagesize: page size in kB; when set it is passed to mount in bytes.
+    '''
+    if mountpoint in get_mountpoints():
+        print(mountpoint, "already mounted")
+        return
+    parts = ["mount -t hugetlbfs"]
+    if pagesize:
+        parts.append('-o pagesize={}'.format(pagesize * 1024))
+    parts.append('nodev')
+    parts.append(mountpoint)
+    os.system(' '.join(parts))
+
+
+def umount_huge(mountpoint):
+    '''Unmount mountpoint when a hugetlbfs is mounted on it'''
+    if mountpoint not in get_mountpoints():
+        return
+    os.system("umount " + mountpoint)
+
+
+def show_mount():
+    '''Print the hugetlbfs mount points, or that there are none'''
+    mountpoints = get_mountpoints()
+    if not mountpoints:
+        print("Hugepages not mounted")
+        return
+    print("Hugepages mounted on", *mountpoints)
+
+
+def main():
+    '''Process the command line arguments and setup huge pages.
+
+    The requested actions always run in the order clear, unmount, reserve,
+    mount, show, whatever order the options were given in.  --setup is a
+    shorthand that enables clear, unmount, reserve and mount.
+
+    Exits the script when a reservation is requested but no usable page
+    size is known, or when the reservation is not a multiple of the page
+    size.
+    '''
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="Setup huge pages",
+        epilog="""
+Examples:
+
+To display current huge page settings:
+    %(prog)s -s
+
+To a complete setup of with 2 Gigabyte of 1G huge pages:
+    %(prog)s -p 1G --setup 2G
+""")
+    parser.add_argument(
+        '--show',
+        '-s',
+        action='store_true',
+        help="print the current huge page configuration")
+    parser.add_argument(
+        '--clear', '-c', action='store_true', help="clear existing huge pages")
+    parser.add_argument(
+        '--mount',
+        '-m',
+        action='store_true',
+        help='mount the huge page filesystem')
+    parser.add_argument(
+        '--unmount',
+        '-u',
+        action='store_true',
+        help='unmount the system huge page directory')
+    parser.add_argument(
+        '--node', '-n', help='select numa node to reserve pages on')
+    parser.add_argument(
+        '--pagesize',
+        '-p',
+        metavar='SIZE',
+        help='choose huge page size to use')
+    parser.add_argument(
+        '--reserve',
+        '-r',
+        metavar='SIZE',
+        help='reserve huge pages. Size is in bytes with K, M, or G suffix')
+    parser.add_argument(
+        '--setup',
+        metavar='SIZE',
+        help='setup huge pages by doing clear, unmount, reserve and mount')
+    args = parser.parse_args()
+
+    if args.setup:
+        args.clear = True
+        args.unmount = True
+        args.reserve = args.setup
+        args.mount = True
+
+    if args.pagesize:
+        pagesize_kb = get_memsize(args.pagesize)
+    else:
+        pagesize_kb = default_pagesize()
+
+    if args.clear:
+        clear_pages()
+    if args.unmount:
+        umount_huge(HUGE_MOUNT)
+
+    if args.reserve:
+        reserve_kb = get_memsize(args.reserve)
+        # default_pagesize() returns None without a Hugepagesize line and
+        # a --pagesize below 1K converts to 0 kB; either one would make
+        # the arithmetic below raise instead of reporting the problem
+        if not pagesize_kb:
+            sys.exit('Unable to determine a usable huge page size')
+        if reserve_kb % pagesize_kb != 0:
+            sys.exit(
+                'Huge reservation {}kB is not a multiple of page size {}kB'.
+                format(reserve_kb, pagesize_kb))
+        # integer division keeps the page count exact
+        reserve_pages(
+            reserve_kb // pagesize_kb, pagesize_kb, node=args.node)
+    if args.mount:
+        mount_huge(pagesize_kb, HUGE_MOUNT)
+    if args.show:
+        show_pages()
+        print()
+        show_mount()
+
+
+# Run the tool only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()
index 64e2723..596eaef 100644 (file)
@@ -1,4 +1,9 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-install_data(['dpdk-devbind.py', 'dpdk-pmdinfo.py', 'dpdk-telemetry.py'], install_dir: 'bin')
+install_data([
+       'dpdk-devbind.py',
+       'dpdk-pmdinfo.py',
+       'dpdk-telemetry.py',
+       'dpdk-hugepages.py'
+],install_dir: 'bin')