File _service:obs_scm:0002-Support-initializing-HBW-nodes-from-memory_locality.patch of Package memkind
From 448eb95b45b0cf6ecc7cf1a3e24056a2fdae85bd Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Fri, 13 Oct 2023 15:21:11 +0800
Subject: [PATCH] Support initializing HBW nodes from memory_locality

In the current implementation the HBW nodes are mainly inferred from
the HMAT/SLIT, which may not describe all cases. For example, HMAT/SLIT
cannot describe the topology below:

       [ Node 0 ]
[ CPU 0-3 ][ CPU 4-7 ]
     |          |
[  HBM 0  ][  HBM 1  ]
[  Node 1 ][  Node 2 ]

CPUs 0-7 are in one NUMA node, but CPUs 0-3 are closest to HBM 0 while
CPUs 4-7 are closest to HBM 1. The current HMAT/SLIT cannot express
this case.

In order to support this, openEuler has merged an HBM device driver
that exports the topology through sysfs [1]. The description of the
above topology looks like:

$ cat /sys/kernel/hbm_memory/memory_topo/memory_locality
1 0-3
2 4-7

This patch cooperates with the HBM device driver to support
initializing the HBW nodes from memory_locality in memkind. memkind
first tries to obtain the HBW nodes by parsing memory_locality; on
failure, or if memory_locality does not exist on the system, it falls
back to HMAT/SLIT. Users can also disable this behaviour by setting
MEMKIND_DISABLE_MEMORY_LOCALITY=1.

[1] https://gitee.com/openeuler/kernel/pulls/451

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 include/memkind/internal/memkind_bitmask.h |   2 +
 src/memkind_bitmask.c                      | 185 +++++++++++++++++++++
 src/memkind_hbw.c                          |  42 +++++
 3 files changed, 229 insertions(+)

diff --git a/include/memkind/internal/memkind_bitmask.h b/include/memkind/internal/memkind_bitmask.h
index 5c5b8434..6b0c3f64 100644
--- a/include/memkind/internal/memkind_bitmask.h
+++ b/include/memkind/internal/memkind_bitmask.h
@@ -12,6 +12,8 @@ extern "C" {
 
 typedef int (*get_node_bitmask)(struct bitmask **);
 
+int set_numanode_from_memory_locality(void **numanode,
+                                      memkind_node_variant_t node_variant);
 int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
                          memkind_node_variant_t node_variant);
 int set_bitmask_for_current_numanode(unsigned long *nodemask,
diff --git a/src/memkind_bitmask.c b/src/memkind_bitmask.c
index 4f6d9f00..84300395 100644
--- a/src/memkind_bitmask.c
+++ b/src/memkind_bitmask.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: BSD-2-Clause
 /* Copyright (C) 2019 - 2021 Intel Corporation. */
 
+#include <ctype.h>
 #include <errno.h>
 #include <limits.h>
 #include <stdint.h>
+#include <stdio.h>
 
 #include <memkind/internal/memkind_bitmask.h>
 #include <memkind/internal/memkind_log.h>
@@ -12,6 +14,89 @@
 // Vector of CPUs with memory NUMA Node id(s)
 VEC(vec_cpu_node, int);
 
+void init_node_closet_cpu(cpu_set_t **cpunode_mask, int num_cpu, int num_nodes)
+{
+    char *line = NULL;
+    size_t len = 0;
+    ssize_t n;
+    FILE *f;
+
+    /*
+     * The content of /sys/kernel/hbm_memory/memory_topo/memory_locality should
+     * be like:
+     * 2 0-3
+     * 3 4-7
+     * 4 8-11
+     * 5 12-15
+     * 6 16-19
+     * 7 20-23
+     * 8 24-27
+     * 9 28-31
+     *
+     * The 1st column is the HBW node number and the 2nd column is the CPU list
+     * which is closest to the HBW node.
+     */
+    f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
+    if (!f)
+        return;
+
+    while ((n = getline(&line, &len, f)) != -1) {
+        long int node, begin_cpu, end_cpu;
+        char *begin, *end;
+
+        /* Get the node number first */
+        node = strtol(line, &end, 0);
+
+        /* Either the node number is invalid or the whole line is invalid */
+        if (line == end || node == LONG_MAX || node == LONG_MIN)
+            break;
+
+        if (node >= num_nodes) {
+            log_err("Invalid node number provided by memory_locality.");
+            break;
+        }
+
+        /* Try to find the beginning of the CPU list string */
+        while (*end == ' ' && end != line + len)
+            end++;
+
+        if (end == line + len || !isdigit(*end))
+            break;
+
+        begin = end;
+        do {
+            begin_cpu = strtol(begin, &end, 0);
+            if (begin == end || begin_cpu == LONG_MAX || begin_cpu == LONG_MIN)
+                break;
+
+            /* End of the line */
+            if (*end == '\0' || *end == '\n') {
+                CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+                break;
+            } else if (*end == ',') {
+                CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+            } else if (*end == '-' && isdigit(*(++end))) {
+                begin = end;
+                end_cpu = strtol(begin, &end, 0);
+                if (begin == end || end_cpu == LONG_MAX || end_cpu == LONG_MIN)
+                    break;
+
+                while (begin_cpu <= end_cpu) {
+                    CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+                    ++begin_cpu;
+                }
+            } else {
+                break;
+            }
+
+            begin = end + 1;
+        } while (begin < line + len);
+    }
+
+    free(line);
+    fclose(f);
+}
+
 int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
 {
     *bm = numa_parse_nodestring(nodes_env);
@@ -22,6 +107,106 @@ int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
     return MEMKIND_SUCCESS;
 }
 
+int set_numanode_from_memory_locality(void **numanode,
+                                      memkind_node_variant_t node_variant)
+{
+    int num_cpu = numa_num_configured_cpus();
+    int cpuset_size = CPU_ALLOC_SIZE(num_cpu);
+    int max_node_id = numa_max_node();
+    cpu_set_t **cpunode_mask;
+    int init_node, cpu_id;
+    int status;
+
+    cpunode_mask = calloc(max_node_id + 1, sizeof(*cpunode_mask));
+    if (!cpunode_mask) {
+        status = MEMKIND_ERROR_MALLOC;
+        log_err("calloc() failed.");
+        goto out;
+    }
+
+    for (init_node = 0; init_node <= max_node_id; init_node++) {
+        cpunode_mask[init_node] = CPU_ALLOC(num_cpu);
+        if (!cpunode_mask[init_node]) {
+            while (init_node >= 0) {
+                CPU_FREE(cpunode_mask[init_node]);
+                init_node--;
+            }
+
+            status = MEMKIND_ERROR_MALLOC;
+            log_err("CPU_ALLOC() failed.");
+            goto free_cpunode_mask;
+        }
+
+        CPU_ZERO_S(cpuset_size, cpunode_mask[init_node]);
+    }
+
+    init_node_closet_cpu(cpunode_mask, num_cpu, max_node_id + 1);
+
+    struct vec_cpu_node *node_arr =
+        (struct vec_cpu_node *)calloc(num_cpu, sizeof(struct vec_cpu_node));
+    if (!node_arr) {
+        status = MEMKIND_ERROR_MALLOC;
+        log_err("calloc() failed.");
+        goto free_cpunode_mask_array;
+    }
+
+    /* Scan CPUs once, assuming there are many more CPUs than NUMA nodes */
+    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+        for (init_node = 0; init_node <= max_node_id; init_node++) {
+            if (CPU_ISSET_S(cpu_id, cpuset_size, cpunode_mask[init_node])) {
+                VEC_PUSH_BACK(&node_arr[cpu_id], init_node);
+
+                /*
+                 * A CPU should always have exactly one closest node; log an
+                 * error if this is violated.
+                 */
+                if (node_variant == NODE_VARIANT_SINGLE &&
+                    VEC_SIZE(&node_arr[cpu_id]) > 1) {
+                    log_err("CPU%d has more than one closest node.", cpu_id);
+                    status = MEMKIND_ERROR_RUNTIME;
+                    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+                        if (VEC_CAPACITY(&node_arr[cpu_id]))
+                            VEC_DELETE(&node_arr[cpu_id]);
+                    }
+
+                    goto free_node_arr;
+                }
+            }
+        }
+    }
+
+    /* Sanity check each node_arr */
+    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+        if (VEC_SIZE(&node_arr[cpu_id]) == 0) {
+            log_err("CPU%d's nodemask is not initialized.", cpu_id);
+            status = MEMKIND_ERROR_RUNTIME;
+            for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+                if (VEC_CAPACITY(&node_arr[cpu_id]))
+                    VEC_DELETE(&node_arr[cpu_id]);
+            }
+
+            goto free_node_arr;
+        }
+    }
+
+    *numanode = node_arr;
+    status = MEMKIND_SUCCESS;
+    goto free_cpunode_mask_array;
+
+free_node_arr:
+    free(node_arr);
+
+free_cpunode_mask_array:
+    for (init_node = 0; init_node <= max_node_id; init_node++)
+        CPU_FREE(cpunode_mask[init_node]);
+
+free_cpunode_mask:
+    free(cpunode_mask);
+
+out:
+    return status;
+}
+
 int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
                          memkind_node_variant_t node_variant)
 {
diff --git a/src/memkind_hbw.c b/src/memkind_hbw.c
index 077660ab..e9948593 100644
--- a/src/memkind_hbw.c
+++ b/src/memkind_hbw.c
@@ -363,10 +363,36 @@ static bool is_hmat_supported(void)
     return true;
 }
 
+/*
+ * The OS may provide further information about the HBW topology in
+ * /sys/kernel/hbm_memory/memory_topo/memory_locality. Use it unless the user
+ * has specified HBW nodes or disabled the use of memory_locality.
+ */
+static bool use_memory_locality(void)
+{
+    char *memory_locality_disable = memkind_get_env("MEMKIND_DISABLE_MEMORY_LOCALITY");
+
+    if (memory_locality_disable && !strncmp(memory_locality_disable, "1", 1))
+        return false;
+
+    if (memkind_get_env("MEMKIND_HBW_NODES"))
+        return false;
+
+    return true;
+}
+
 static void memkind_hbw_closest_numanode_init(void)
 {
     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_MULTIPLE];
     g->numanode = NULL;
+
+    if (use_memory_locality()) {
+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
+                                                        NODE_VARIANT_MULTIPLE);
+        if (!g->init_err)
+            return;
+    }
+
     if (!is_hmat_supported()) {
         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
                                            &g->numanode, NODE_VARIANT_MULTIPLE);
@@ -380,6 +406,14 @@ static void memkind_hbw_closest_preferred_numanode_init(void)
 {
     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_SINGLE];
     g->numanode = NULL;
+
+    if (use_memory_locality()) {
+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
+                                                        NODE_VARIANT_SINGLE);
+        if (!g->init_err)
+            return;
+    }
+
     if (!is_hmat_supported()) {
         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
                                            &g->numanode, NODE_VARIANT_SINGLE);
@@ -393,6 +427,14 @@ static void memkind_hbw_all_numanode_init(void)
 {
     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_ALL];
     g->numanode = NULL;
+
+    if (use_memory_locality()) {
+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
+                                                        NODE_VARIANT_ALL);
+        if (!g->init_err)
+            return;
+    }
+
     if (!is_hmat_supported()) {
         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
                                            &g->numanode, NODE_VARIANT_ALL);
-- 
2.24.0
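
For illustration, a minimal usage sketch (not part of the patch above) of how an application could exercise the initialization path this patch adds, assuming a memkind build that carries the patch. memkind_malloc(), memkind_free() and MEMKIND_HBW are the standard memkind API; the file name demo.c and the allocation size are arbitrary choices for this sketch.

/* demo.c - sketch only: allocate from the HBW nodes detected at init time.
 *
 * Build (library path may differ on your system):
 *     gcc demo.c -o demo -lmemkind
 */
#include <memkind.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    size_t size = 64 * 1024 * 1024;

    /* MEMKIND_HBW has no fallback kind, so this is expected to return NULL
     * when no HBW node was detected, neither from memory_locality nor from
     * HMAT/SLIT. */
    char *buf = memkind_malloc(MEMKIND_HBW, size);
    if (!buf) {
        fprintf(stderr, "no high-bandwidth nodes available\n");
        return 1;
    }

    memset(buf, 0, size); /* touch the buffer so pages are actually bound */
    memkind_free(MEMKIND_HBW, buf);
    return 0;
}

Running the same binary with MEMKIND_DISABLE_MEMORY_LOCALITY=1, or with MEMKIND_HBW_NODES set, should bypass memory_locality and use the existing HMAT/SLIT path, which gives a quick way to compare the two detection mechanisms.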