Create Memory Node "types" (SysRAM and Specific Purpose) which can be set at memory hotplug time.
SysRAM nodes present at __init time are added to the mt_sysram_nodelist and memory hotplug will decide whether hotplugged nodes will be placed in mt_sysram_nodelist or mt_spm_nodelist. SPM nodes are not included in demotion targets. Setting a node type is permanent and cannot be switched once set; this prevents type-change race conditions on the global mt_sysram_nodelist. Signed-off-by: Gregory Price <[email protected]> --- include/linux/memory-tiers.h | 47 +++++++++++++++++++++++++ mm/memory-tiers.c | 66 ++++++++++++++++++++++++++++++++++-- 2 files changed, 111 insertions(+), 2 deletions(-) diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 7a805796fcfd..59443cbfaec3 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -35,10 +35,44 @@ struct memory_dev_type { struct access_coordinate; +enum { + MT_NODE_TYPE_SYSRAM, + MT_NODE_TYPE_SPM +}; + #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; extern struct memory_dev_type *default_dram_type; extern nodemask_t default_dram_nodes; +extern nodemask_t mt_sysram_nodelist; +extern nodemask_t mt_spm_nodelist; +static inline nodemask_t *mt_sysram_nodemask(void) +{ + if (nodes_empty(mt_sysram_nodelist)) + return NULL; + return &mt_sysram_nodelist; +} +static inline void mt_nodemask_sysram_mask(nodemask_t *dst, nodemask_t *mask) +{ + /* If the sysram filter isn't available, this allows all */ + if (nodes_empty(mt_sysram_nodelist)) { + nodes_or(*dst, *mask, NODE_MASK_NONE); + return; + } + nodes_and(*dst, *mask, mt_sysram_nodelist); +} +static inline bool mt_node_is_sysram(int nid) +{ + /* if sysram filter isn't setup, this allows all */ + return nodes_empty(mt_sysram_nodelist) || + node_isset(nid, mt_sysram_nodelist); +} +static inline bool mt_node_allowed(int nid, gfp_t gfp_mask) +{ + if (gfp_mask & __GFP_SPM_NODE) + return true; + return mt_node_is_sysram(nid); +} struct memory_dev_type *alloc_memory_type(int adistance); void put_memory_type(struct memory_dev_type 
*memtype); void init_node_memory_type(int node, struct memory_dev_type *default_type); @@ -73,11 +107,19 @@ static inline bool node_is_toptier(int node) } #endif +int mt_set_node_type(int node, int type); + #else #define numa_demotion_enabled false #define default_dram_type NULL #define default_dram_nodes NODE_MASK_NONE +#define mt_sysram_nodelist NODE_MASK_NONE +#define mt_spm_nodelist NODE_MASK_NONE +static inline nodemask_t *mt_sysram_nodemask(void) { return NULL; } +static inline void mt_nodemask_sysram_mask(nodemask_t *dst, nodemask_t *mask) { *dst = *mask; } +static inline bool mt_node_is_sysram(int nid) { return true; } +static inline bool mt_node_allowed(int nid, gfp_t gfp_mask) { return true; } /* * CONFIG_NUMA implementation returns non NULL error. */ @@ -151,5 +193,10 @@ static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist, static inline void mt_put_memory_types(struct list_head *memory_types) { } + +static inline int mt_set_node_type(int node, int type) +{ + return 0; +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 0ea5c13f10a2..dd6cfaa4c667 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -44,7 +44,15 @@ static LIST_HEAD(memory_tiers); static LIST_HEAD(default_memory_types); static struct node_memory_type_map node_memory_types[MAX_NUMNODES]; struct memory_dev_type *default_dram_type; -nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE; + +/* default_dram_nodes is the list of nodes with both CPUs and RAM */ +nodemask_t default_dram_nodes = NODE_MASK_NONE; + +/* mt_sysram_nodelist is the list of nodes with SysRAM */ +nodemask_t mt_sysram_nodelist = NODE_MASK_NONE; + +/* mt_spm_nodelist is the list of nodes with Specific Purpose Memory */ +nodemask_t mt_spm_nodelist = NODE_MASK_NONE; static const struct bus_type memory_tier_subsys = { .name = "memory_tiering", @@ -427,6 +435,14 @@ static void establish_demotion_targets(void) disable_all_demotion_targets(); 
for_each_node_state(node, N_MEMORY) { + /* + * If this is not a sysram node, direct-demotion is not allowed + * and must be managed by special logic that understands the + * memory features of that particular node. + */ + if (!node_isset(node, mt_sysram_nodelist)) + continue; + best_distance = -1; nd = &node_demotion[node]; @@ -457,7 +473,8 @@ static void establish_demotion_targets(void) break; distance = node_distance(node, target); - if (distance == best_distance || best_distance == -1) { + if ((distance == best_distance || best_distance == -1) && + node_isset(target, mt_sysram_nodelist)) { best_distance = distance; node_set(target, nd->preferred); } else { @@ -689,6 +706,48 @@ void mt_put_memory_types(struct list_head *memory_types) } EXPORT_SYMBOL_GPL(mt_put_memory_types); +/** + * mt_set_node_type() - Set a NUMA Node's Memory type. + * @node: The node whose type to set + * @type: The type to set + * + * This is a one-way setting, once a type is assigned it cannot be cleared + * without resetting the system. This is to avoid race conditions associated + * with moving nodes from one type to another during memory hotplug. + * + * Once a node is added as a SysRAM node, it will be used by default in + * the page allocator as a valid target when the caller does not provide + * a node or nodemask. This is safe as the page allocator iterates through + * zones and uses this nodemask to filter zones - if a node is present but + * has no zones the node is ignored. + * + * Return: 0 if the node type is set successfully (or it's already set) + * -EBUSY if the node has a different type already + * -ENODEV if the type is invalid + */ +int mt_set_node_type(int node, int type) +{ + int err = 0; /* an unknown @type skips both checks below; must not read garbage */ + + mutex_lock(&memory_tier_lock); + if (type == MT_NODE_TYPE_SYSRAM) + err = node_isset(node, mt_spm_nodelist) ? -EBUSY : 0; + else if (type == MT_NODE_TYPE_SPM) + err = node_isset(node, mt_sysram_nodelist) ? 
-EBUSY : 0; + if (err) + goto out; + + if (type == MT_NODE_TYPE_SYSRAM) + node_set(node, mt_sysram_nodelist); + else if (type == MT_NODE_TYPE_SPM) + node_set(node, mt_spm_nodelist); + else + err = -ENODEV; +out: + mutex_unlock(&memory_tier_lock); + return err; +} + /* * This is invoked via `late_initcall()` to initialize memory tiers for * memory nodes, both with and without CPUs. After the initialization of @@ -922,6 +981,9 @@ static int __init memory_tier_init(void) nodes_and(default_dram_nodes, node_states[N_MEMORY], node_states[N_CPU]); + /* Record all nodes with non-hotplugged memory as default SYSRAM nodes */ + mt_sysram_nodelist = node_states[N_MEMORY]; + hotplug_node_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI); return 0; } -- 2.51.1

