Create Memory Node "types" (SysRAM and Specific Purpose) which can be
set at memory hotplug time.

SysRAM nodes present at __init time are added to the mt_sysram_nodelist
and memory hotplug will decide whether hotplugged nodes will be placed
in mt_sysram_nodelist or mt_spm_nodelist.

SPM nodes are not included in demotion targets.

Setting a node type is permanent: once set, it cannot be changed.  This
prevents type-change race conditions on the global mt_sysram_nodelist.

Signed-off-by: Gregory Price <[email protected]>
---
 include/linux/memory-tiers.h | 47 +++++++++++++++++++++++++
 mm/memory-tiers.c            | 66 ++++++++++++++++++++++++++++++++++--
 2 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 7a805796fcfd..59443cbfaec3 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -35,10 +35,44 @@ struct memory_dev_type {
 
 struct access_coordinate;
 
+enum {	/* memory node types accepted by mt_set_node_type() */
+       MT_NODE_TYPE_SYSRAM,	/* node is tracked in mt_sysram_nodelist */
+       MT_NODE_TYPE_SPM	/* Specific Purpose Memory; tracked in mt_spm_nodelist */
+};
+
 #ifdef CONFIG_NUMA
 extern bool numa_demotion_enabled;
 extern struct memory_dev_type *default_dram_type;
 extern nodemask_t default_dram_nodes;
+extern nodemask_t mt_sysram_nodelist;
+extern nodemask_t mt_spm_nodelist;
+/*
+ * Return a pointer to the SysRAM nodemask, or NULL while that mask is
+ * still empty (no SysRAM filter has been recorded yet).
+ */
+static inline nodemask_t *mt_sysram_nodemask(void)
+{
+       return nodes_empty(mt_sysram_nodelist) ? NULL : &mt_sysram_nodelist;
+}
+/*
+ * Restrict @mask to SysRAM nodes and store the result in @dst.  While the
+ * SysRAM mask is empty there is no filter to apply, so @mask is copied
+ * into @dst unchanged.
+ */
+static inline void mt_nodemask_sysram_mask(nodemask_t *dst, nodemask_t *mask)
+{
+       if (!nodes_empty(mt_sysram_nodelist)) {
+               nodes_and(*dst, *mask, mt_sysram_nodelist);
+               return;
+       }
+
+       /* No filter available: OR-ing with the empty mask copies @mask */
+       nodes_or(*dst, *mask, NODE_MASK_NONE);
+}
+/*
+ * Report whether @nid is a SysRAM node.  Every node is treated as SysRAM
+ * while the SysRAM mask is empty (filter not set up).
+ */
+static inline bool mt_node_is_sysram(int nid)
+{
+       if (nodes_empty(mt_sysram_nodelist))
+               return true;
+
+       return node_isset(nid, mt_sysram_nodelist);
+}
+/*
+ * Report whether a request using @gfp_mask may use node @nid: requests
+ * carrying __GFP_SPM_NODE may use any node, all others are limited to
+ * nodes accepted by mt_node_is_sysram().
+ */
+static inline bool mt_node_allowed(int nid, gfp_t gfp_mask)
+{
+       return (gfp_mask & __GFP_SPM_NODE) || mt_node_is_sysram(nid);
+}
 struct memory_dev_type *alloc_memory_type(int adistance);
 void put_memory_type(struct memory_dev_type *memtype);
 void init_node_memory_type(int node, struct memory_dev_type *default_type);
@@ -73,11 +107,19 @@ static inline bool node_is_toptier(int node)
 }
 #endif
 
+int mt_set_node_type(int node, int type);
+
 #else
 
 #define numa_demotion_enabled  false
 #define default_dram_type      NULL
 #define default_dram_nodes     NODE_MASK_NONE
+#define mt_sysram_nodelist     NODE_MASK_NONE
+#define mt_spm_nodelist                NODE_MASK_NONE
+/* !CONFIG_NUMA stub: there is no SysRAM mask, so always return NULL */
+static inline nodemask_t *mt_sysram_nodemask(void)
+{
+       return NULL;
+}
+/*
+ * !CONFIG_NUMA stub: no SysRAM filter exists, so @mask is passed through
+ * to @dst unchanged.  @dst must always be written, matching what the
+ * CONFIG_NUMA version does when its filter is empty; otherwise callers
+ * would read an uninitialized nodemask.
+ */
+static inline void mt_nodemask_sysram_mask(nodemask_t *dst, nodemask_t *mask)
+{
+       *dst = *mask;
+}
+/* !CONFIG_NUMA stub: every node is treated as a SysRAM node */
+static inline bool mt_node_is_sysram(int nid)
+{
+       return true;
+}
+/* !CONFIG_NUMA stub: any node is allowed regardless of @gfp_mask */
+static inline bool mt_node_allowed(int nid, gfp_t gfp_mask)
+{
+       return true;
+}
 /*
  * CONFIG_NUMA implementation returns non NULL error.
  */
@@ -151,5 +193,10 @@ static inline struct memory_dev_type 
*mt_find_alloc_memory_type(int adist,
 static inline void mt_put_memory_types(struct list_head *memory_types)
 {
 }
+
+/*
+ * !CONFIG_NUMA stub: there are no node type masks to update, so setting a
+ * type always succeeds.  This must be static inline: a plain definition in
+ * a header is emitted by every file that includes it and breaks the link
+ * with multiple-definition errors.
+ */
+static inline int mt_set_node_type(int node, int type)
+{
+       return 0;
+}
 #endif /* CONFIG_NUMA */
 #endif  /* _LINUX_MEMORY_TIERS_H */
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 0ea5c13f10a2..dd6cfaa4c667 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -44,7 +44,15 @@ static LIST_HEAD(memory_tiers);
 static LIST_HEAD(default_memory_types);
 static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
 struct memory_dev_type *default_dram_type;
-nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE;
+
+/* default_dram_nodes is the list of nodes with both CPUs and RAM */
+nodemask_t default_dram_nodes = NODE_MASK_NONE;
+
+/* mt_sysram_nodelist is the list of nodes with SysRAM */
+nodemask_t mt_sysram_nodelist = NODE_MASK_NONE;
+
+/* mt_spm_nodelist is the list of nodes with Specific Purpose Memory */
+nodemask_t mt_spm_nodelist = NODE_MASK_NONE;
 
 static const struct bus_type memory_tier_subsys = {
        .name = "memory_tiering",
@@ -427,6 +435,14 @@ static void establish_demotion_targets(void)
        disable_all_demotion_targets();
 
        for_each_node_state(node, N_MEMORY) {
+               /*
+                * If this is not a sysram node, direct-demotion is not allowed
+                * and must be managed by special logic that understands the
+                * memory features of that particular node.
+                */
+               if (!node_isset(node, mt_sysram_nodelist))
+                       continue;
+
                best_distance = -1;
                nd = &node_demotion[node];
 
@@ -457,7 +473,8 @@ static void establish_demotion_targets(void)
                                break;
 
                        distance = node_distance(node, target);
-                       if (distance == best_distance || best_distance == -1) {
+                       if ((distance == best_distance || best_distance == -1) 
&&
+                           node_isset(target, mt_sysram_nodelist)) {
                                best_distance = distance;
                                node_set(target, nd->preferred);
                        } else {
@@ -689,6 +706,48 @@ void mt_put_memory_types(struct list_head *memory_types)
 }
 EXPORT_SYMBOL_GPL(mt_put_memory_types);
 
+/**
+ * mt_set_node_type() - Set a NUMA Node's Memory type.
+ * @node: The node whose type is being set
+ * @type: The type to set (MT_NODE_TYPE_SYSRAM or MT_NODE_TYPE_SPM)
+ *
+ * This is a one-way setting, once a type is assigned it cannot be cleared
+ * without resetting the system.  This is to avoid race conditions associated
+ * with moving nodes from one type to another during memory hotplug.
+ *
+ * Once a node is added as a SysRAM node, it will be used by default in
+ * the page allocator as a valid target when the caller does not provide
+ * a node or nodemask.  This is safe as the page allocator iterates through
+ * zones and uses this nodemask to filter zones - if a node is present but
+ * has no zones the node is ignored.
+ *
+ * Return: 0 if the node type is set successfully (or it's already set)
+ *         -EBUSY if the node has a different type already
+ *         -ENODEV if the type is invalid
+ */
+int mt_set_node_type(int node, int type)
+{
+       /*
+        * Start from success so that every path below leaves a defined
+        * value; only the failure cases overwrite it.
+        */
+       int err = 0;
+
+       /* Both type masks are checked and updated under memory_tier_lock */
+       mutex_lock(&memory_tier_lock);
+       switch (type) {
+       case MT_NODE_TYPE_SYSRAM:
+               /* A node already recorded as SPM can never become SysRAM */
+               if (node_isset(node, mt_spm_nodelist))
+                       err = -EBUSY;
+               else
+                       node_set(node, mt_sysram_nodelist);
+               break;
+       case MT_NODE_TYPE_SPM:
+               /* A node already recorded as SysRAM can never become SPM */
+               if (node_isset(node, mt_sysram_nodelist))
+                       err = -EBUSY;
+               else
+                       node_set(node, mt_spm_nodelist);
+               break;
+       default:
+               /* Unknown type: touch neither mask */
+               err = -ENODEV;
+               break;
+       }
+       mutex_unlock(&memory_tier_lock);
+
+       return err;
+}
+
 /*
  * This is invoked via `late_initcall()` to initialize memory tiers for
  * memory nodes, both with and without CPUs. After the initialization of
@@ -922,6 +981,9 @@ static int __init memory_tier_init(void)
        nodes_and(default_dram_nodes, node_states[N_MEMORY],
                  node_states[N_CPU]);
 
+       /* Record all nodes with non-hotplugged memory as default SYSRAM nodes 
*/
+       mt_sysram_nodelist = node_states[N_MEMORY];
+
        hotplug_node_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
        return 0;
 }
-- 
2.51.1


Reply via email to