The main problem: when a class is over its guarantee, only
this class is penalized by paging, and the tasks in the
class cannot make progress.
Without a guarantee, the same tasks run without being
paged out.
A fair share of the memory should mean:
- A class without a guarantee and the default class should be
  penalized too. Only a class with a guarantee and with
  current_usage < guarantee should not be penalized.
- When a class becomes a victim, all the czones of this
  class should be penalized (and not just the last used czone).
- When a class is under its guarantee, all the czones of this
  class should be removed from the list of victims (and not
  just the last used czone).
- When the guarantee of a running class is changed, check the
  current memory usage of the class and add it to or remove it
  from the list of victims.
This patch addresses these issues.
Apply on linux-2.6.15.1 + mem_rc-f0.4-2615
Signed-Off-By: Patrick Le Dot <[EMAIL PROTECTED]>
diff -uprN a/include/linux/ckrm_mem.h b1/include/linux/ckrm_mem.h
--- a/include/linux/ckrm_mem.h 2006-02-23 15:03:22.000000000 +0100
+++ b1/include/linux/ckrm_mem.h 2006-02-24 07:54:15.000000000 +0100
@@ -32,8 +32,6 @@ struct ckrm_zone {
unsigned long nr_active;
unsigned long nr_inactive;
- unsigned long active_over;
- unsigned long inactive_over;
struct list_head guar_list; /* list of all over guar classes */
struct zone *zone;
@@ -99,7 +97,7 @@ extern void add_use_count(struct ckrm_me
extern void sub_use_count(struct ckrm_mem_res *, int, int, int);
extern int ckrm_class_limit_ok(struct ckrm_mem_res *);
-extern struct ckrm_zone *ckrm_get_max_overguar_czone(int);
+extern struct ckrm_zone *ckrm_get_czone_to_shrink(int, int, int *);
extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
diff -uprN a/include/linux/ckrm_mem_inline.h b1/include/linux/ckrm_mem_inline.h
--- a/include/linux/ckrm_mem_inline.h 2006-02-23 16:12:01.000000000 +0100
+++ b1/include/linux/ckrm_mem_inline.h 2006-02-24 09:12:04.000000000 +0100
@@ -26,6 +26,8 @@
#ifdef CONFIG_CKRM_RES_MEM
+extern struct list_head *ckrm_overguar_list;
+
#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
/*
@@ -375,6 +377,37 @@ static inline void ckrm_add_tail_inactiv
list_add_tail(&page->lru, &ckrm_zone->inactive_list);
}
+static inline void
+ckrm_add_to_guar_list(struct ckrm_zone *czone, int czindex)
+{
+ unsigned long flags;
+
+ /* fast path, after this czone gets into the list */
+ if (!list_empty(&czone->guar_list))
+ return;
+
+ spin_lock_irqsave(&ckrm_overguar_lock[czindex], flags);
+ if (list_empty(&czone->guar_list))
+ list_add_tail(&czone->guar_list,
+ &ckrm_overguar_list[czindex]);
+ spin_unlock_irqrestore(&ckrm_overguar_lock[czindex], flags);
+}
+
+static inline void
+ckrm_del_from_guar_list(struct ckrm_zone *czone, int czindex)
+{
+ unsigned long flags;
+
+ /* fast path, return immediately if we are not in the list */
+ if (list_empty(&czone->guar_list))
+ return;
+
+ spin_lock_irqsave(&ckrm_overguar_lock[czindex], flags);
+ if (!list_empty(&czone->guar_list))
+ list_del_init(&czone->guar_list);
+ spin_unlock_irqrestore(&ckrm_overguar_lock[czindex], flags);
+}
+
#else
#define ckrm_shrink_list_empty() (1)
diff -uprN a/kernel/ckrm/ckrm_memcore.c b1/kernel/ckrm/ckrm_memcore.c
--- a/kernel/ckrm/ckrm_memcore.c 2006-02-23 15:02:43.000000000 +0100
+++ b1/kernel/ckrm/ckrm_memcore.c 2006-02-24 09:11:06.000000000 +0100
@@ -403,10 +403,14 @@ mem_set_share_values(void *my_res, struc
struct ckrm_mem_res *res = my_res;
struct ckrm_mem_res *parres;
int rc;
+ int new_guarantee = 0;
if (!res)
return -EINVAL;
+ if (shares->my_guarantee != res->shares.my_guarantee)
+ new_guarantee = 1;
+
parres = ckrm_memclass(res->parent);
rc = ckrm_set_shares(shares, &res->shares, parres ? &parres->shares :
NULL);
@@ -417,6 +421,25 @@ mem_set_share_values(void *my_res, struc
set_impl_guar_children(parres);
}
+ if ((rc == 0) && new_guarantee) {
+ int i, pg_total = 0;
+
+ for (i = 0; i < ckrm_nr_czones; i++)
+ pg_total += res->ckrm_zone[i].pg_total;
+
+ if ((res == ckrm_mem_root_class) ||
+ (res->pg_guar == CKRM_SHARE_DONTCARE) ||
+ (pg_total > res->pg_guar)) {
+ for (i = 0; i < ckrm_nr_czones; i++)
+ if (res->ckrm_zone[i].nr_active +
+ res->ckrm_zone[i].nr_inactive != 0)
+				ckrm_add_to_guar_list(&res->ckrm_zone[i], i);
+ } else {
+ for (i = 0; i < ckrm_nr_czones; i++)
+ ckrm_del_from_guar_list(&res->ckrm_zone[i], i);
+ }
+ }
+
/* If the user has changed the shares, enable the controller */
ckrm_mem_state = 1;
diff -uprN a/kernel/ckrm/ckrm_memctlr.c b1/kernel/ckrm/ckrm_memctlr.c
--- a/kernel/ckrm/ckrm_memctlr.c 2006-02-23 15:03:08.000000000 +0100
+++ b1/kernel/ckrm/ckrm_memctlr.c 2006-02-24 09:57:00.000000000 +0100
@@ -18,47 +18,6 @@
#include <linux/pagemap.h>
#include <linux/ckrm_mem_inline.h>
-extern struct list_head *ckrm_overguar_list;
-
-static inline void
-ckrm_add_to_guar_list(struct ckrm_zone *czone, int czindex)
-{
- int usage;
- unsigned long flags;
-
- /* fast path, after this czone gets into the list */
- if (!list_empty(&czone->guar_list))
- return;
-
- usage = czone->nr_active + czone->nr_inactive;
- if (usage > czone->guar) {
- spin_lock_irqsave(&ckrm_overguar_lock[czindex], flags);
- if (list_empty(&czone->guar_list))
- list_add_tail(&czone->guar_list,
- &ckrm_overguar_list[czindex]);
- spin_unlock_irqrestore(&ckrm_overguar_lock[czindex], flags);
- }
-}
-
-static inline void
-ckrm_del_from_guar_list(struct ckrm_zone *czone, int czindex)
-{
- int usage;
- unsigned long flags;
-
- /* fast path, return immediately if we are not in the list */
- if (list_empty(&czone->guar_list))
- return;
-
- usage = czone->nr_active + czone->nr_inactive;
- if (usage <= czone->guar) {
- spin_lock_irqsave(&ckrm_overguar_lock[czindex], flags);
- if (!list_empty(&czone->guar_list))
- list_del_init(&czone->guar_list);
- spin_unlock_irqrestore(&ckrm_overguar_lock[czindex], flags);
- }
-}
-
extern int ckrm_mem_state;
void
@@ -87,7 +46,10 @@ add_use_count(struct ckrm_mem_res *cls,
add_use_count(parcls, 1, czindex, cnt);
czone->pg_borrowed += cnt;
}
- if (pg_total >= cls->pg_guar)
+
+ if ((cls == ckrm_mem_root_class) ||
+ (cls->pg_guar == CKRM_SHARE_DONTCARE) ||
+ (pg_total > cls->pg_guar))
ckrm_add_to_guar_list(czone, czindex);
if (ckrm_mem_state && (cls->pg_limit != CKRM_SHARE_DONTCARE) &&
@@ -103,6 +65,7 @@ sub_use_count(struct ckrm_mem_res *cls,
{
int borrow_cnt = 0;
struct ckrm_zone *czone;
+ int i, pg_total = 0;
if (!cls)
return;
@@ -118,7 +81,17 @@ sub_use_count(struct ckrm_mem_res *cls,
czone->pg_borrowed -= borrow_cnt;
}
}
- ckrm_del_from_guar_list(czone, czindex);
+
+	if ((cls == ckrm_mem_root_class) || (cls->pg_guar == CKRM_SHARE_DONTCARE))
+ return;
+
+ for (i = 0; i < ckrm_nr_czones; i++)
+ pg_total += cls->ckrm_zone[i].pg_total;
+
+ if (pg_total <= cls->pg_guar)
+ for (i = 0; i < ckrm_nr_czones; i++)
+ ckrm_del_from_guar_list(czone, i);
+
return;
}
@@ -394,47 +367,32 @@ ckrm_mem_migrate_mm(struct mm_struct* mm
}
/*
- * Returns the ckrm zone whose usage is over its guarantee and is
- * is the most among all the ckrm zones who are over their respective
- * guarantees.
+ * Returns a czone = the requested element of the list or NULL
+ * and the number of element of the list.
*
* While returning holds a reference to the class, Caller is responsible
* for dropping the reference(kref_put), when it is done with the ckrm
* zone.
*/
struct ckrm_zone *
-ckrm_get_max_overguar_czone(int czindex)
+ckrm_get_czone_to_shrink(int czindex, int asked_idx, int *nr_czone)
{
- struct ckrm_zone *czone;
- struct ckrm_zone *maxczone = &ckrm_mem_root_class->ckrm_zone[czindex];
- int max_overguar = 0, usage, cnt;
- struct ckrm_mem_res *cls;
-
- kref_get(&maxczone->memcls->nr_users);
+ struct ckrm_zone *czone, *asked_czone = NULL;
+ int idx = 0;
spin_lock_irq(&ckrm_overguar_lock[czindex]);
list_for_each_entry(czone, &ckrm_overguar_list[czindex], guar_list) {
- cls = czone->memcls;
- usage = czone->nr_active + czone->nr_inactive;
- if ((usage - czone->guar) > max_overguar) {
- kref_put(&maxczone->memcls->nr_users, memclass_release);
- max_overguar = usage - czone->guar;
- maxczone = czone;
- kref_get(&maxczone->memcls->nr_users);
+ idx++;
+ if (idx == asked_idx) {
+ asked_czone = czone;
+ kref_get(&czone->memcls->nr_users);
}
}
spin_unlock_irq(&ckrm_overguar_lock[czindex]);
- BUG_ON(maxczone == NULL);
-
- /* calculate active_over and inactive_over */
- cnt = maxczone->nr_active - (2 * maxczone->guar / 3);
- maxczone->active_over = (cnt > 0) ? cnt : SWAP_CLUSTER_MAX;
- cnt = maxczone->active_over + maxczone->nr_inactive
- - (maxczone->guar / 3);
- maxczone->inactive_over = (cnt > 0) ? cnt : SWAP_CLUSTER_MAX;
-
- return maxczone;
+ *nr_czone = idx;
+ return asked_czone;
}
+
LIST_HEAD(ckrm_shrink_list);
void
diff -uprN a/mm/vmscan.c b1/mm/vmscan.c
--- a/mm/vmscan.c 2006-02-23 15:02:14.000000000 +0100
+++ b1/mm/vmscan.c 2006-02-24 08:44:21.000000000 +0100
@@ -1023,15 +1023,34 @@ shrink_zone(struct zone *zone, struct sc
while (nr_active || nr_inactive) {
int czindex = zone_to_czindex(zone);
struct ckrm_zone *czone;
+ int nr_czone, idx = 1;
+ unsigned long czone_active, czone_inactive;
BUG_ON(czindex == -1);
- czone = ckrm_get_max_overguar_czone(czindex);
- sc->ckrm_active = min(nr_active, czone->active_over);
- sc->ckrm_inactive = min(nr_inactive, czone->inactive_over);
- nr_active -= sc->ckrm_active;
- nr_inactive -= sc->ckrm_inactive;
- shrink_ckrmzone(czone, sc);
- kref_put(&czone->memcls->nr_users, memclass_release);
+ czone = ckrm_get_czone_to_shrink(czindex, idx, &nr_czone);
+ while (czone != NULL) {
+ BUG_ON(nr_czone == 0);
+ czone_active = (czone->nr_active > 0) ?
+ czone->nr_active : sc->swap_cluster_max;
+ czone_inactive = (czone->nr_inactive > 0) ?
+				czone->nr_inactive : sc->swap_cluster_max;
+ sc->ckrm_active = min((nr_active / nr_czone),
+ czone_active);
+ sc->ckrm_inactive = min((nr_inactive / nr_czone),
+ czone_inactive);
+ nr_active -= sc->ckrm_active;
+ nr_inactive -= sc->ckrm_inactive;
+ shrink_ckrmzone(czone, sc);
+ kref_put(&czone->memcls->nr_users, memclass_release);
+ idx++;
+			czone = ckrm_get_czone_to_shrink(czindex, idx, &nr_czone);
+ }
+ if ((signed long)nr_active < nr_czone)
+ nr_active = 0;
+ if ((signed long)nr_inactive < nr_czone)
+ nr_inactive = 0;
+ if (sc->nr_to_reclaim <= 0)
+ break;
}
#else
while (nr_active || nr_inactive) {
-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
ckrm-tech mailing list
https://lists.sourceforge.net/lists/listinfo/ckrm-tech