Currently, if a user enqueues a work item using schedule_delayed_work(),
the wq used is "system_wq" (a per-CPU wq), while queue_delayed_work()
uses WORK_CPU_UNBOUND (the placeholder used when no CPU is specified).
The same applies to schedule_work(), which uses system_wq, and
queue_work(), which again uses WORK_CPU_UNBOUND.
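
For reference, the wrappers in include/linux/workqueue.h reduce to
roughly the following (a paraphrase, not a verbatim copy of the
header):

        static inline bool queue_work(struct workqueue_struct *wq,
                                      struct work_struct *work)
        {
                /* "no CPU preference", on whichever wq the caller picked */
                return queue_work_on(WORK_CPU_UNBOUND, wq, work);
        }

        static inline bool schedule_work(struct work_struct *work)
        {
                /* hardwired to the per-CPU system_wq */
                return queue_work(system_wq, work);
        }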
This lack of consistency cannot be addressed without refactoring the
API. That refactoring began with:

commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag")
alloc_workqueue() treats all queues as per-CPU by default, while
unbound workqueues must opt in via WQ_UNBOUND.

This default is suboptimal: most workloads benefit from unbound queues,
which let the scheduler place worker threads where they are needed and
reduce noise when CPUs are isolated.
That commit added the WQ_PERCPU flag (equivalent to !WQ_UNBOUND) to
explicitly request per-CPU behavior from alloc_workqueue() when
WQ_UNBOUND has not been specified. With WQ_PERCPU available, any
alloc_workqueue() caller that does not explicitly specify WQ_UNBOUND
must now use WQ_PERCPU.
Once migration is complete, WQ_UNBOUND can be removed and unbound will
become the implicit default.
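
The conversion applied below follows the same pattern everywhere; as a
sketch (the wq name here is a placeholder, not taken from this patch):

        /* before: a flags argument of 0 implicitly meant per-CPU */
        wq = alloc_workqueue("example-wq", 0, 0);

        /* after: the per-CPU choice is spelled out */
        wq = alloc_workqueue("example-wq", WQ_PERCPU, 0);

        /* callers that already pass WQ_UNBOUND are unaffected */
        wq = alloc_workqueue("example-wq", WQ_UNBOUND, 0);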
Suggested-by: Tejun Heo <[email protected]>
Signed-off-by: Marco Crivellari <[email protected]>
---
drivers/gpu/drm/xe/xe_device.c | 4 ++--
drivers/gpu/drm/xe/xe_ggtt.c | 2 +-
drivers/gpu/drm/xe/xe_hw_engine_group.c | 3 ++-
drivers/gpu/drm/xe/xe_sriov.c | 2 +-
4 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 34d33965eac2..38b42d4f930f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -486,8 +486,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
WQ_MEM_RECLAIM);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
- xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
- xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0);
+ xe->unordered_wq = alloc_workqueue("xe-unordered-wq", WQ_PERCPU, 0);
+ xe->destroy_wq = alloc_workqueue("xe-destroy-wq", WQ_PERCPU, 0);
if (!xe->ordered_wq || !xe->unordered_wq ||
!xe->preempt_fence_wq || !xe->destroy_wq) {
/*
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 5edc0cad47e2..566163ab96ae 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -291,7 +291,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
else
ggtt->pt_ops = &xelp_pt_ops;
- ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
+ ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_PERCPU, WQ_MEM_RECLAIM);
if (!ggtt->wq)
return -ENOMEM;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index fa4db5f23342..8526addcdf42 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -48,7 +48,8 @@ hw_engine_group_alloc(struct xe_device *xe)
if (!group)
return ERR_PTR(-ENOMEM);
- group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
+ group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", WQ_PERCPU,
+ 0);
if (!group->resume_wq)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 7d2d6de2aabf..5c36da17f745 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -120,7 +120,7 @@ int xe_sriov_init(struct xe_device *xe)
xe_sriov_vf_init_early(xe);
xe_assert(xe, !xe->sriov.wq);
- xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
+ xe->sriov.wq = alloc_workqueue("xe-sriov-wq", WQ_PERCPU, 0);
if (!xe->sriov.wq)
return -ENOMEM;
--
2.51.1