On 10/28/2025 12:57 PM, Tanmay Shah wrote:
Current attach on recovery mechanism loads the clean resource table
during recovery, but doesn't re-allocate the resources. RPMsg
communication will fail after recovery due to this. Fix this
incorrect behavior by doing the full detach and attach of remote
processor during the recovery. This will load the clean resource table
and re-allocate all the resources, which will set up correct vring
information in the resource table.

Signed-off-by: Tanmay Shah <[email protected]>
---
  drivers/remoteproc/remoteproc_core.c | 22 +++++++++++++---------
  1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c 
b/drivers/remoteproc/remoteproc_core.c
index aada2780b343..f5b078fe056a 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1777,11 +1777,11 @@ static int rproc_attach_recovery(struct rproc *rproc)
  {
        int ret;
- ret = __rproc_detach(rproc);
+       ret = rproc_detach(rproc);
        if (ret)
                return ret;
- return __rproc_attach(rproc);
+       return rproc_attach(rproc);
  }
static int rproc_boot_recovery(struct rproc *rproc)
@@ -1829,6 +1829,9 @@ int rproc_trigger_recovery(struct rproc *rproc)
        struct device *dev = &rproc->dev;
        int ret;
+ if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_ON_RECOVERY))
+               return rproc_attach_recovery(rproc);
+
        ret = mutex_lock_interruptible(&rproc->lock);

Hi Tanmay,

I have a concern about this patch, specifically regarding the locking
behavior and potential race conditions introduced by the early return in
rproc_trigger_recovery(), by calling rproc_attach_recovery() directly
and bypassing the original mutex_lock_interruptible() in
rproc_trigger_recovery(), the recovery flow now executes rproc_attach()
without holding rproc->lock.

This could potentially lead to race conditions if other threads are
accessing or modifying shared resources concurrently.For example, one
possible scenario is:


state_store/rproc_trigger_auto_boot
-->rproc_boot
   -->ret = mutex_lock_interruptible(&rproc->lock);    <--(4)
   -->if (rproc->state == RPROC_DETACHED) {
       -->ret = rproc_attach(rproc);                   <--(5)
      }
   -->mutex_unlock(&rproc->lock);
        

rproc_trigger_recovery
-->rproc_attach_recovery
   -->rproc_detach
      -->ret = mutex_lock_interruptible(&rproc->lock); <--(1)
      -->ret = __rproc_detach(rproc);
      -->rproc->state = RPROC_DETACHED;                <--(2)
      -->mutex_unlock(&rproc->lock);                   <--(3)
  -->return rproc_attach(rproc);                       <--(6)

As shown in stack (5) and (6), two threads may simultaneously
execute the rproc_attach() function, which could lead to a race
condition.

Please feel free to correct me, thanks~


        if (ret)
                return ret;
@@ -1839,10 +1842,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
dev_err(dev, "recovering %s\n", rproc->name); - if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_ON_RECOVERY))
-               ret = rproc_attach_recovery(rproc);
-       else
-               ret = rproc_boot_recovery(rproc);
+       ret = rproc_boot_recovery(rproc);
unlock_mutex:
        mutex_unlock(&rproc->lock);
@@ -1860,6 +1860,7 @@ static void rproc_crash_handler_work(struct work_struct 
*work)
  {
        struct rproc *rproc = container_of(work, struct rproc, crash_handler);
        struct device *dev = &rproc->dev;
+       int ret;
dev_dbg(dev, "enter %s\n", __func__); @@ -1883,8 +1884,11 @@ static void rproc_crash_handler_work(struct work_struct *work) mutex_unlock(&rproc->lock); - if (!rproc->recovery_disabled)
-               rproc_trigger_recovery(rproc);
+       if (!rproc->recovery_disabled) {
+               ret = rproc_trigger_recovery(rproc);
+               if (ret)
+                       dev_warn(dev, "rproc recovery failed, err %d\n", ret);
+       }
out:
        pm_relax(rproc->dev.parent);
@@ -2057,7 +2061,7 @@ int rproc_detach(struct rproc *rproc)
                return ret;
        }
- if (rproc->state != RPROC_ATTACHED) {
+       if (rproc->state != RPROC_ATTACHED && rproc->state != RPROC_CRASHED) {
                ret = -EINVAL;
                goto out;
        }


--
Thx and BRs,
Zhongqiu Han

Reply via email to