On 23/03/2026 01:47, Benjamin Marzinski wrote:
  static void alua_rtpg_work(struct work_struct *work)
  {
        struct alua_dh_data *h =
@@ -670,56 +179,41 @@ static void alua_rtpg_work(struct work_struct *work)
        int err = SCSI_DH_OK;
        struct alua_queue_data *qdata, *tmp;
        unsigned long flags;
+       int ret;
spin_lock_irqsave(&h->lock, flags);
        h->flags |= ALUA_PG_RUNNING;
        if (h->flags & ALUA_PG_RUN_RTPG) {
-               int state = h->state;
h->flags &= ~ALUA_PG_RUN_RTPG;
                spin_unlock_irqrestore(&h->lock, flags);
-               if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
-                       if (alua_tur(sdev) == SCSI_DH_RETRY) {
-                               spin_lock_irqsave(&h->lock, flags);
-                               h->flags &= ~ALUA_PG_RUNNING;
-                               h->flags |= ALUA_PG_RUN_RTPG;
-                               if (!h->interval)
-                                       h->interval = ALUA_RTPG_RETRY_DELAY;
-                               spin_unlock_irqrestore(&h->lock, flags);
-                               queue_delayed_work(kaluad_wq, &h->rtpg_work,
-                                                  h->interval * HZ);
-                               return;
-                       }
-                       /* Send RTPG on failure or if TUR indicates SUCCESS */
-               }
-               err = alua_rtpg(sdev);
-               spin_lock_irqsave(&h->lock, flags);
-
-               if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
+               ret = scsi_alua_rtpg_run(sdev);
+               if (ret == -EAGAIN) {
This no longer handles the case where you want to trigger a new rtpg as
soon as the running one finishes. I think it should be checking
(ret == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG)
with a spinlock held.


Yeah, this is all tricky to handle, as the code in scsi_dh_alua.c was handling the error codes with the state machine, and I want to move the error handling into the core driver.

As for your specific point, I think that ALUA_PG_RUN_RTPG can only now be set from outside this work handler, and that should also trigger the work (so the check on ALUA_PG_RUN_RTPG was not really required). But, I think that I can just have as before (with the h->flags & ALUA_PG_RUN_RTPG check)

+                       spin_lock_irqsave(&h->lock, flags);
                        h->flags &= ~ALUA_PG_RUNNING;
-                       if (err == SCSI_DH_IMM_RETRY)
-                               h->interval = 0;
-                       else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG))
-                               h->interval = ALUA_RTPG_RETRY_DELAY;
                        h->flags |= ALUA_PG_RUN_RTPG;
                        spin_unlock_irqrestore(&h->lock, flags);
-                       goto queue_rtpg;
+                       queue_delayed_work(kaluad_wq, &h->rtpg_work,
+                                                          sdev->alua->interval 
* HZ);
+                       return;
                }
-               if (err != SCSI_DH_OK)
-                       h->flags &= ~ALUA_PG_RUN_STPG;
+               if (err != 0)
+                               h->flags &= ~ALUA_PG_RUN_STPG;
        }
+       spin_lock_irqsave(&h->lock, flags);
If h->flags & ALUA_PG_RUN_RTPG is false above, h->lock will already be
locked.


Right, that is a bug

        if (h->flags & ALUA_PG_RUN_STPG) {
                h->flags &= ~ALUA_PG_RUN_STPG;
                spin_unlock_irqrestore(&h->lock, flags);
-               err = alua_stpg(sdev);
-               spin_lock_irqsave(&h->lock, flags);
-               if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) {
+               ret = scsi_alua_stpg_run(sdev, h->flags & ALUA_OPTIMIZE_STPG);
+               if (err == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG) {
To avoid a race with resetting ALUA_PG_RUN_RTPG, this check needs to be
done with the spinlock held.

Yes,

Thanks,
John


Reply via email to