diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 1fb11e6..7989fa5 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -220,6 +220,13 @@ typedef struct LVShared
 	pg_atomic_uint32 cost_balance;
 
 	/*
+	 * Number of active parallel workers.  This is used for computing the
+	 * minimum local vacuum cost balance a worker must have accumulated
+	 * before it sleeps for the cost-based delay.
+	 */
+	pg_atomic_uint32 nworkers;
+
+	/*
 	 * Variables to control parallel index vacuuming.  Index statistics
 	 * returned from ambulkdelete and amvacuumcleanup is nullable variable
 	 * length.  'offset' is NULL bitmap. Note that a 0 indicates a null,
@@ -241,6 +248,9 @@ typedef struct LVShared
 /* Global variable for shared cost-based vacuum delay */
 pg_atomic_uint32	*VacuumSharedCostBalance = NULL;
 
+/* Active worker count for shared cost-based vacuum delay */
+pg_atomic_uint32	*VacuumActiveWorkers = NULL;
+
 /*
  * Struct for an index bulk-deletion statistic used for parallel lazy
  * vacuum. This is allocated in the DSM segment.  IndexBulkDeleteResult
@@ -1999,9 +2009,10 @@ lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel,
 	 * balance.
 	 */
 	pg_atomic_write_u32(&(lps->lvshared->cost_balance), VacuumCostBalance);
+	pg_atomic_write_u32(&(lps->lvshared->nworkers), 0);
 	VacuumCostBalance = 0;
 	VacuumSharedCostBalance = &(lps->lvshared->cost_balance);
-
+	VacuumActiveWorkers = &(lps->lvshared->nworkers);
 	LaunchParallelWorkers(lps->pcxt);
 
 	if (lps->lvshared->for_cleanup)
@@ -2020,13 +2031,24 @@ lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel,
 	costdelay = VacuumCostTotalDelay;
 	VacuumCostTotalDelay = 0;
 	_nhit=_nmiss=_ndirty=0;
+	VacuumCostBalanceLocal = 0;
 	/*
 	 * Join as parallel workers. The leader process alone does that in case where
 	 * no workers launched.
 	 */
 	if (lps->leaderparticipates || lps->pcxt->nworkers_launched == 0)
+	{
+		/* Count the leader as an active worker while it processes indexes. */
+		(void) pg_atomic_add_fetch_u32(VacuumActiveWorkers, 1);
+
 		vacuum_or_cleanup_indexes_worker(Irel, nindexes, stats, lps->lvshared,
 										 vacrelstats->dead_tuples);
+		/*
+		 * Index vacuuming or cleanup is finished, so decrement the active
+		 * worker count.
+		 */
+		(void) pg_atomic_sub_fetch_u32(VacuumActiveWorkers, 1);
+	}
 
 	/* Wait for all vacuum workers to finish */
 	WaitForParallelWorkersToFinish(lps->pcxt);
@@ -3077,6 +3099,7 @@ begin_parallel_vacuum(LVRelStats *vacrelstats, Oid relid, BlockNumber nblocks,
 	prepare_index_statistics(shared, Irel, nindexes);
 	pg_atomic_init_u32(&(shared->nprocessed), 0);
 	pg_atomic_init_u32(&(shared->cost_balance), 0);
+	pg_atomic_init_u32(&(shared->nworkers), 0);
 
 	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
 	lps->lvshared = shared;
@@ -3289,10 +3312,20 @@ heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	if (lvshared->maintenance_work_mem_worker > 0)
 		maintenance_work_mem = lvshared->maintenance_work_mem_worker;
 
+	VacuumActiveWorkers = &(lvshared->nworkers);
+
+	/* Increment the active worker count. */
+	(void) pg_atomic_add_fetch_u32(VacuumActiveWorkers, 1);
+
 	/* Do either vacuuming indexes or cleaning indexes */
 	vacuum_or_cleanup_indexes_worker(indrels, nindexes, stats, lvshared,
 									 dead_tuples);
 
+	/*
+	 * Index vacuuming or cleanup is done, so decrement the active worker count.
+	 */
+	(void) pg_atomic_sub_fetch_u32(VacuumActiveWorkers, 1);
+
 	/* update the total delay in the shared location. */
 	costdelay->stats[slot].time = VacuumCostTotalDelay;
 	costdelay->stats[slot].hit = _nhit;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index bb07a5a..2a2cae2 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2001,6 +2001,14 @@ vacuum_delay_point(void)
 		 */
 		if (VacuumSharedCostBalance != NULL)
 		{
+			int nworkers = pg_atomic_read_u32(VacuumActiveWorkers);
+			/* Local balance including this round; fixed across CAS retries */
+			int local_balance = VacuumCostBalanceLocal + VacuumCostBalance;
+
+			/* Never divide the cost limit by zero active workers */
+			if (nworkers < 1)
+				nworkers = 1;
+
 			while (true)
 			{
 				uint32 shared_balance;
@@ -2011,12 +2019,21 @@ vacuum_delay_point(void)
 				/* compute new balance by adding the local value */
 				shared_balance = pg_atomic_read_u32(VacuumSharedCostBalance);
 				new_balance = shared_balance + VacuumCostBalance;
 
-				if (new_balance >= VacuumCostLimit)
+				/*
+				 * Recompute from the snapshot on each iteration, so a retry after
+				 * a failed compare-and-exchange neither double-counts this round
+				 * nor loses the balance a failed attempt had already reset.
+				 */
+				VacuumCostBalanceLocal = local_balance;
+				if ((new_balance >= VacuumCostLimit) &&
+					(VacuumCostBalanceLocal > 0.5 * ((double) VacuumCostLimit / nworkers)))
 				{
-					/* compute sleep time based on the shared cost balance */
-					msec = VacuumCostDelay * new_balance / VacuumCostLimit;
-					new_balance %= VacuumCostLimit;
+					/* compute sleep time based on the local cost balance */
+					msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit;
+					/* new_balance already includes this round; remove our share */
+					new_balance -= VacuumCostBalanceLocal;
+					VacuumCostBalanceLocal = 0;
 				}
 
 				if (pg_atomic_compare_exchange_u32(VacuumSharedCostBalance,
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index de214f3..f7752a4 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -149,6 +149,7 @@ int _nmiss = 0;
 int _ndirty = 0;
 
 int			VacuumCostBalance = 0;	/* working state for vacuum */
+int			VacuumCostBalanceLocal = 0;	/* local cost accumulated since last reset, for parallel vacuum delay */
 bool		VacuumCostActive = false;
 
 double		vacuum_cleanup_index_scale_factor;
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index ac883f6..81223ed 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -193,6 +193,7 @@ extern Size SyncScanShmemSize(void);
 
 /* in heap/vacuumlazy.c */
 extern pg_atomic_uint32	*VacuumSharedCostBalance;
+extern pg_atomic_uint32	*VacuumActiveWorkers;
 struct VacuumParams;
 extern void heap_vacuum_rel(Relation onerel,
 							struct VacuumParams *params, BufferAccessStrategy bstrategy);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 8d95b6e..55f19ec 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -261,6 +261,7 @@ extern int	VacuumPageMiss;
 extern int	VacuumPageDirty;
 
 extern int	VacuumCostBalance;
+extern int	VacuumCostBalanceLocal;
 extern bool VacuumCostActive;
 
 extern double vacuum_cleanup_index_scale_factor;
