From 942de12a49f53e20ab460d68191a6dc576f0750b Mon Sep 17 00:00:00 2001
From: EC2 Default User <ec2-user@ip-172-31-18-99.ec2.internal>
Date: Fri, 5 Jul 2024 16:47:21 +0000
Subject: [PATCH 1/1] vacuum_delay with absolute time nanosleep

---
 src/backend/commands/vacuum.c | 74 +++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 11 deletions(-)

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 48f8eab202..bfa024f583 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -40,6 +40,7 @@
 #include "catalog/pg_inherits.h"
 #include "commands/cluster.h"
 #include "commands/defrem.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -116,6 +117,58 @@ static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 static double compute_parallel_delay(void);
 static VacOptValue get_vacoptval_from_boolean(DefElem *def);
 static bool vac_tid_reaped(ItemPointer itemptr, void *state);
+static void vacuum_sleep(double msec);
+
+/*
+ * vacuum_sleep --- nap for msec milliseconds, resuming after signal
+ * interrupts; exits if running under a postmaster that has since died.
+ */
+static void
+vacuum_sleep(double msec)
+{
+	long		microsec = msec * 1000;
+
+	if (microsec > 0)
+	{
+#ifndef WIN32
+		/*
+		 * Restart nanosleep() after each signal, but recompute the time left
+		 * from a deadline fixed up front rather than trusting nanosleep's
+		 * "remaining" output, which drifts when interrupts are frequent.
+		 */
+		struct timespec delay;
+		struct timespec deadline;
+
+		delay.tv_sec = microsec / 1000000L;
+		delay.tv_nsec = (microsec % 1000000L) * 1000;
+
+		/* tv_nsec may exceed one second here; only differences are used */
+		clock_gettime(PG_INSTR_CLOCK, &deadline);
+		deadline.tv_sec += delay.tv_sec;
+		deadline.tv_nsec += delay.tv_nsec;
+
+		while (nanosleep(&delay, NULL) == -1 && errno == EINTR)
+		{
+			struct timespec now;
+			long long	left_ns;
+
+			clock_gettime(PG_INSTR_CLOCK, &now);
+			left_ns = (long long) (deadline.tv_sec - now.tv_sec) * 1000000000LL +
+				(deadline.tv_nsec - now.tv_nsec);
+			if (left_ns <= 0)
+				break;			/* deadline already reached */
+			delay.tv_sec = left_ns / 1000000000LL;
+			delay.tv_nsec = left_ns % 1000000000LL;
+		}
+#else
+		SleepEx((microsec < 500 ? 1 : (microsec + 500) / 1000), FALSE);
+#endif
+	}
+
+	/* Don't ignore postmaster death during long vacuums. */
+	if (IsUnderPostmaster && !PostmasterIsAlive())
+		exit(1);
+}
 
 /*
  * GUC check function to ensure GUC value specified is within the allowable
@@ -2380,21 +2433,20 @@ vacuum_delay_point(void)
 	/* Nap if appropriate */
 	if (msec > 0)
 	{
+		instr_time	delay_start;
+		instr_time	delay_end;
+		instr_time	delayed_time;
+
 		if (msec > vacuum_cost_delay * 4)
 			msec = vacuum_cost_delay * 4;
 
-		pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
-		pg_usleep(msec * 1000);
-		pgstat_report_wait_end();
+		INSTR_TIME_SET_CURRENT(delay_start);
+		vacuum_sleep(msec);
+		INSTR_TIME_SET_CURRENT(delay_end);
 
-		/*
-		 * We don't want to ignore postmaster death during very long vacuums
-		 * with vacuum_cost_delay configured.  We can't use the usual
-		 * WaitLatch() approach here because we want microsecond-based sleep
-		 * durations above.
-		 */
-		if (IsUnderPostmaster && !PostmasterIsAlive())
-			exit(1);
+		INSTR_TIME_SET_ZERO(delayed_time);
+		INSTR_TIME_ACCUM_DIFF(delayed_time, delay_end, delay_start);
+		elog(LOG, "msec = %lf, delayed_time = %lf", msec, INSTR_TIME_GET_MILLISEC(delayed_time));
 
 		VacuumCostBalance = 0;
 
-- 
2.40.1

