diff --git a/contrib/auto_explain/auto_explain.c b/contrib/auto_explain/auto_explain.c
index 445bb37191..e9092ba359 100644
--- a/contrib/auto_explain/auto_explain.c
+++ b/contrib/auto_explain/auto_explain.c
@@ -314,7 +314,7 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 			MemoryContext oldcxt;
 
 			oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
-			queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
+			queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
 			MemoryContextSwitchTo(oldcxt);
 		}
 	}
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index f42f07622e..77ca5abcdc 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -974,7 +974,7 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
 			MemoryContext oldcxt;
 
 			oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
-			queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
+			queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
 			MemoryContextSwitchTo(oldcxt);
 		}
 	}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 6f533c745d..6a0e27d0f6 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -10132,18 +10132,32 @@ SELECT * FROM join_tbl ORDER BY a1;
 (3 rows)
 
 DELETE FROM join_tbl;
+DROP TABLE local_tbl;
+DROP FOREIGN TABLE remote_tbl;
+DROP FOREIGN TABLE insert_tbl;
+DROP TABLE base_tbl3;
+DROP TABLE base_tbl4;
 RESET enable_mergejoin;
 RESET enable_hashjoin;
+-- Check EXPLAIN ANALYZE for a query that scans empty partitions asynchronously
+DELETE FROM async_p1;
+DELETE FROM async_p2;
+DELETE FROM async_p3;
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+SELECT * FROM async_pt;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Append (actual rows=0 loops=1)
+   ->  Async Foreign Scan on async_p1 async_pt_1 (actual rows=0 loops=1)
+   ->  Async Foreign Scan on async_p2 async_pt_2 (actual rows=0 loops=1)
+   ->  Seq Scan on async_p3 async_pt_3 (actual rows=0 loops=1)
+(4 rows)
+
 -- Clean up
 DROP TABLE async_pt;
 DROP TABLE base_tbl1;
 DROP TABLE base_tbl2;
 DROP TABLE result_tbl;
-DROP TABLE local_tbl;
-DROP FOREIGN TABLE remote_tbl;
-DROP FOREIGN TABLE insert_tbl;
-DROP TABLE base_tbl3;
-DROP TABLE base_tbl4;
 DROP TABLE join_tbl;
 ALTER SERVER loopback OPTIONS (DROP async_capable);
 ALTER SERVER loopback2 OPTIONS (DROP async_capable);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 8bcdc8d616..38ae9149b8 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -1542,7 +1542,7 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags)
 							 &fsstate->param_values);
 
 	/* Set the async-capable flag */
-	fsstate->async_capable = node->ss.ps.plan->async_capable;
+	fsstate->async_capable = node->ss.ps.async_capable;
 }
 
 /*
@@ -6864,7 +6864,7 @@ produce_tuple_asynchronously(AsyncRequest *areq, bool fetch)
 	}
 
 	/* Get a tuple from the ForeignScan node */
-	result = ExecProcNode((PlanState *) node);
+	result = areq->requestee->ExecProcNodeReal(areq->requestee);
 	if (!TupIsNull(result))
 	{
 		/* Mark the request as complete */
@@ -6953,6 +6953,11 @@ process_pending_request(AsyncRequest *areq)
 	/* Unlike AsyncNotify, we call ExecAsyncResponse ourselves */
 	ExecAsyncResponse(areq);
 
+	/* Also, we do instrumentation ourselves, if required */
+	if (areq->requestee->instrument)
+		InstrUpdateTupleCount(areq->requestee->instrument,
+							  TupIsNull(areq->result) ? 0.0 : 1.0);
+
 	MemoryContextSwitchTo(oldcontext);
 }
 
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 000e2534fc..333988cd93 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3226,19 +3226,28 @@ INSERT INTO join_tbl SELECT * FROM async_pt LEFT JOIN t ON (async_pt.a = t.a AND
 SELECT * FROM join_tbl ORDER BY a1;
 DELETE FROM join_tbl;
 
+DROP TABLE local_tbl;
+DROP FOREIGN TABLE remote_tbl;
+DROP FOREIGN TABLE insert_tbl;
+DROP TABLE base_tbl3;
+DROP TABLE base_tbl4;
+
 RESET enable_mergejoin;
 RESET enable_hashjoin;
 
+-- Check EXPLAIN ANALYZE for a query that scans empty partitions asynchronously
+DELETE FROM async_p1;
+DELETE FROM async_p2;
+DELETE FROM async_p3;
+
+EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
+SELECT * FROM async_pt;
+
 -- Clean up
 DROP TABLE async_pt;
 DROP TABLE base_tbl1;
 DROP TABLE base_tbl2;
 DROP TABLE result_tbl;
-DROP TABLE local_tbl;
-DROP FOREIGN TABLE remote_tbl;
-DROP FOREIGN TABLE insert_tbl;
-DROP TABLE base_tbl3;
-DROP TABLE base_tbl4;
 DROP TABLE join_tbl;
 
 ALTER SERVER loopback OPTIONS (DROP async_capable);
diff --git a/src/backend/executor/execAsync.c b/src/backend/executor/execAsync.c
index f1985e658c..75108d36be 100644
--- a/src/backend/executor/execAsync.c
+++ b/src/backend/executor/execAsync.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "executor/execAsync.h"
+#include "executor/executor.h"
 #include "executor/nodeAppend.h"
 #include "executor/nodeForeignscan.h"
 
@@ -24,6 +25,13 @@
 void
 ExecAsyncRequest(AsyncRequest *areq)
 {
+	if (areq->requestee->chgParam != NULL)	/* something changed? */
+		ExecReScan(areq->requestee);		/* let ReScan handle this */
+
+	/* must provide our own instrumentation support */
+	if (areq->requestee->instrument)
+		InstrStartNode(areq->requestee->instrument);
+
 	switch (nodeTag(areq->requestee))
 	{
 		case T_ForeignScanState:
@@ -36,6 +44,11 @@ ExecAsyncRequest(AsyncRequest *areq)
 	}
 
 	ExecAsyncResponse(areq);
+
+	/* must provide our own instrumentation support */
+	if (areq->requestee->instrument)
+		InstrStopNode(areq->requestee->instrument,
+					  TupIsNull(areq->result) ? 0.0 : 1.0);
 }
 
 /*
@@ -48,6 +61,10 @@ ExecAsyncRequest(AsyncRequest *areq)
 void
 ExecAsyncConfigureWait(AsyncRequest *areq)
 {
+	/* must provide our own instrumentation support */
+	if (areq->requestee->instrument)
+		InstrStartNode(areq->requestee->instrument);
+
 	switch (nodeTag(areq->requestee))
 	{
 		case T_ForeignScanState:
@@ -58,6 +75,10 @@ ExecAsyncConfigureWait(AsyncRequest *areq)
 			elog(ERROR, "unrecognized node type: %d",
 				 (int) nodeTag(areq->requestee));
 	}
+
+	/* must provide our own instrumentation support */
+	if (areq->requestee->instrument)
+		InstrStopNode(areq->requestee->instrument, 0.0);
 }
 
 /*
@@ -66,6 +87,10 @@ ExecAsyncConfigureWait(AsyncRequest *areq)
 void
 ExecAsyncNotify(AsyncRequest *areq)
 {
+	/* must provide our own instrumentation support */
+	if (areq->requestee->instrument)
+		InstrStartNode(areq->requestee->instrument);
+
 	switch (nodeTag(areq->requestee))
 	{
 		case T_ForeignScanState:
@@ -78,6 +103,11 @@ ExecAsyncNotify(AsyncRequest *areq)
 	}
 
 	ExecAsyncResponse(areq);
+
+	/* must provide our own instrumentation support */
+	if (areq->requestee->instrument)
+		InstrStopNode(areq->requestee->instrument,
+					  TupIsNull(areq->result) ? 0.0 : 1.0);
 }
 
 /*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index df3d7f9a8b..58b4968735 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1214,7 +1214,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 		resultRelInfo->ri_TrigWhenExprs = (ExprState **)
 			palloc0(n * sizeof(ExprState *));
 		if (instrument_options)
-			resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options);
+			resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options, false);
 	}
 	else
 	{
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 9f8c7582e0..753f46863b 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -407,7 +407,8 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
 
 	/* Set up instrumentation for this node if requested */
 	if (estate->es_instrument)
-		result->instrument = InstrAlloc(1, estate->es_instrument);
+		result->instrument = InstrAlloc(1, estate->es_instrument,
+										result->async_capable);
 
 	return result;
 }
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 237e13361b..b5792c1e53 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -28,7 +28,7 @@ static void WalUsageAdd(WalUsage *dst, WalUsage *add);
 
 /* Allocate new instrumentation structure(s) */
 Instrumentation *
-InstrAlloc(int n, int instrument_options)
+InstrAlloc(int n, int instrument_options, bool async_mode)
 {
 	Instrumentation *instr;
 
@@ -46,6 +46,7 @@ InstrAlloc(int n, int instrument_options)
 			instr[i].need_bufusage = need_buffers;
 			instr[i].need_walusage = need_wal;
 			instr[i].need_timer = need_timer;
+			instr[i].async_mode = async_mode;
 		}
 	}
 
@@ -82,6 +83,7 @@ InstrStartNode(Instrumentation *instr)
 void
 InstrStopNode(Instrumentation *instr, double nTuples)
 {
+	double		save_tuplecount = instr->tuplecount;
 	instr_time	endtime;
 
 	/* count the returned tuples */
@@ -114,6 +116,26 @@ InstrStopNode(Instrumentation *instr, double nTuples)
 		instr->running = true;
 		instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter);
 	}
+	else
+	{
+		/*
+		 * In async mode, if the plan node hadn't emitted any tuples before,
+		 * this might be the first tuple
+		 */
+		if (instr->async_mode && save_tuplecount < 1.0)
+			instr->firsttuple = INSTR_TIME_GET_DOUBLE(instr->counter);
+	}
+}
+
+/* Update tuple count */
+void
+InstrUpdateTupleCount(Instrumentation *instr, double nTuples)
+{
+	if (!instr->running)
+		elog(ERROR, "InstrUpdateTupleCount called on node not yet executed");
+
+	/* count the returned tuples */
+	instr->tuplecount += nTuples;
 }
 
 /* Finish a run cycle for a plan node */
diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c
index 3c1f12adaf..bf6aa10ad6 100644
--- a/src/backend/executor/nodeAppend.c
+++ b/src/backend/executor/nodeAppend.c
@@ -440,7 +440,7 @@ ExecReScanAppend(AppendState *node)
 
 		/*
 		 * If chgParam of subnode is not null then plan will be re-scanned by
-		 * first ExecProcNode.
+		 * first ExecProcNode or by first ExecAsyncRequest.
 		 */
 		if (subnode->chgParam == NULL)
 			ExecReScan(subnode);
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index 898890fb08..9dc38d47ea 100644
--- a/src/backend/executor/nodeForeignscan.c
+++ b/src/backend/executor/nodeForeignscan.c
@@ -209,6 +209,13 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
 	scanstate->fdw_recheck_quals =
 		ExecInitQual(node->fdw_recheck_quals, (PlanState *) scanstate);
 
+	/*
+	 * Determine whether to scan the foreign relation asynchronously or not;
+	 * this has to be kept in sync with the code in ExecInitAppend().
+	 */
+	scanstate->ss.ps.async_capable = (((Plan *) node)->async_capable &&
+									  estate->es_epq_active == NULL);
+
 	/*
 	 * Initialize FDW-related state.
 	 */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index aa8eceda5f..c79e46aaaa 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -55,6 +55,7 @@ typedef struct Instrumentation
 	bool		need_timer;		/* true if we need timer data */
 	bool		need_bufusage;	/* true if we need buffer usage data */
 	bool		need_walusage;	/* true if we need WAL usage data */
+	bool		async_mode;		/* true if node is in async mode */
 	/* Info about current plan cycle: */
 	bool		running;		/* true if we've completed first tuple */
 	instr_time	starttime;		/* start time of current iteration of node */
@@ -84,10 +85,12 @@ typedef struct WorkerInstrumentation
 extern PGDLLIMPORT BufferUsage pgBufferUsage;
 extern PGDLLIMPORT WalUsage pgWalUsage;
 
-extern Instrumentation *InstrAlloc(int n, int instrument_options);
+extern Instrumentation *InstrAlloc(int n, int instrument_options,
+								   bool async_mode);
 extern void InstrInit(Instrumentation *instr, int instrument_options);
 extern void InstrStartNode(Instrumentation *instr);
 extern void InstrStopNode(Instrumentation *instr, double nTuples);
+extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples);
 extern void InstrEndLoop(Instrumentation *instr);
 extern void InstrAggNode(Instrumentation *dst, Instrumentation *add);
 extern void InstrStartParallelQuery(void);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e7ae21c023..3c22284b09 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1003,6 +1003,8 @@ typedef struct PlanState
 	ExprContext *ps_ExprContext;	/* node's expression-evaluation context */
 	ProjectionInfo *ps_ProjInfo;	/* info for doing tuple projection */
 
+	bool		async_capable;	/* true if node is async-capable */
+
 	/*
 	 * Scanslot's descriptor if known. This is a bit of a hack, but otherwise
 	 * it's hard for expression compilation to optimize based on the
