diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 28e3aed..3e70d07 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -17,6 +17,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/sysattr.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
 #include "catalog/objectaccess.h"
@@ -871,6 +872,90 @@ get_domain_constraint_oid(Oid typid, const char *conname, bool missing_ok)
 }
 
 /*
+ * get_primary_key_attnos
+ *		Returns a Bitmapset of each Var's varattno offset by
+ *		FirstLowInvalidHeapAttributeNumber which makes up the PRIMARY KEY of
+ *		'relid'. If no PRIMARY KEY exists on 'relid' then NULL is returned.
+ *
+ * When 'deferrableOk' is false we will return NULL if the PRIMARY KEY
+ * constraint is set to deferrable.
+ * 'constraintOid' will be set to the OID of the PRIMARY KEY constraint,
+ * although this is only set when the function does not return NULL.
+ */
+Bitmapset *
+get_primary_key_attnos(Oid relid, bool deferrableOk, Oid *constraintOid)
+{
+	Relation		pg_constraint;
+	HeapTuple		tuple;
+	SysScanDesc		scan;
+	ScanKeyData		skey[1];
+	Bitmapset	   *pkattnos = NULL;
+
+	/* Scan pg_constraint for constraints of the target rel */
+	pg_constraint = heap_open(ConstraintRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_constraint_conrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+
+	scan = systable_beginscan(pg_constraint, ConstraintRelidIndexId, true,
+							  NULL, 1, skey);
+
+	while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+	{
+		Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tuple);
+		Datum		adatum;
+		bool		isNull;
+		ArrayType  *arr;
+		int16	   *attnums;
+		int			numkeys;
+		int			i;
+
+		/* Skip constraints which are not PRIMARY KEYs */
+		if (con->contype != CONSTRAINT_PRIMARY)
+			continue;
+
+		/*
+		 * If the primary key is deferrable, but we've been instructed to
+		 * ignore deferrable constraints, then we'll simply break and exit
+		 * returning NULL. There can only be a single primary key on a table,
+		 * so it's senseless to look for another.
+		 */
+		if (con->condeferrable && !deferrableOk)
+			break;
+
+		/* Extract the conkey array, ie, attnums of PK's columns */
+		adatum = heap_getattr(tuple, Anum_pg_constraint_conkey,
+							  RelationGetDescr(pg_constraint), &isNull);
+		if (isNull)
+			elog(ERROR, "null conkey for constraint %u",
+				 HeapTupleGetOid(tuple));
+		arr = DatumGetArrayTypeP(adatum);		/* ensure not toasted */
+		numkeys = ARR_DIMS(arr)[0];
+		if (ARR_NDIM(arr) != 1 ||
+			numkeys < 0 ||
+			ARR_HASNULL(arr) ||
+			ARR_ELEMTYPE(arr) != INT2OID)
+			elog(ERROR, "conkey is not a 1-D smallint array");
+		attnums = (int16 *) ARR_DATA_PTR(arr);
+
+		for (i = 0; i < numkeys; i++)
+			pkattnos = bms_add_member(pkattnos,
+							attnums[i] - FirstLowInvalidHeapAttributeNumber);
+
+		*constraintOid = HeapTupleGetOid(tuple);
+		break;
+	}
+
+	systable_endscan(scan);
+
+	heap_close(pg_constraint, AccessShareLock);
+
+	return pkattnos;
+}
+
+/*
  * Determine whether a relation can be proven functionally dependent on
  * a set of grouping columns.  If so, return TRUE and add the pg_constraint
  * OIDs of the constraints needed for the proof to the *constraintDeps list.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index c0ec905..b32a369 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -20,7 +20,9 @@
 
 #include "access/htup_details.h"
 #include "access/parallel.h"
+#include "access/sysattr.h"
 #include "access/xact.h"
+#include "catalog/pg_constraint.h"
 #include "executor/executor.h"
 #include "executor/nodeAgg.h"
 #include "foreign/fdwapi.h"
@@ -87,6 +89,7 @@ static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
 				 int64 *offset_est, int64 *count_est);
 static bool limit_needed(Query *parse);
+static void remove_useless_groupby_columns(PlannerInfo *root);
 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 static List *extract_rollup_sets(List *groupingSets);
 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
@@ -664,6 +667,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	}
 	parse->havingQual = (Node *) newHaving;
 
+	/* remove any dedundant GROUP BY columns */
+	remove_useless_groupby_columns(root);
+
 	/*
 	 * If we have any outer joins, try to reduce them to plain inner joins.
 	 * This step is most easily done after we've done expression
@@ -3160,6 +3166,176 @@ limit_needed(Query *parse)
 	return false;				/* don't need a Limit plan node */
 }
 
+/*
+ * remove_useless_groupby_columns
+ *		Removes columns from the GROUP BY clause which are redundant due to
+ *		being functionally dependant on one or more other column which are
+ *		present.
+ *
+ * We do support non-aggregated column in the query's targetlist which are not
+ * present in the GROUP BY clause, providing these columns can be determined to
+ * be functionally dependant on the relation's primary key constraint. There's
+ * quite a few relational databases which don't allow this, so this means it
+ * can be quite common that queries are written to have the GROUP BY clause
+ * include all of the columns which are not aggregated in the query's
+ * targetlist. These columns have no need to be there as long as the columns
+ * which make up the table's PRIMARY KEY are present in the GROUP BY clause.
+ * Here we test to see if the primary key columns are present for each relation
+ * which make up the columns of the GROUP BY clause, and if so we remove any
+ * that belong to the same relation which are not part of the primary key.
+ *
+ * In reality we could do more than just the primary key columns here as UNIQUE
+ * indexes with NOT NULL constraints on the columns allow us to also determine
+ * functional dependencies. However we mustn't go any further than what's done
+ * by check_functional_grouping().
+ */
+static void
+remove_useless_groupby_columns(PlannerInfo *root)
+{
+	Bitmapset	  **groupbyattnos;
+	Bitmapset	  **surplusvars;
+	ListCell	   *lc;
+	int				relid;
+	Query		   *parse = root->parse;
+
+	/* Nothing do to when there are fewer than two group by expression */
+	if (list_length(parse->groupClause) < 2)
+		return;
+
+	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
+	if (parse->groupingSets)
+		return;
+
+	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
+										   (list_length(parse->rtable) + 1));
+	surplusvars = NULL;			/* don't allocate unless required */
+
+	/*
+	 * Pre-process the GROUP BY clause and populate groupbyattnos with
+	 * varattnos of Vars which belong to the clause.
+	 */
+	foreach (lc, parse->groupClause)
+	{
+		SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+		TargetEntry *tle;
+		Var *var;
+
+		tle = get_sortgroupclause_tle(sgc, parse->targetList);
+		var = (Var *) tle->expr;
+
+		/*
+		 * Ignore non-Vars or Vars from a higher level.
+		 * XXX it may be useful to put more effort into removing non-Vars here.
+		 * If the clause is an Expr containing Vars from a single relation then
+		 * we may also be able to remove it, providing that we find all PRIMARY
+		 * KEY columns elsewhere in the GROUP BY clause. Perhaps we can improve
+		 * on this later, but for now it seems better to keep it simple.
+		 */
+		if (!IsA(var, Var) || var->varlevelsup > 0)
+			continue;
+
+		relid = var->varno;
+
+		Assert(relid <= list_length(parse->rtable));
+		groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
+						var->varattno - FirstLowInvalidHeapAttributeNumber);
+	}
+
+	/*
+	 * Look at each relation and check if any of the Vars which were found in
+	 * the GROUP BY clause are not needed due to all the PK constraint Vars
+	 * being present in the clause. We'll take note of any surplus Vars which
+	 * we find rather than removing them from the GROUP BY clause right away.
+	 * This allows us to make a single pass over the GROUP BY clause, rather
+	 * than one pass per relation.
+	 */
+	relid = 0;
+	foreach(lc, parse->rtable)
+	{
+		RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+		Bitmapset *relattnos;
+		Bitmapset *pkattnos;
+		Oid		  constraintOid;
+
+		relid++;
+
+		if (rte->rtekind != RTE_RELATION)
+			continue;
+
+		relattnos = groupbyattnos[relid];
+
+		/*
+		 * Clearly there's no room for removing Vars from this rel from the
+		 * GROUP BY clause if there were fewer than two Vars found to belong to
+		 * this rel.
+		 */
+		if (bms_num_members(relattnos) < 2)
+			continue;
+
+		pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
+
+		/*
+		 * Can't remove any columns for this rel if there is no suitable
+		 * primary key constraint.
+		 */
+		if (pkattnos == NULL)
+			continue;
+
+		/*
+		 * If the pkattnos is a proper subset of relattnos then we have some
+		 * items in the GROUP BY which can be removed.
+		 */
+		if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
+		{
+			parse->constraintDeps = lappend_oid(parse->constraintDeps,
+												constraintOid);
+
+			/*
+			 * Now record which Vars are not required in the GROUP BY. We must
+			 * first allocate space to do this, which we've delayed until here
+			 * to save on unnecessarily allocations where no removals are
+			 * possible.
+			 */
+			if (surplusvars == NULL)
+				surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
+											(list_length(parse->rtable) + 1));
+
+			/*
+			 * Mark all Vars which we originally collected above minus the
+			 * primary key Vars as surplus. We'll remove all of these from the
+			 * GROUP BY in one pass, once we've checked all other relations
+			 * which are involved in the GROUP BY clause.
+			 */
+			surplusvars[relid] = bms_difference(relattnos, pkattnos);
+		}
+	}
+
+	/* build a new GROUP BY clause with all of the surplus Vars removed */
+	if (surplusvars != NULL)
+	{
+		List *new_groupby = NIL;
+
+		foreach (lc, root->parse->groupClause)
+		{
+			SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+			TargetEntry *tle;
+			Var *var;
+
+			tle = get_sortgroupclause_tle(sgc, root->parse->targetList);
+			var = (Var *) tle->expr;
+
+			/* re-add non-Vars and anything not marked as surplus */
+			if (!IsA(var, Var) ||
+				!bms_is_member(var->varattno -
+				FirstLowInvalidHeapAttributeNumber, surplusvars[var->varno]))
+				new_groupby = lappend(new_groupby, sgc);
+
+			/* XXX do we to alter tleSortGroupRef to remove gaps? */
+		}
+		root->parse->groupClause = new_groupby;
+	}
+}
+
 
 /*
  * preprocess_groupclause - do preparatory work on GROUP BY clause
diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h
index 07dbcea..bb8983c 100644
--- a/src/include/catalog/pg_constraint.h
+++ b/src/include/catalog/pg_constraint.h
@@ -250,6 +250,9 @@ extern void AlterConstraintNamespaces(Oid ownerId, Oid oldNspId,
 extern Oid	get_relation_constraint_oid(Oid relid, const char *conname, bool missing_ok);
 extern Oid	get_domain_constraint_oid(Oid typid, const char *conname, bool missing_ok);
 
+extern Bitmapset *get_primary_key_attnos(Oid relid, bool deferrableOk,
+										 Oid *constraintOid);
+
 extern bool check_functional_grouping(Oid relid,
 						  Index varno, Index varlevelsup,
 						  List *grouping_columns,
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index de826b5..65f6d4f 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -751,6 +751,69 @@ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
  9999 | 1
 (3 rows)
 
+-- check group by clause is processed correctly to remove surplus columns
+create temp table t1 (a int, b int, c int, d int, primary key (a, b));
+create temp table t2 (x int, y int, z int, primary key (x, y));
+create temp table t3 (a int, b int, c int, primary key(a, b) deferrable);
+-- ensure only primary key columns remain in group by
+explain (costs off) select * from t1 group by a,b,c,d;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, b
+   ->  Seq Scan on t1
+(3 rows)
+
+-- ensure c and d are not removed since the complete PK is not present in the
+-- group by clause
+explain (costs off) select a,c from t1 group by a,c,d;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, c, d
+   ->  Seq Scan on t1
+(3 rows)
+
+-- ensure non-pk columns are removed from group by for all relations.
+explain (costs off) select * from t1
+inner join t2 on t1.a = t2.x and t1.b = t2.y
+group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Group
+   Group Key: t1.a, t1.b, t2.x, t2.y
+   ->  Merge Join
+         Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y))
+         ->  Index Scan using t1_pkey on t1
+         ->  Index Scan using t2_pkey on t2
+(6 rows)
+
+-- ensure non-pk columns are only removed from t1.
+explain (costs off) select t1.*,t2.x,t2.z from t1
+inner join t2 on t1.a = t2.x and t1.b = t2.y
+group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
+                      QUERY PLAN                       
+-------------------------------------------------------
+ HashAggregate
+   Group Key: t1.a, t1.b, t2.x, t2.z
+   ->  Merge Join
+         Merge Cond: ((t1.a = t2.x) AND (t1.b = t2.y))
+         ->  Index Scan using t1_pkey on t1
+         ->  Index Scan using t2_pkey on t2
+(6 rows)
+
+-- ensure group by columns are not removed when pk is deferrable
+explain (costs off) select * from t3 group by a,b,c;
+      QUERY PLAN      
+----------------------
+ HashAggregate
+   Group Key: a, b, c
+   ->  Seq Scan on t3
+(3 rows)
+
+drop table t1;
+drop table t2;
+drop table t3;
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 64f046e..5bb406c 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3919,8 +3919,8 @@ select d.* from d left join (select distinct * from b) s
 explain (costs off)
 select d.* from d left join (select * from b group by b.id, b.c_id) s
   on d.a = s.id;
-                 QUERY PLAN                  
----------------------------------------------
+              QUERY PLAN               
+---------------------------------------
  Merge Left Join
    Merge Cond: (d.a = s.id)
    ->  Sort
@@ -3930,7 +3930,7 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
          Sort Key: s.id
          ->  Subquery Scan on s
                ->  HashAggregate
-                     Group Key: b.id, b.c_id
+                     Group Key: b.id
                      ->  Seq Scan on b
 (11 rows)
 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 8d501dc..d09ff08 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -270,6 +270,35 @@ explain (costs off)
   select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
 
+-- check group by clause is processed correctly to remove surplus columns
+create temp table t1 (a int, b int, c int, d int, primary key (a, b));
+create temp table t2 (x int, y int, z int, primary key (x, y));
+create temp table t3 (a int, b int, c int, primary key(a, b) deferrable);
+
+-- ensure only primary key columns remain in group by
+explain (costs off) select * from t1 group by a,b,c,d;
+
+-- ensure c and d are not removed since the complete PK is not present in the
+-- group by clause
+explain (costs off) select a,c from t1 group by a,c,d;
+
+-- ensure non-pk columns are removed from group by for all relations.
+explain (costs off) select * from t1
+inner join t2 on t1.a = t2.x and t1.b = t2.y
+group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
+
+-- ensure non-pk columns are only removed from t1.
+explain (costs off) select t1.*,t2.x,t2.z from t1
+inner join t2 on t1.a = t2.x and t1.b = t2.y
+group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
+
+-- ensure group by columns are not removed when pk is deferrable
+explain (costs off) select * from t3 group by a,b,c;
+
+drop table t1;
+drop table t2;
+drop table t3;
+
 -- try it on an inheritance tree
 create table minmaxtest(f1 int);
 create table minmaxtest1() inherits (minmaxtest);
