From ddccbe9247161cc280e2eec058e3618d46fa2e58 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Tue, 6 Nov 2018 16:51:35 +0500
Subject: [PATCH] Implement different B-tree page layouts

---
 src/backend/access/nbtree/nbtree.c    | 212 ++++++++++++++++++++++++++
 src/backend/access/nbtree/nbtsearch.c |  19 +++
 src/backend/storage/page/bufpage.c    |  26 ++++
 src/include/storage/bufpage.h         |   1 +
 4 files changed, 258 insertions(+)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index e8725fbbe1..67271df1a6 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1089,6 +1089,210 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		*oldestBtpoXact = vstate.oldestBtpoXact;
 }
 
+/* Append offsets low .. high - 1 to order[] in ascending order; high is exclusive. */
+static void
+_bt_sequential_layout(OffsetNumber *order, int *next, OffsetNumber low, OffsetNumber high)
+{
+	OffsetNumber off;
+
+	/* *next is the first free slot of order[] and is advanced per entry */
+	for (off = low; off < high; off++)
+		order[(*next)++] = off;
+}
+
+/*
+ * Append the midpoints of both halves of [low, high) to order[], then recurse
+ * into each half.  The midpoint of [low, high) itself is assumed to be in
+ * order[] already.  Halves follow the binary search step of _bt_binsrch:
+ * [low, mid) when the search narrows with high = mid, [mid + 1, high) when
+ * it continues with low = mid + 1.
+ */
+static void
+_bt_metabtree_recursion(OffsetNumber* order, int* next, OffsetNumber low, OffsetNumber high)
+{
+	if (high > low)
+	{
+		OffsetNumber low1, high1, mid_low, mid_high;
+
+		OffsetNumber mid = low + ((high - low) / 2);
+		low1 = mid + 1;
+		high1 = mid;
+		mid_low = low + ((high1 - low) / 2);
+		mid_high = low1 + ((high - low1) / 2);
+
+		if (mid_low != mid)		/* equal only when the left half is empty */
+		{
+			order[*next] = mid_low;
+			(*next)++;
+		}
+		if (mid_high != mid && mid_high != high)	/* mid_high == high: right half is empty */
+		{
+			order[*next] = mid_high;
+			(*next)++;
+		}
+
+		_bt_metabtree_recursion(order, next, low, high1);
+		_bt_metabtree_recursion(order, next, low1, high);
+	}
+}
+
+/*
+ * Lay out [low, high): the midpoint of the range comes first, everything
+ * else is appended by _bt_metabtree_recursion().  An empty range adds nothing.
+ */
+static void
+_bt_metabtree_layout(OffsetNumber *order, int *next, OffsetNumber low, OffsetNumber high)
+{
+	OffsetNumber root;
+
+	/* high is exclusive, so high <= low means there is no slot to place */
+	if (high <= low)
+		return;
+
+	/* same midpoint formula the recursion uses to split the range */
+	root = low + ((high - low) / 2);
+	order[(*next)++] = root;
+
+	_bt_metabtree_recursion(order, next, low, high);
+}
+
+/*
+ * Append the offsets of [low, high) to order[] in pre-order of the implicit
+ * binary search tree: the midpoint, then the whole left half [low, mid),
+ * then the whole right half [mid + 1, high).
+ */
+static void
+_bt_eyzinger_recursion(OffsetNumber *order, int *next, OffsetNumber low, OffsetNumber high)
+{
+	/* the right half is handled by looping instead of a second recursive call */
+	while (high > low)
+	{
+		OffsetNumber pivot = low + ((high - low) / 2);
+
+		order[(*next)++] = pivot;
+
+		/* the search moves here when it narrows with high = mid */
+		_bt_eyzinger_recursion(order, next, low, pivot);
+		low = pivot + 1;
+	}
+}
+
+/*
+ * Append the offsets of [low, high) to order[] three at a time: the midpoint
+ * of the range followed by the midpoints of its left half [low, mid) and
+ * right half [mid + 1, high), where those halves are non-empty.  The four
+ * sub-ranges left between these three offsets are then handled recursively.
+ */
+static void
+_bt_veb_recursion(OffsetNumber* order, int* next, OffsetNumber low, OffsetNumber high)
+{
+	if (high > low)
+	{
+		OffsetNumber low1, high1, mid_low, mid_high;
+
+		OffsetNumber mid = low + ((high - low) / 2);
+		low1 = mid + 1;
+		high1 = mid;
+		mid_low = low + ((high1 - low) / 2);
+		mid_high = low1 + ((high - low1) / 2);
+
+		/* high > low guarantees low <= mid < high, so mid is a slot of the range */
+		order[*next] = mid;
+		(*next)++;
+		if (mid_low != mid)		/* equal only when the left half is empty */
+		{
+			order[*next] = mid_low;
+			(*next)++;
+		}
+		if (mid_high != high)	/* equal only when the right half is empty */
+		{
+			order[*next] = mid_high;
+			(*next)++;
+		}
+
+		/* mid_low, mid and mid_high are placed; recurse into the gaps around them */
+		_bt_veb_recursion(order, next, low, mid_low);
+		_bt_veb_recursion(order, next, mid_low + 1, mid);
+		_bt_veb_recursion(order, next, mid + 1, mid_high);
+		_bt_veb_recursion(order, next, mid_high + 1, high);
+	}
+}
+
+/*
+ * Check that order[0 .. maxoff-1] is a permutation of FirstOffsetNumber ..
+ * maxoff.  Raises ERROR on the first violation; otherwise returns true.
+ */
+static bool
+_bt_check_layout(Page page, OffsetNumber *order)
+{
+	/* indexed by offset number 0 .. maxoff, hence the extra slot */
+	bool		busy[MaxOffsetNumber + 1] = {0};
+	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+	int			i;
+
+	for (i = 0; i < maxoff; i++)
+	{
+		OffsetNumber current = order[i];
+
+		/* reject out-of-range entries before they are used as an index */
+		if (current > maxoff || current == InvalidOffsetNumber)
+			elog(ERROR,"Page layout is broken: incorrect offset number %u at %i", current, i);
+		if (busy[current])
+			elog(ERROR,"Page layout is broken: offset number %u is used more than once at %i", current, i);
+		busy[current] = true;
+	}
+
+	/* every offset on the page must have been seen exactly once */
+	for (i = FirstOffsetNumber; i <= maxoff; i++)
+	{
+		if (!busy[i])
+			elog(ERROR,"Page layout is broken: offset number %d is not used", i);
+	}
+	return true;
+}
+
+#define USE_EYZINGER_ORDER
+/*
+ * Fill order[] with the tuple placement chosen by the USE_*_ORDER macro: data
+ * keys first, then P_HIKEY when the page is not rightmost.  Returns false,
+ * leaving order[] untouched, when the page holds no data keys.
+ */
+static bool
+_bt_prepare_layout(Page page, OffsetNumber *order)
+{
+	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+	OffsetNumber first = P_FIRSTDATAKEY(opaque);
+	OffsetNumber limit;
+	int			next = 0;
+
+	if (maxoff < first)
+		return false;
+
+	/* the layout routines work on the half-open range [first, limit) */
+	limit = maxoff + 1;
+
+#ifdef USE_BT_ORDER
+	_bt_metabtree_layout(order, &next, first, limit);
+#elif defined(USE_VEB_ORDER)
+	_bt_veb_recursion(order, &next, first, limit);
+#elif defined(USE_EYZINGER_ORDER)
+	_bt_eyzinger_recursion(order, &next, first, limit);
+#elif defined(USE_SEQ_ORDER)
+	_bt_sequential_layout(order, &next, first, limit);
+#endif
+
+	/* non-rightmost pages additionally get P_HIKEY as the last entry */
+	if (!P_RIGHTMOST(opaque))
+		order[next++] = P_HIKEY;
+
+	/* every line pointer of the page must now appear in order[] */
+	Assert(next == maxoff);
+	Assert(_bt_check_layout(page, order));
+
+	return true;
+}
+
 /*
  * btvacuumpage --- VACUUM one page
  *
@@ -1114,6 +1318,8 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 	Page		page;
 	BTPageOpaque opaque = NULL;
 
+	OffsetNumber order[MaxOffsetNumber];
+
 restart:
 	delete_now = false;
 	recurse_to = P_NONE;
@@ -1350,7 +1556,13 @@ restart:
 		/* pagedel released buffer, so we shouldn't */
 	}
 	else
+	{
+#if defined(USE_BT_ORDER) || defined(USE_VEB_ORDER) || defined(USE_EYZINGER_ORDER) || defined(USE_SEQ_ORDER)
+		if (_bt_prepare_layout(page, order))	/* false: order[] was not filled */
+			PageMakeSpecialFragmentation(page, order);
+#endif
 		_bt_relbuf(rel, buf);
+	}
 
 	/*
 	 * This is really tail recursion, but if the compiler is too stupid to
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 8b2772c154..e7c5308349 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -399,6 +399,25 @@ _bt_binsrch(Relation rel,
 	{
 		OffsetNumber mid = low + ((high - low) / 2);
 
+#define USE_EYZINGER_ORDER
+#define USE_PREFETCH
+
+#if defined(USE_PREFETCH) && defined(__GNUC__)	/* __builtin_prefetch is a GCC builtin */
+		/*
+		 * Prefetch the tuples the next iteration may probe: the midpoint of
+		 * [mid + 1, high) and, unless USE_BT_ORDER is set, that of [low, mid).
+		 */
+		OffsetNumber x = mid + 1 + ((high - (mid + 1)) / 2);
+		if (x < high)
+			__builtin_prefetch (PageGetItem(page, PageGetItemId(page, x)), 0, 2);
+#ifndef USE_BT_ORDER
+		x = low + ((mid - low) / 2);
+		if (x > low)
+			__builtin_prefetch (PageGetItem(page, PageGetItemId(page, x)), 0, 2);
+#endif
+#endif
+
+
 		/* We have low <= mid < high, so mid points at a real slot */
 
 		result = _bt_compare(rel, keysz, scankey, page, mid);
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index dfbda5458f..e3f553478c 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -375,6 +375,32 @@ PageGetTempPageCopy(Page page)
 	return temp;
 }
 
+/*
+ * Repack tuple data in order[]: order[0] gets the lowest address, the last
+ * entry ends at pd_special.  Only lp_off values and pd_upper are changed.
+ */
+void
+PageMakeSpecialFragmentation(Page page, uint16 *order)
+{
+	PageHeader	phdr = (PageHeader) page;
+	Page		copy = PageGetTempPageCopy(page);	/* stable source image */
+	Offset		upper = phdr->pd_special;
+	int			i = PageGetMaxOffsetNumber(page);
+
+	/* fill downwards from the special space, last entry of order[] first */
+	while (--i >= 0)
+	{
+		ItemId		lp = PageGetItemId(page, order[i]);
+		Size		alignedlen = MAXALIGN(ItemIdGetLength(lp));
+
+		upper -= alignedlen;
+		memmove((char *) page + upper, (char *) copy + ItemIdGetOffset(lp), alignedlen);
+		lp->lp_off = upper;
+	}
+	phdr->pd_upper = upper;
+	pfree(copy);
+}
+
 /*
  * PageGetTempPageCopySpecial
  *		Get a temporary page in local memory for special processing.
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 85dd10c45a..c6b31fa3e8 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -434,5 +434,6 @@ extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
 						Item newtup, Size newsize);
 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
+extern void PageMakeSpecialFragmentation(Page page, uint16 *order);
 
 #endif							/* BUFPAGE_H */
-- 
2.17.2 (Apple Git-113)

