From b4f9a5a23e79e70dea5946e2d70cb8aff269bf31 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Wed, 15 Apr 2020 11:39:51 -0700
Subject: [PATCH 1/2] Redefine split interval to be space-wise.

---
 src/backend/access/nbtree/nbtsplitloc.c | 86 +++++++++++++++++++------
 1 file changed, 68 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c
index c850cd807c..91f1f8feea 100644
--- a/src/backend/access/nbtree/nbtsplitloc.c
+++ b/src/backend/access/nbtree/nbtsplitloc.c
@@ -17,10 +17,6 @@
 #include "access/nbtree.h"
 #include "storage/lmgr.h"
 
-/* limits on split interval (default strategy only) */
-#define MAX_LEAF_INTERVAL			9
-#define MAX_INTERNAL_INTERVAL		18
-
 typedef enum
 {
 	/* strategy for searching through materialized list of split points */
@@ -71,6 +67,7 @@ static void _bt_recsplitloc(FindSplitData *state,
 static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
 								bool usemult);
 static int	_bt_splitcmp(const void *arg1, const void *arg2);
+static int _bt_defaultinterval(FindSplitData *state);
 static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
 								int leaffillfactor, bool *usemult);
 static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
@@ -338,19 +335,6 @@ _bt_findsplitloc(Relation rel,
 		fillfactormult = 0.50;
 	}
 
-	/*
-	 * Set an initial limit on the split interval/number of candidate split
-	 * points as appropriate.  The "Prefix B-Trees" paper refers to this as
-	 * sigma l for leaf splits and sigma b for internal ("branch") splits.
-	 * It's hard to provide a theoretical justification for the initial size
-	 * of the split interval, though it's clear that a small split interval
-	 * makes suffix truncation much more effective without noticeably
-	 * affecting space utilization over time.
-	 */
-	state.interval = Min(Max(1, state.nsplits * 0.05),
-						 state.is_leaf ? MAX_LEAF_INTERVAL :
-						 MAX_INTERNAL_INTERVAL);
-
 	/*
 	 * Save leftmost and rightmost splits for page before original ordinal
 	 * sort order is lost by delta/fillfactormult sort
@@ -361,6 +345,9 @@ _bt_findsplitloc(Relation rel,
 	/* Give split points a fillfactormult-wise delta, and sort on deltas */
 	_bt_deltasortsplits(&state, fillfactormult, usemult);
 
+	/* Determine optimal default strategy split interval from sorted splits */
+	state.interval = _bt_defaultinterval(&state);
+
 	/*
 	 * Determine if default strategy/split interval will produce a
 	 * sufficiently distinguishing split, or if we should change strategies.
@@ -618,6 +605,69 @@ _bt_splitcmp(const void *arg1, const void *arg2)
 	return 0;
 }
 
+#define LEAF_SPLIT_DISTANCE			0.050
+#define INTERNAL_SPLIT_DISTANCE		0.075
+
+/*
+ * Return an initial limit on the number of candidate split points to consider
+ * for the default strategy (i.e. the split interval).  This is based on a
+ * maximum acceptable leftfree + rightfree divergence compared to the
+ * space-wise optimal split point (i.e. compared to the split point currently
+ * at the start of state's sorted "splits" array).
+ *
+ * The "Prefix B-Trees" paper refers to the split interval as sigma l for leaf
+ * splits and sigma b for internal ("branch") splits.  It's hard to provide a
+ * theoretical justification for the size of the split interval, though it's
+ * clear that a small split interval makes suffix truncation much more
+ * effective without noticeably affecting space utilization over time.
+ */
+static int
+_bt_defaultinterval(FindSplitData *state)
+{
+	SplitPoint *spaceoptimal = state->splits;
+	int			lowleftfree;
+	int			lowrightfree;
+	int			highleftfree;
+	int			highrightfree;
+	int			tolerance;
+
+	/*
+	 * Determine values that are higher and lower than we're willing to
+	 * tolerate for both leftfree and rightfree.  Note that the final split
+	 * interval will be about 10% of nsplits in the common case where all
+	 * non-pivot tuples (data items) from a leaf page are uniformly sized.
+	 * The bounds are ints so that adding tolerance cannot wrap a 16-bit value.
+	 */
+	if (state->is_leaf)
+		tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
+	else
+		tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
+
+	lowleftfree = spaceoptimal->leftfree - tolerance;
+	lowrightfree = spaceoptimal->rightfree - tolerance;
+	highleftfree = spaceoptimal->leftfree + tolerance;
+	highrightfree = spaceoptimal->rightfree + tolerance;
+
+	/*
+	 * Iterate through sorted candidate split points starting from the one
+	 * after the space optimal/first split, until we go too far.  The final
+	 * split interval excludes the first candidate split point that exceeds
+	 * the space tolerance.  Posting list tuples are frequently much larger
+	 * than nearby tuples.  It's important that the split interval ends just
+	 * before any large outlier tuple (not after).
+	 */
+	for (int i = 1; i < state->nsplits; i++)
+	{
+		SplitPoint *split = state->splits + i;
+
+		if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
+			split->leftfree > highleftfree || split->rightfree > highrightfree)
+			return i;
+	}
+
+	return state->nsplits;
+}
+
 /*
  * Subroutine to determine whether or not a non-rightmost leaf page should be
  * split immediately after the would-be original page offset for the
@@ -850,7 +900,7 @@ _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
 	 */
 	if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
 		!final->newitemonleft && final->firstrightoff >= state->newitemoff &&
-		final->firstrightoff < state->newitemoff + MAX_LEAF_INTERVAL)
+		final->firstrightoff < state->newitemoff + 9)
 	{
 		/*
 		 * Avoid the problem by performing a 50:50 split when the new item is
-- 
2.25.1

