From 30d718db26f442f558e2df9a7604b8bbdcd4ae94 Mon Sep 17 00:00:00 2001
From: Joel Jakobsson <joel@compiler.org>
Date: Sun, 22 Jan 2023 12:57:30 -0400
Subject: [PATCH 1/5] Use 128-bit math to accelerate numeric division, when 8 <
 divisor digits <= 16

This patch adds div_var_int64(), which is similar to the existing div_var_int(),
but accepts a 64-bit divisor instead of a 32-bit divisor.

The new function is used within div_var() and div_var_fast().
---
 src/backend/utils/adt/numeric.c | 167 ++++++++++++++++++++++++++++++++
 1 file changed, 167 insertions(+)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index a6409ecbee..e48493ac4a 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -554,6 +554,10 @@ static void div_var_fast(const NumericVar *var1, const NumericVar *var2,
 						 NumericVar *result, int rscale, bool round);
 static void div_var_int(const NumericVar *var, int ival, int ival_weight,
 						NumericVar *result, int rscale, bool round);
+#ifdef HAVE_INT128
+static void div_var_int64(const NumericVar *var, int64 ival, int ival_weight,
+						NumericVar *result, int rscale, bool round);
+#endif
 static int	select_div_scale(const NumericVar *var1, const NumericVar *var2);
 static void mod_var(const NumericVar *var1, const NumericVar *var2,
 					NumericVar *result);
@@ -8484,6 +8488,10 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
 	/*
 	 * If the divisor has just one or two digits, delegate to div_var_int(),
 	 * which uses fast short division.
+	 *
+	 * On platforms with 128-bit integer support, and if the divisor has
+	 * three or four digits, delegate to div_var_int64(),
+	 * which also uses fast short division.
 	 */
 	if (var2ndigits <= 2)
 	{
@@ -8503,6 +8511,27 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
 		div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
 		return;
 	}
+	#ifdef HAVE_INT128
+	else if (var2ndigits <= 4)
+	{
+		int64		idivisor;
+		int			idivisor_weight;
+
+		idivisor = var2->digits[0];
+		idivisor_weight = var2->weight;
+		for (i = 1; i < var2ndigits; i++)
+		{
+			idivisor *= NBASE;
+			idivisor += var2->digits[i];
+			idivisor_weight--;
+		}
+		if (var2->sign == NUMERIC_NEG)
+			idivisor = -idivisor;
+
+		div_var_int64(var1, idivisor, idivisor_weight, result, rscale, round);
+		return;
+	}
+	#endif
 
 	/*
 	 * Otherwise, perform full long division.
@@ -8774,6 +8803,10 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
 	/*
 	 * If the divisor has just one or two digits, delegate to div_var_int(),
 	 * which uses fast short division.
+	 *
+	 * On platforms with 128-bit integer support, and if the divisor has
+	 * three or four digits, delegate to div_var_int64(),
+	 * which also uses fast short division.
 	 */
 	if (var2ndigits <= 2)
 	{
@@ -8793,6 +8826,27 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
 		div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
 		return;
 	}
+	#ifdef HAVE_INT128
+	else if (var2ndigits <= 4)
+	{
+		int64		idivisor;
+		int			idivisor_weight;
+
+		idivisor = var2->digits[0];
+		idivisor_weight = var2->weight;
+		for (i = 1; i < var2ndigits; i++)
+		{
+			idivisor *= NBASE;
+			idivisor += var2->digits[i];
+			idivisor_weight--;
+		}
+		if (var2->sign == NUMERIC_NEG)
+			idivisor = -idivisor;
+
+		div_var_int64(var1, idivisor, idivisor_weight, result, rscale, round);
+		return;
+	}
+	#endif
 
 	/*
 	 * Otherwise, perform full long division.
@@ -9182,6 +9236,119 @@ div_var_int(const NumericVar *var, int ival, int ival_weight,
 }
 
 
+#ifdef HAVE_INT128
+/*
+ * div_var_int64() -
+ *
+ *	Divide a numeric variable by a 64-bit integer with the specified weight.
+ *	The quotient var / (ival * NBASE^ival_weight) is stored in result.
+ */
+static void
+div_var_int64(const NumericVar *var, int64 ival, int ival_weight,
+			NumericVar *result, int rscale, bool round)
+{
+	NumericDigit *var_digits = var->digits;
+	int			var_ndigits = var->ndigits;
+	int			res_sign;
+	int			res_weight;
+	int			res_ndigits;
+	NumericDigit *res_buf;
+	NumericDigit *res_digits;
+	uint64		divisor;
+	int			i;
+
+	/* Guard against division by zero */
+	if (ival == 0)
+		ereport(ERROR,
+				errcode(ERRCODE_DIVISION_BY_ZERO),
+				errmsg("division by zero"));
+
+	/* Result zero check */
+	if (var_ndigits == 0)
+	{
+		zero_var(result);
+		result->dscale = rscale;
+		return;
+	}
+
+	/*
+	 * Determine the result sign, weight and number of digits to calculate.
+	 * The weight figured here is correct if the emitted quotient has no
+	 * leading zero digits; otherwise strip_var() will fix things up.
+	 */
+	if (var->sign == NUMERIC_POS)
+		res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG;
+	else
+		res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS;
+	res_weight = var->weight - ival_weight;
+	/* The number of accurate result digits we need to produce: */
+	res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
+	/* ... but always at least 1 */
+	res_ndigits = Max(res_ndigits, 1);
+	/* If rounding needed, figure one more digit to ensure correct result */
+	if (round)
+		res_ndigits++;
+
+	res_buf = digitbuf_alloc(res_ndigits + 1);
+	res_buf[0] = 0;				/* spare digit for later rounding */
+	res_digits = res_buf + 1;
+
+	/*
+	 * Now compute the quotient digits.  This is the short division algorithm
+	 * described in Knuth volume 2, section 4.3.1 exercise 16, except that we
+	 * allow the divisor to exceed the internal base.
+	 *
+	 * In this algorithm, the carry from one digit to the next is at most
+	 * divisor - 1.  Therefore, while processing the next digit, carry may
+	 * become as large as divisor * NBASE - 1, and so it requires a 128-bit
+	 * integer if this exceeds PG_UINT64_MAX.
+	 */
+	divisor = i64abs(ival);
+
+	if (divisor <= PG_UINT64_MAX / NBASE)
+	{
+		/* carry cannot overflow 64 bits */
+		uint64		carry = 0;
+
+		for (i = 0; i < res_ndigits; i++)
+		{
+			carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
+			res_digits[i] = (NumericDigit) (carry / divisor);
+			carry = carry % divisor;
+		}
+	}
+	else
+	{
+		/* carry may exceed 64 bits */
+		uint128		carry = 0;
+
+		for (i = 0; i < res_ndigits; i++)
+		{
+			carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
+			res_digits[i] = (NumericDigit) (carry / divisor);
+			carry = carry % divisor;
+		}
+	}
+
+	/* Store the quotient in result */
+	digitbuf_free(result->buf);
+	result->ndigits = res_ndigits;
+	result->buf = res_buf;
+	result->digits = res_digits;
+	result->weight = res_weight;
+	result->sign = res_sign;
+
+	/* Round or truncate to target rscale (and set result->dscale) */
+	if (round)
+		round_var(result, rscale);
+	else
+		trunc_var(result, rscale);
+
+	/* Strip leading/trailing zeroes */
+	strip_var(result);
+}
+#endif
+
 /*
  * Default scale selection for division
  *
-- 
2.37.1 (Apple Git-137.1)