singhpk234 commented on code in PR #1517:
URL: https://github.com/apache/polaris/pull/1517#discussion_r2074454437
##########
extension/persistence/relational-jdbc/src/main/java/org/apache/polaris/extension/persistence/relational/jdbc/DatasourceOperations.java:
##########
@@ -173,23 +190,82 @@ public int executeUpdate(String query) throws
SQLException {
* @throws SQLException : Exception caught during transaction execution.
*/
public void runWithinTransaction(TransactionCallback callback) throws
SQLException {
- try (Connection connection = borrowConnection()) {
- boolean autoCommit = connection.getAutoCommit();
- connection.setAutoCommit(false);
- boolean success = false;
+ withRetries(
+ () -> {
+ try (Connection connection = borrowConnection()) {
+ boolean autoCommit = connection.getAutoCommit();
+ boolean success = false;
+ connection.setAutoCommit(false);
+ try {
+ try (Statement statement = connection.createStatement()) {
+ success = callback.execute(statement);
+ }
+ } finally {
+ if (success) {
+ connection.commit();
+ } else {
+ connection.rollback();
+ }
+ connection.setAutoCommit(autoCommit);
+ }
+ }
+ return null;
+ });
+ }
+
+ private boolean isRetryable(SQLException e) {
+ String sqlState = e.getSQLState();
+
+ if (sqlState != null) {
+ return sqlState.equals(DEADLOCK_SQL_CODE)
+ || // Deadlock detected
+ sqlState.equals(SERIALIZATION_FAILURE_SQL_CODE); // Serialization
failure
+ }
+
+ // Additionally, one might check for specific error messages or other
conditions
+ return e.getMessage().contains("connection refused")
+ || e.getMessage().contains("connection reset");
+ }
+
+ public <T> T withRetries(Operation<T> operation) throws SQLException {
+ int attempts = 0;
+ // maximum number of retries.
+ int maxAttempts = relationalJdbcConfiguration.maxRetries().orElse(1);
+ // How long we should try, since the first attempt.
+ long maxDuration =
relationalJdbcConfiguration.maxDurationInMs().orElse(100L);
+ // How long to wait before first failure.
+ long delay = relationalJdbcConfiguration.initialDelayInMs().orElse(100L);
+
+ // maximum time we will retry till.
+ long maxRetryTime = Instant.now().toEpochMilli() + maxDuration;
+
+ while (attempts < maxAttempts) {
try {
- try (Statement statement = connection.createStatement()) {
- success = callback.execute(statement);
+ return operation.execute();
+ } catch (SQLException e) {
+ attempts++;
+ long timeLeft = Math.max((maxRetryTime -
Instant.now().toEpochMilli()), 0L);
+ if (attempts >= maxAttempts || !isRetryable(e) || timeLeft == 0) {
+ throw e;
}
- } finally {
- if (success) {
- connection.commit();
- } else {
- connection.rollback();
+ // Add jitter
+ long timeToSleep = Math.min(timeLeft, delay + (long)
(random.nextDouble() * 0.2 * delay));
Review Comment:
`delay` is not a fixed component: it grows exponentially, essentially as
2^attempt. So the sleep time we end up with is a random value in
[delay, 1.2 * delay), where delay ~ 2^attempt. This way we get exponential
backoff and also avoid retries colliding at exactly the same time.
A sleep time of 0 is not possible, since both `timeLeft` and
`delay + (long) (random.nextDouble() * 0.2 * delay)` are > 0.
Please let me know your thoughts considering the above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]