yihua commented on code in PR #18295:
URL: https://github.com/apache/hudi/pull/18295#discussion_r3083735118
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java:
##########
@@ -249,6 +267,87 @@ public static HoodieLockConfig.Builder newBuilder() {
return new HoodieLockConfig.Builder();
}
+ /**
+ * Build a {@link HoodieLockConfig} by copying lock-related configs from the
given write config
+ * based on the configured lock provider. Only built-in lock providers are
supported.
+ *
+ * @param lockProviderClass the lock provider class name
+ * @param writeConfig the write config to copy lock properties from
+ * @return a new HoodieLockConfig with the relevant lock properties
+ * @throws HoodieException if the lock provider is not a built-in provider
+ */
+ public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String
lockProviderClass, HoodieWriteConfig writeConfig) {
+ TypedProperties dataProps = writeConfig.getProps();
+ Properties lockProps = new Properties();
+ lockProps.put(LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass);
+
+ // Common lock configs used by all providers
+ lockProps.put(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_NUM_RETRIES.key(),
+ writeConfig.getStringOrDefault(LOCK_ACQUIRE_NUM_RETRIES));
+ lockProps.put(LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(),
+ writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_NUM_RETRIES));
+ lockProps.put(LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(),
+
String.valueOf(writeConfig.getIntOrDefault(LOCK_ACQUIRE_WAIT_TIMEOUT_MS)));
+ lockProps.put(LOCK_HEARTBEAT_INTERVAL_MS.key(),
+
String.valueOf(writeConfig.getIntOrDefault(LOCK_HEARTBEAT_INTERVAL_MS)));
+
+ // Provider-specific configs with lock key validation where applicable.
+ // Providers that infer lock keys (e.g. from TBL_NAME or
HoodieTableConfig.NAME) at build time
+ // won't carry those inferred values into the rebuilt config, so we
require them to be set explicitly.
+ if
(FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass))
{
+ copyPropsWithPrefix(dataProps, lockProps,
LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX);
+ } else if
(ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass))
{
+ copyPropsWithPrefix(dataProps, lockProps,
LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX);
+ if (!lockProps.containsKey(LockConfiguration.ZK_LOCK_KEY_PROP_KEY)) {
+ throw new
IllegalArgumentException(LockConfiguration.ZK_LOCK_KEY_PROP_KEY
+ + " must be explicitly set on the data table's lock config when
using " + lockProviderClass
+ + ". The inferred default from table name is not propagated to the
metadata table lock config.");
+ }
+ } else if
(ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass))
{
+ copyPropsWithPrefix(dataProps, lockProps,
LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX);
+ } else if
(HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) {
+ copyPropsWithPrefix(dataProps, lockProps,
LockConfiguration.HIVE_METASTORE_LOCK_PROPERTY_PREFIX);
+ if
(!lockProps.containsKey(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY)) {
+ throw new
IllegalArgumentException(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY
+ + " must be explicitly set on the data table's lock config when
using " + lockProviderClass);
+ }
+ if (!lockProps.containsKey(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY)) {
+ throw new
IllegalArgumentException(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY
+ + " must be explicitly set on the data table's lock config when
using " + lockProviderClass);
Review Comment:
🤖 Same base-path divergence issue for
`DynamoDBBasedImplicitPartitionKeyLockProvider` — it hashes
`HoodieCommonConfig.BASE_PATH` to derive the DynamoDB partition key. The MDT
writer will have a different base path, so it'll get a different partition key
and won't share the same lock as the data table writer. Consider rejecting this
provider or explicitly passing the data table's base path.
<sub><i>- Generated by an AI agent and may contain mistakes. Please verify
any suggestions before applying.</i></sub>
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java:
##########
@@ -249,6 +267,87 @@ public static HoodieLockConfig.Builder newBuilder() {
return new HoodieLockConfig.Builder();
}
+ /**
+ * Build a {@link HoodieLockConfig} by copying lock-related configs from the
given write config
+ * based on the configured lock provider. Only built-in lock providers are
supported.
+ *
+ * @param lockProviderClass the lock provider class name
+ * @param writeConfig the write config to copy lock properties from
+ * @return a new HoodieLockConfig with the relevant lock properties
+ * @throws HoodieException if the lock provider is not a built-in provider
+ */
+ public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String
lockProviderClass, HoodieWriteConfig writeConfig) {
+ TypedProperties dataProps = writeConfig.getProps();
+ Properties lockProps = new Properties();
+ lockProps.put(LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass);
+
+ // Common lock configs used by all providers
+ lockProps.put(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_NUM_RETRIES.key(),
+ writeConfig.getStringOrDefault(LOCK_ACQUIRE_NUM_RETRIES));
+ lockProps.put(LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(),
+ writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_NUM_RETRIES));
+ lockProps.put(LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(),
+
String.valueOf(writeConfig.getIntOrDefault(LOCK_ACQUIRE_WAIT_TIMEOUT_MS)));
+ lockProps.put(LOCK_HEARTBEAT_INTERVAL_MS.key(),
+
String.valueOf(writeConfig.getIntOrDefault(LOCK_HEARTBEAT_INTERVAL_MS)));
+
+ // Provider-specific configs with lock key validation where applicable.
+ // Providers that infer lock keys (e.g. from TBL_NAME or
HoodieTableConfig.NAME) at build time
+ // won't carry those inferred values into the rebuilt config, so we
require them to be set explicitly.
+ if
(FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass))
{
+ copyPropsWithPrefix(dataProps, lockProps,
LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX);
+ } else if
(ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass))
{
Review Comment:
🤖 The `ZookeeperBasedImplicitBasePathLockProvider` derives its ZK base path
by hashing `HoodieCommonConfig.BASE_PATH`. When this config is rebuilt for the
MDT writer, the base path will be the MDT path (e.g.
`<data_path>/.hoodie/metadata`), producing a different hash and thus a
different ZK lock path than the data table writer. This means the two writers
won't contend for the same lock.
Could you either reject this provider (like `InProcessLockProvider`) or
inject the data table's base path into the lock props so the hash matches?
<sub><i>- Generated by an AI agent and may contain mistakes. Please verify
any suggestions before applying.</i></sub>
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java:
##########
@@ -249,6 +267,87 @@ public static HoodieLockConfig.Builder newBuilder() {
return new HoodieLockConfig.Builder();
}
+ /**
+ * Build a {@link HoodieLockConfig} by copying lock-related configs from the
given write config
+ * based on the configured lock provider. Only built-in lock providers are
supported.
+ *
+ * @param lockProviderClass the lock provider class name
+ * @param writeConfig the write config to copy lock properties from
+ * @return a new HoodieLockConfig with the relevant lock properties
+ * @throws HoodieException if the lock provider is not a built-in provider
+ */
+ public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String
lockProviderClass, HoodieWriteConfig writeConfig) {
+ TypedProperties dataProps = writeConfig.getProps();
+ Properties lockProps = new Properties();
+ lockProps.put(LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass);
+
+ // Common lock configs used by all providers
+ lockProps.put(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(),
+
writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS));
+ lockProps.put(LOCK_ACQUIRE_NUM_RETRIES.key(),
+ writeConfig.getStringOrDefault(LOCK_ACQUIRE_NUM_RETRIES));
+ lockProps.put(LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(),
+ writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_NUM_RETRIES));
+ lockProps.put(LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(),
+
String.valueOf(writeConfig.getIntOrDefault(LOCK_ACQUIRE_WAIT_TIMEOUT_MS)));
+ lockProps.put(LOCK_HEARTBEAT_INTERVAL_MS.key(),
+
String.valueOf(writeConfig.getIntOrDefault(LOCK_HEARTBEAT_INTERVAL_MS)));
+
+ // Provider-specific configs with lock key validation where applicable.
Review Comment:
🤖 The `FileSystemBasedLockProvider` fallback issue from my previous review
is still open — if the user doesn't explicitly set
`hoodie.write.lock.filesystem.path`, the provider falls back to
`<base_path>/.hoodie/lock`. Since the MDT writer has a different base path, the
fallback lock file location will differ. Could you add a validation that
`FILESYSTEM_LOCK_PATH_PROP_KEY` is present after `copyPropsWithPrefix`, similar
to the ZK/DynamoDB checks?
<sub><i>- Generated by an AI agent and may contain mistakes. Please verify
any suggestions before applying.</i></sub>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]