suxiaogang223 commented on code in PR #59217:
URL: https://github.com/apache/doris/pull/59217#discussion_r2642364566
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java:
##########
@@ -363,9 +375,83 @@ private CloseableIterable<FileScanTask>
planFileScanTask(TableScan scan) {
return TableScanUtil.splitFiles(scan.planFiles(), targetSplitSize);
}
+ /**
+ * Initialize cached values for LocationPath creation on first use.
+ * This avoids repeated StorageProperties lookup, scheme parsing, and
S3URI regex parsing for each file.
+ */
+ private void initLocationPathCache(String samplePath) {
+ if (locationPathCacheInitialized) {
+ return;
+ }
+ synchronized (this) {
+ if (locationPathCacheInitialized) {
+ return;
+ }
+ try {
+ // Create a LocationPath using the full method to get all
cached values
+ LocationPath sampleLocationPath = LocationPath.of(samplePath,
storagePropertiesMap);
+ cachedStorageProperties =
sampleLocationPath.getStorageProperties();
+ cachedSchema = sampleLocationPath.getSchema();
+ cachedFsIdentifier = sampleLocationPath.getFsIdentifier();
+
+ // Extract fsIdPrefix like "s3://" from fsIdentifier like
"s3://bucket"
+ int schemeEnd = cachedFsIdentifier.indexOf("://");
+ if (schemeEnd > 0) {
+ cachedFsIdPrefix = cachedFsIdentifier.substring(0,
schemeEnd + 3);
+ }
+
+ // Cache path prefix mapping for fast transformation
+ // This allows subsequent files to skip S3URI regex parsing
entirely
+ String normalizedPath =
sampleLocationPath.getNormalizedLocation();
+
+ // Find the common prefix by looking for the last '/' before
the filename
+ int lastSlashInOriginal = samplePath.lastIndexOf('/');
+ int lastSlashInNormalized = normalizedPath.lastIndexOf('/');
+
+ if (lastSlashInOriginal > 0 && lastSlashInNormalized > 0) {
+ cachedOriginalPathPrefix = samplePath.substring(0,
lastSlashInOriginal + 1);
+ cachedNormalizedPathPrefix = normalizedPath.substring(0,
lastSlashInNormalized + 1);
+ }
+
+ locationPathCacheInitialized = true;
+ } catch (Exception e) {
+ // If caching fails, we'll fall back to the full method each
time
+ LOG.warn("Failed to initialize LocationPath cache, will use
full parsing", e);
+ locationPathCacheInitialized = true;
Review Comment:
Don't set `locationPathCacheInitialized = true` in this catch block; leave it
unset so that initialization is attempted again on the next use.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]