pingtimeout commented on code in PR #1339:
URL: https://github.com/apache/polaris/pull/1339#discussion_r2073458495


##########
polaris-core/src/main/java/org/apache/polaris/core/persistence/cache/InMemoryEntityCache.java:
##########
@@ -267,104 +189,70 @@ public void setCacheMode(EntityCacheMode cacheMode) {
       @Nonnull PolarisBaseEntity entityToValidate,
       int entityMinVersion,
       int entityGrantRecordsMinVersion) {
-    long entityCatalogId = entityToValidate.getCatalogId();
     long entityId = entityToValidate.getId();
-    PolarisEntityType entityType = entityToValidate.getType();
 
-    // first lookup the cache to find the existing cache entry
+    // First lookup the cache to find the existing cache entry
     ResolvedPolarisEntity existingCacheEntry = this.getEntityById(entityId);
 
-    // the caller's fetched entity may have come from a stale lookup byName; 
we should consider
-    // the existingCacheEntry to be the older of the two for purposes of 
invalidation to make
-    // sure when we replaceCacheEntry we're also removing the old name if it's 
no longer valid
-    EntityCacheByNameKey nameKey = new EntityCacheByNameKey(entityToValidate);
-    ResolvedPolarisEntity existingCacheEntryByName = 
this.getEntityByName(nameKey);
-    if (existingCacheEntryByName != null
-        && existingCacheEntry != null
-        && isNewer(existingCacheEntry, existingCacheEntryByName)) {
-      existingCacheEntry = existingCacheEntryByName;
+    // See if we need to load or refresh that entity
+    if (existingCacheEntry != null
+        && existingCacheEntry.getEntity().getEntityVersion() >= 
entityMinVersion
+        && existingCacheEntry.getEntity().getGrantRecordsVersion()
+            >= entityGrantRecordsMinVersion) {
+      // Cache hit and cache entry is up to date, nothing to do.
+      return existingCacheEntry;
     }
 
-    // the new one to be returned
-    final ResolvedPolarisEntity newCacheEntry;
-
-    // see if we need to load or refresh that entity
-    if (existingCacheEntry == null
-        || existingCacheEntry.getEntity().getEntityVersion() < entityMinVersion
-        || existingCacheEntry.getEntity().getGrantRecordsVersion() < 
entityGrantRecordsMinVersion) {
-
-      // the refreshed entity
-      final ResolvedEntityResult refreshedCacheEntry;
-
-      // was not found in the cache?
-      final PolarisBaseEntity entity;
-      final List<PolarisGrantRecord> grantRecords;
-      final int grantRecordsVersion;
-      if (existingCacheEntry == null) {
-        // try to load it
-        refreshedCacheEntry =
-            this.polarisMetaStoreManager.loadResolvedEntityById(
-                callContext, entityCatalogId, entityId, entityType);
-        if (refreshedCacheEntry.isSuccess()) {
-          entity = refreshedCacheEntry.getEntity();
-          grantRecords = refreshedCacheEntry.getEntityGrantRecords();
-          grantRecordsVersion = refreshedCacheEntry.getGrantRecordsVersion();
-        } else {
-          return null;
-        }
-      } else {
-        // refresh it
-        refreshedCacheEntry =
-            this.polarisMetaStoreManager.refreshResolvedEntity(
-                callContext,
-                existingCacheEntry.getEntity().getEntityVersion(),
-                existingCacheEntry.getEntity().getGrantRecordsVersion(),
-                entityType,
-                entityCatalogId,
-                entityId);
-        if (refreshedCacheEntry.isSuccess()) {
-          entity =
-              (refreshedCacheEntry.getEntity() != null)
-                  ? refreshedCacheEntry.getEntity()
-                  : existingCacheEntry.getEntity();
-          if (refreshedCacheEntry.getEntityGrantRecords() != null) {
-            grantRecords = refreshedCacheEntry.getEntityGrantRecords();
-            grantRecordsVersion = refreshedCacheEntry.getGrantRecordsVersion();
-          } else {
-            grantRecords = existingCacheEntry.getAllGrantRecords();
-            grantRecordsVersion = 
existingCacheEntry.getEntity().getGrantRecordsVersion();
-          }
-        } else {
-          // entity has been purged, remove it
-          this.removeCacheEntry(existingCacheEntry);
-          return null;
-        }
+    // Either cache miss, dropped entity or stale entity. In either case, 
invalidate and reload it.
+    // Do the refresh in a critical section based on the entity ID to avoid 
race conditions.
+    lock.writeLock().lock();
+    try {
+      // Lookup the cache again in case another thread has already invalidated 
it.
+      existingCacheEntry = this.getEntityById(entityId);
+
+      // See if the entity has been refreshed concurrently
+      if (existingCacheEntry != null
+          && existingCacheEntry.getEntity().getEntityVersion() >= 
entityMinVersion
+          && existingCacheEntry.getEntity().getGrantRecordsVersion()
+              >= entityGrantRecordsMinVersion) {
+        // Cache hit and cache entry is up to date now, exit
+        return existingCacheEntry;
+      }
+
+      // Either confirmed cache miss, or the entity is stale. Invalidate it 
just in case.
+      cache.invalidate(entityId);
+
+      // Get the entity from the cache or reload it now that it has been 
invalidated
+      EntityCacheLookupResult cacheLookupResult =
+          this.maybeLoadEntityById(

Review Comment:
   So, there is another caveat.  In the initial `IndexedCache` implementation, 
indeed, a `Striped<Lock>` was used to have up to 1k different locks and improve 
write throughput.  But the `getOrLoadEntityByName(...)` method prevents the use 
of such striped locks altogether as otherwise it could lead to a deadlock.
   
   The logic is that in order to update the cache for a given entity, two write 
locks must be held: one for the entity ID and one for the entity name.  
Problem: such a sequence of event would prevent the dual-lock acquisition to be 
in a consistent order:
   
   1. Thread A calls `getOrLoadEntityById()` for entity with id 123
   2. Thread A acquires the write lock for entity 123
   3. Thread A determines that this corresponds to entity with name `Foo`, but 
before the lock for that name is acquired...
   4. Thread B calls `getOrLoadEntityByName()` for entity with name `Foo`
   5. Thread B acquires the write lock for entity with name `Foo`
   6. Thread B determines that this corresponds to entity with id 123
   7. Thread A cannot acquire the lock for name `Foo`
   8. Thread B cannot acquire the lock for id 123
   
   Essentially, it would be trivial to solve this by imposing a lock order like 
"first get the lock by ID and then by name" (or the other way around).  But 
that order cannot be enforced all the time.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to