ATLAS-2827: Indexed string compaction when length exceeds 33482223

Change-Id: I6e250653da854ecf75bf004c43aaf854ca061d26

(cherry picked from commit 69fe4ab)


Project: http://git-wip-us.apache.org/repos/asf/atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/3e89e9b4
Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/3e89e9b4
Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/3e89e9b4

Branch: refs/heads/branch-1.0
Commit: 3e89e9b4897244c8cd73407d63731000c067c951
Parents: dfd6e89
Author: apoorvnaik <[email protected]>
Authored: Tue Jul 31 07:06:17 2018 -0700
Committer: apoorvnaik <[email protected]>
Committed: Fri Aug 17 08:40:58 2018 -0700

----------------------------------------------------------------------
 .../store/graph/v2/EntityGraphMapper.java       | 24 ++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/atlas/blob/3e89e9b4/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
----------------------------------------------------------------------
diff --git 
a/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
 
b/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
index 50e19e6..066af7b 100644
--- 
a/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
+++ 
b/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
@@ -45,6 +45,7 @@ import org.apache.atlas.repository.graphdb.AtlasVertex;
 import org.apache.atlas.repository.store.graph.AtlasRelationshipStore;
 import org.apache.atlas.repository.store.graph.v1.DeleteHandlerV1;
 import org.apache.atlas.type.AtlasArrayType;
+import org.apache.atlas.type.AtlasBuiltInTypes;
 import org.apache.atlas.type.AtlasClassificationType;
 import org.apache.atlas.type.AtlasEntityType;
 import org.apache.atlas.type.AtlasMapType;
@@ -55,6 +56,7 @@ import org.apache.atlas.type.AtlasType;
 import org.apache.atlas.type.AtlasTypeRegistry;
 import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.utils.AtlasJson;
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -104,6 +106,7 @@ import static 
org.apache.atlas.type.AtlasStructType.AtlasAttribute.AtlasRelation
 @Component
 public class EntityGraphMapper {
     private static final Logger LOG = 
LoggerFactory.getLogger(EntityGraphMapper.class);
+    private static final int INDEXED_STR_MAX_ALLOWED_LEN = 33482223;
 
     private final GraphHelper               graphHelper = 
GraphHelper.getInstance();
     private final AtlasGraph                graph;
@@ -619,9 +622,26 @@ public class EntityGraphMapper {
     }
 
     private Object mapPrimitiveValue(AttributeMutationContext ctx) {
-        AtlasGraphUtilsV2.setProperty(ctx.getReferringVertex(), 
ctx.getVertexProperty(), ctx.getValue());
+        boolean isIndexableStrAttr = ctx.getAttributeDef().getIsIndexable() && 
ctx.getAttrType() instanceof AtlasBuiltInTypes.AtlasStringType;
 
-        return ctx.getValue();
+        Object ret = ctx.getValue();
+
+        // JanusGraph bug: when an indexed string attribute has a value longer than 
a certain length, the reverse indexed key generated by JanusGraph
+        // exceeds the HBase row length's hard limit (Short.MAX_VALUE). This 
trimming and hashing procedure circumvents that limitation.
+        if (ret != null && isIndexableStrAttr) {
+            String value = (String) ctx.getValue();
+
+            if (value.length() > INDEXED_STR_MAX_ALLOWED_LEN) {
+                LOG.warn("Indexed-String-Attribute: {} exceeds {} characters, 
trimming and appending checksum",
+                         ctx.getAttribute().getQualifiedName(), 
INDEXED_STR_MAX_ALLOWED_LEN);
+                String sha256Hex = DigestUtils.shaHex(value); // Storing SHA 
checksum in case verification is needed after retrieval
+                ret = value.substring(0, (INDEXED_STR_MAX_ALLOWED_LEN - 1) - 
sha256Hex.length()) + ":" + sha256Hex;
+            }
+        }
+
+        AtlasGraphUtilsV2.setProperty(ctx.getReferringVertex(), 
ctx.getVertexProperty(), ret);
+
+        return ret;
     }
 
     private AtlasEdge mapStructValue(AttributeMutationContext ctx, 
EntityMutationContext context) throws AtlasBaseException {

Reply via email to