Repository: atlas
Updated Branches:
  refs/heads/master 064c5767c -> 69fe4ab73


ATLAS-2827: Indexed string compaction when length exceeds 33482223

Change-Id: I6e250653da854ecf75bf004c43aaf854ca061d26


Project: http://git-wip-us.apache.org/repos/asf/atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/69fe4ab7
Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/69fe4ab7
Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/69fe4ab7

Branch: refs/heads/master
Commit: 69fe4ab73bcab9ee118f1575ee476dd55fb44319
Parents: 064c576
Author: apoorvnaik <[email protected]>
Authored: Tue Jul 31 07:06:17 2018 -0700
Committer: apoorvnaik <[email protected]>
Committed: Thu Aug 16 22:22:24 2018 -0700

----------------------------------------------------------------------
 .../store/graph/v2/EntityGraphMapper.java       | 47 ++++++++++----------
 1 file changed, 24 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/atlas/blob/69fe4ab7/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java b/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
index 2153a5a..386ebb6 100644
--- a/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
+++ b/repository/src/main/java/org/apache/atlas/repository/store/graph/v2/EntityGraphMapper.java
@@ -45,6 +45,7 @@ import org.apache.atlas.repository.graphdb.AtlasVertex;
 import org.apache.atlas.repository.store.graph.AtlasRelationshipStore;
 import org.apache.atlas.repository.store.graph.v1.DeleteHandlerV1;
 import org.apache.atlas.type.AtlasArrayType;
+import org.apache.atlas.type.AtlasBuiltInTypes;
 import org.apache.atlas.type.AtlasClassificationType;
 import org.apache.atlas.type.AtlasEntityType;
 import org.apache.atlas.type.AtlasMapType;
@@ -55,6 +56,7 @@ import org.apache.atlas.type.AtlasType;
 import org.apache.atlas.type.AtlasTypeRegistry;
 import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.utils.AtlasJson;
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -75,27 +77,8 @@ import static 
org.apache.atlas.model.instance.EntityMutations.EntityOperation.DE
 import static 
org.apache.atlas.model.instance.EntityMutations.EntityOperation.PARTIAL_UPDATE;
 import static 
org.apache.atlas.model.instance.EntityMutations.EntityOperation.UPDATE;
 import static 
org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef.Cardinality.SET;
-import static org.apache.atlas.repository.Constants.ATTRIBUTE_KEY_PROPERTY_KEY;
-import static org.apache.atlas.repository.Constants.CLASSIFICATION_LABEL;
-import static 
org.apache.atlas.repository.Constants.CLASSIFICATION_VERTEX_REMOVE_PROPAGATIONS_KEY;
-import static org.apache.atlas.repository.Constants.STATE_PROPERTY_KEY;
-import static org.apache.atlas.repository.Constants.TRAIT_NAMES_PROPERTY_KEY;
-import static 
org.apache.atlas.repository.Constants.ATTRIBUTE_INDEX_PROPERTY_KEY;
-import static 
org.apache.atlas.repository.graph.GraphHelper.getCollectionElementsUsingRelationship;
-import static 
org.apache.atlas.repository.graph.GraphHelper.getClassificationEdge;
-import static 
org.apache.atlas.repository.graph.GraphHelper.getClassificationVertex;
-import static 
org.apache.atlas.repository.graph.GraphHelper.getDefaultRemovePropagations;
-import static 
org.apache.atlas.repository.graph.GraphHelper.getMapElementsProperty;
-import static 
org.apache.atlas.repository.graph.GraphHelper.getPropagatedTraitNames;
-import static org.apache.atlas.repository.graph.GraphHelper.getStatus;
-import static org.apache.atlas.repository.graph.GraphHelper.getTraitLabel;
-import static org.apache.atlas.repository.graph.GraphHelper.getTraitNames;
-import static org.apache.atlas.repository.graph.GraphHelper.getTypeName;
-import static org.apache.atlas.repository.graph.GraphHelper.getTypeNames;
-import static 
org.apache.atlas.repository.graph.GraphHelper.isPropagationEnabled;
-import static org.apache.atlas.repository.graph.GraphHelper.isRelationshipEdge;
-import static org.apache.atlas.repository.graph.GraphHelper.string;
-import static 
org.apache.atlas.repository.graph.GraphHelper.updateModificationMetadata;
+import static org.apache.atlas.repository.Constants.*;
+import static org.apache.atlas.repository.graph.GraphHelper.*;
 import static 
org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2.getIdFromVertex;
 import static 
org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2.isReference;
 import static 
org.apache.atlas.type.AtlasStructType.AtlasAttribute.AtlasRelationshipEdgeDirection.IN;
@@ -104,6 +87,7 @@ import static 
org.apache.atlas.type.AtlasStructType.AtlasAttribute.AtlasRelation
 @Component
 public class EntityGraphMapper {
     private static final Logger LOG = LoggerFactory.getLogger(EntityGraphMapper.class);
+    private static final int INDEXED_STR_MAX_ALLOWED_LEN = 33482223;
 
     private final GraphHelper               graphHelper = GraphHelper.getInstance();
     private final AtlasGraph                graph;
@@ -623,9 +607,26 @@ public class EntityGraphMapper {
     }
 
     private Object mapPrimitiveValue(AttributeMutationContext ctx) {
-        AtlasGraphUtilsV2.setProperty(ctx.getReferringVertex(), ctx.getVertexProperty(), ctx.getValue());
+        boolean isIndexableStrAttr = ctx.getAttributeDef().getIsIndexable() && ctx.getAttrType() instanceof AtlasBuiltInTypes.AtlasStringType;
 
-        return ctx.getValue();
+        Object ret = ctx.getValue();
+
+        // Janus bug, when an indexed string attribute has a value longer than a certain length then the reverse indexed key generated by JanusGraph
+        // exceeds the HBase row length's hard limit (Short.MAX). This trimming and hashing procedure is to circumvent that limitation
+        if (ret != null && isIndexableStrAttr) {
+            String value = (String) ctx.getValue();
+
+            if (value.length() > INDEXED_STR_MAX_ALLOWED_LEN) {
+                LOG.warn("Indexed-String-Attribute: {} exceeds {} characters, trimming and appending checksum",
+                         ctx.getAttribute().getQualifiedName(), INDEXED_STR_MAX_ALLOWED_LEN);
+                String sha256Hex = DigestUtils.shaHex(value); // Storing SHA checksum in case verification is needed after retrieval
+                ret = value.substring(0, (INDEXED_STR_MAX_ALLOWED_LEN - 1) - sha256Hex.length()) + ":" + sha256Hex;
+            }
+        }
+
+        AtlasGraphUtilsV2.setProperty(ctx.getReferringVertex(), ctx.getVertexProperty(), ret);
+
+        return ret;
     }
 
     private AtlasEdge mapStructValue(AttributeMutationContext ctx, EntityMutationContext context) throws AtlasBaseException {

Reply via email to