Jackie-Jiang commented on a change in pull request #8101: URL: https://github.com/apache/pinot/pull/8101#discussion_r796908219
########## File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/StringDictionary.java ########## @@ -41,84 +47,117 @@ public DataType getValueType() { @Override public String get(int dictId) { - return getUnpaddedString(dictId, getBuffer()); + return internStringValue(dictId); } @Override public int getIntValue(int dictId) { - return Integer.parseInt(getUnpaddedString(dictId, getBuffer())); + return Integer.parseInt(internStringValue(dictId)); } @Override public long getLongValue(int dictId) { - return Long.parseLong(getUnpaddedString(dictId, getBuffer())); + return Long.parseLong(internStringValue(dictId)); } @Override public float getFloatValue(int dictId) { - return Float.parseFloat(getUnpaddedString(dictId, getBuffer())); + return Float.parseFloat(internStringValue(dictId)); } @Override public double getDoubleValue(int dictId) { - return Double.parseDouble(getUnpaddedString(dictId, getBuffer())); + return Double.parseDouble(internStringValue(dictId)); } @Override public String getStringValue(int dictId) { - return getUnpaddedString(dictId, getBuffer()); + return internStringValue(dictId); } @Override public byte[] getBytesValue(int dictId) { - return BytesUtils.toBytes(getUnpaddedString(dictId, getBuffer())); + return BytesUtils.toBytes(internStringValue(dictId, getBuffer())); } @Override public void readIntValues(int[] dictIds, int length, int[] outValues) { byte[] buffer = getBuffer(); for (int i = 0; i < length; i++) { - outValues[i] = Integer.parseInt(getUnpaddedString(dictIds[i], buffer)); + outValues[i] = Integer.parseInt(internStringValue(dictIds[i], buffer)); } } @Override public void readLongValues(int[] dictIds, int length, long[] outValues) { byte[] buffer = getBuffer(); for (int i = 0; i < length; i++) { - outValues[i] = Long.parseLong(getUnpaddedString(dictIds[i], buffer)); + outValues[i] = Long.parseLong(internStringValue(dictIds[i], buffer)); } } @Override public void readFloatValues(int[] dictIds, int 
length, float[] outValues) { byte[] buffer = getBuffer(); for (int i = 0; i < length; i++) { - outValues[i] = Float.parseFloat(getUnpaddedString(dictIds[i], buffer)); + outValues[i] = Float.parseFloat(internStringValue(dictIds[i], buffer)); } } @Override public void readDoubleValues(int[] dictIds, int length, double[] outValues) { byte[] buffer = getBuffer(); for (int i = 0; i < length; i++) { - outValues[i] = Double.parseDouble(getUnpaddedString(dictIds[i], buffer)); + outValues[i] = Double.parseDouble(internStringValue(dictIds[i], buffer)); } } @Override public void readStringValues(int[] dictIds, int length, String[] outValues) { byte[] buffer = getBuffer(); for (int i = 0; i < length; i++) { - outValues[i] = getUnpaddedString(dictIds[i], buffer); + outValues[i] = internStringValue(dictIds[i], buffer); } } @Override public void readBytesValues(int[] dictIds, int length, byte[][] outValues) { byte[] buffer = getBuffer(); for (int i = 0; i < length; i++) { - outValues[i] = BytesUtils.toBytes(getUnpaddedString(dictIds[i], buffer)); + outValues[i] = BytesUtils.toBytes(internStringValue(dictIds[i], buffer)); + } + } + + private String internStringValue(int dictId) { + if (_internTable == null) { + return getUnpaddedString(dictId, getBuffer()); + } + String interned = _internTable[dictId]; + if (interned == null) { + interned = getUnpaddedString(dictId, getBuffer()); + _internTable[dictId] = interned; + } + return interned; + } + + private String internStringValue(int dictId, byte[] buffer) { + if (_internTable == null) { + return getUnpaddedString(dictId, buffer); + } + String interned = _internTable[dictId]; + if (interned == null) { + interned = getUnpaddedString(dictId, buffer); + _internTable[dictId] = interned; + } + return interned; + } + + @Override + public void close() + throws IOException { + if (_internTable != null) { + Arrays.fill(_internTable, null); Review comment: Is this required? If so, does setting `_internTable = null` have better performance? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org