mocobeta commented on a change in pull request #643: URL: https://github.com/apache/lucene/pull/643#discussion_r800035383
########## File path: lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java ########## @@ -154,6 +153,98 @@ protected BinaryDictionary(ResourceScheme resourceScheme, String resourcePath) this.buffer = buffer; } + protected BinaryDictionary( + Supplier<InputStream> targetMapResource, + Supplier<InputStream> posResource, + Supplier<InputStream> dictResource) + throws IOException { + this.resourceScheme = null; + this.resourcePath = null; + + int[] targetMapOffsets = null, targetMap = null; + String[] posDict = null; + String[] inflFormDict = null; + String[] inflTypeDict = null; + ByteBuffer buffer = null; + try (InputStream mapIS = new BufferedInputStream(targetMapResource.get()); + InputStream posIS = new BufferedInputStream(posResource.get()); + // no buffering here, as we load in one large buffer + InputStream dictIS = dictResource.get()) { + DataInput in = new InputStreamDataInput(mapIS); + CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION); + targetMap = new int[in.readVInt()]; + targetMapOffsets = new int[in.readVInt()]; + int accum = 0, sourceId = 0; + for (int ofs = 0; ofs < targetMap.length; ofs++) { + final int val = in.readVInt(); + if ((val & 0x01) != 0) { + targetMapOffsets[sourceId] = ofs; + sourceId++; + } + accum += val >>> 1; + targetMap[ofs] = accum; + } + if (sourceId + 1 != targetMapOffsets.length) + throw new IOException( + "targetMap file format broken; targetMap.length=" + + targetMap.length + + ", targetMapOffsets.length=" + + targetMapOffsets.length + + ", sourceId=" + + sourceId); + targetMapOffsets[sourceId] = targetMap.length; + + in = new InputStreamDataInput(posIS); Review comment: I'd do this. https://github.com/apache/lucene/pull/643/commits/8bfc984390666dc4e044beec206aee1f85fb6d9a -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org