mocobeta commented on a change in pull request #643:
URL: https://github.com/apache/lucene/pull/643#discussion_r799936524



##########
File path: 
lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
##########
@@ -154,6 +153,98 @@ protected BinaryDictionary(ResourceScheme resourceScheme, 
String resourcePath)
     this.buffer = buffer;
   }
 
+  protected BinaryDictionary(
+      Supplier<InputStream> targetMapResource,
+      Supplier<InputStream> posResource,
+      Supplier<InputStream> dictResource)
+      throws IOException {
+    this.resourceScheme = null;
+    this.resourcePath = null;
+
+    int[] targetMapOffsets = null, targetMap = null;
+    String[] posDict = null;
+    String[] inflFormDict = null;
+    String[] inflTypeDict = null;
+    ByteBuffer buffer = null;
+    try (InputStream mapIS = new BufferedInputStream(targetMapResource.get());
+        InputStream posIS = new BufferedInputStream(posResource.get());
+        // no buffering here, as we load in one large buffer
+        InputStream dictIS = dictResource.get()) {
+      DataInput in = new InputStreamDataInput(mapIS);
+      CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION);
+      targetMap = new int[in.readVInt()];
+      targetMapOffsets = new int[in.readVInt()];
+      int accum = 0, sourceId = 0;
+      for (int ofs = 0; ofs < targetMap.length; ofs++) {
+        final int val = in.readVInt();
+        if ((val & 0x01) != 0) {
+          targetMapOffsets[sourceId] = ofs;
+          sourceId++;
+        }
+        accum += val >>> 1;
+        targetMap[ofs] = accum;
+      }
+      if (sourceId + 1 != targetMapOffsets.length)
+        throw new IOException(
+            "targetMap file format broken; targetMap.length="
+                + targetMap.length
+                + ", targetMapOffsets.length="
+                + targetMapOffsets.length
+                + ", sourceId="
+                + sourceId);
+      targetMapOffsets[sourceId] = targetMap.length;
+
+      in = new InputStreamDataInput(posIS);
+      CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION);
+      int posSize = in.readVInt();
+      posDict = new String[posSize];
+      inflTypeDict = new String[posSize];
+      inflFormDict = new String[posSize];
+      for (int j = 0; j < posSize; j++) {
+        posDict[j] = in.readString();
+        inflTypeDict[j] = in.readString();
+        inflFormDict[j] = in.readString();
+        // this is how we encode null inflections
+        if (inflTypeDict[j].length() == 0) {
+          inflTypeDict[j] = null;
+        }
+        if (inflFormDict[j].length() == 0) {
+          inflFormDict[j] = null;
+        }
+      }
+
+      in = new InputStreamDataInput(dictIS);
+      CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION);
+      final int size = in.readVInt();
+      final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size);
+      final ReadableByteChannel channel = Channels.newChannel(dictIS);
+      final int read = channel.read(tmpBuffer);
+      if (read != size) {
+        throw new EOFException("Cannot read whole dictionary");
+      }
+      buffer = tmpBuffer.asReadOnlyBuffer();
+    }
+
+    this.targetMap = targetMap;
+    this.targetMapOffsets = targetMapOffsets;
+    this.posDict = posDict;
+    this.inflTypeDict = inflTypeDict;
+    this.inflFormDict = inflFormDict;
+    this.buffer = buffer;
+  }
+
+  protected static Supplier<InputStream> openFileOrThrowRuntimeException(Path 
path)
+      throws RuntimeException {

Review comment:
       I changed this to throw an UncheckedException. I also added a utility
interface that wraps the try-catch clauses that were scattered here and there.
   
https://github.com/apache/lucene/pull/643/commits/cb1c5498f7563883d0b619137a36b56f01f491b0




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to