This is an automated email from the ASF dual-hosted git repository.

pkarwasz pushed a commit to branch feat/git-identifiers
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit a7f54b5893863126ca8bb703b45cf399c74d2f5a
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Thu Apr 9 13:11:32 2026 +0200

    feat: Refactor Git-related methods in `GitIdentifiers`
    
    This change moves `gitBlob` and `gitTree` from `DigestUtils` into a separate utility class, `GitIdentifiers` (as `blobId` and `treeId`), to prepare for an enhancement of the provided API.
    
    The Git tree identifier can be computed for many kinds of objects: the most natural one is a directory on a filesystem, but we can also compute the identifier for an archive containing that directory. Additional use cases will require expanding the API beyond what can reasonably be contained in `DigestUtils`.
---
 .../apache/commons/codec/digest/DigestUtils.java   | 144 ----------------
 .../commons/codec/digest/GitIdentifiers.java       | 181 +++++++++++++++++++++
 .../commons/codec/digest/DigestUtilsTest.java      |  92 -----------
 .../commons/codec/digest/GitIdentifiersTest.java   | 130 +++++++++++++++
 4 files changed, 311 insertions(+), 236 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java 
b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
index 7c84b0b0..8970a03d 100644
--- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
+++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
@@ -18,24 +18,17 @@
 package org.apache.commons.codec.digest;
 
 import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.DirectoryStream;
 import java.nio.file.Files;
 import java.nio.file.OpenOption;
 import java.nio.file.Path;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.TreeSet;
 
 import org.apache.commons.codec.binary.Hex;
 import org.apache.commons.codec.binary.StringUtils;
@@ -191,26 +184,6 @@ public class DigestUtils {
         }
     }
 
-    /**
-     * Returns the {@link GitDirectoryEntry.Type} of a file.
-     *
-     * @param path The file to check.
-     * @return A {@link GitDirectoryEntry.Type}
-     */
-    private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path 
path) {
-        // Symbolic links first
-        if (Files.isSymbolicLink(path)) {
-            return GitDirectoryEntry.Type.SYMBOLIC_LINK;
-        }
-        if (Files.isDirectory(path)) {
-            return GitDirectoryEntry.Type.DIRECTORY;
-        }
-        if (Files.isExecutable(path)) {
-            return GitDirectoryEntry.Type.EXECUTABLE;
-        }
-        return GitDirectoryEntry.Type.REGULAR;
-    }
-
     /**
      * Gets an MD2 MessageDigest.
      *
@@ -407,123 +380,6 @@ public class DigestUtils {
         return getDigest(MessageDigestAlgorithms.SHAKE256_512);
     }
 
-    /**
-     * Reads through a byte array and return a generalized Git blob identifier.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param data          Data to digest.
-     * @return A generalized Git blob identifier.
-     * @since 1.22.0
-     */
-    public static byte[] gitBlob(final MessageDigest messageDigest, final 
byte[] data) {
-        messageDigest.reset();
-        updateDigest(messageDigest, gitBlobPrefix(data.length));
-        return digest(messageDigest, data);
-    }
-
-    /**
-     * Reads through a byte array and return a generalized Git blob identifier.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param data          Data to digest.
-     * @param options       Options how to open the file.
-     * @return A generalized Git blob identifier.
-     * @throws IOException On error accessing the file.
-     * @since 1.22.0
-     */
-    public static byte[] gitBlob(final MessageDigest messageDigest, final Path 
data, final OpenOption... options) throws IOException {
-        messageDigest.reset();
-        if (Files.isSymbolicLink(data)) {
-            final byte[] linkTarget = 
Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8);
-            updateDigest(messageDigest, gitBlobPrefix(linkTarget.length));
-            return digest(messageDigest, linkTarget);
-        }
-        updateDigest(messageDigest, gitBlobPrefix(Files.size(data)));
-        return updateDigest(messageDigest, data, options).digest();
-    }
-
-    private static byte[] gitBlobPrefix(final long dataSize) {
-        return gitPrefix("blob ", dataSize);
-    }
-
-    private static byte[] gitPrefix(final String prefix, final long dataSize) {
-        return (prefix + dataSize + "\0").getBytes(StandardCharsets.UTF_8);
-    }
-
-    /**
-     * Returns a generalized Git tree identifier.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param entries       The directory entries.
-     * @return A generalized Git tree identifier.
-     */
-    static byte[] gitTree(final MessageDigest messageDigest, final 
Collection<GitDirectoryEntry> entries) {
-        final TreeSet<GitDirectoryEntry> treeSet = new TreeSet<>(entries);
-        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        for (final GitDirectoryEntry entry : treeSet) {
-            final byte[] treeEntryBytes = entry.toTreeEntryBytes();
-            baos.write(treeEntryBytes, 0, treeEntryBytes.length);
-        }
-        messageDigest.reset();
-        updateDigest(messageDigest, gitTreePrefix(baos.size()));
-        return updateDigest(messageDigest, baos.toByteArray()).digest();
-    }
-
-    /**
-     * Reads through a byte array and return a generalized Git tree identifier.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param data          Data to digest.
-     * @param options       Options how to open the file.
-     * @return A generalized Git tree identifier.
-     * @throws IOException On error accessing the file.
-     * @since 1.22.0
-     */
-    public static byte[] gitTree(final MessageDigest messageDigest, final Path 
data, final OpenOption... options) throws IOException {
-        final List<GitDirectoryEntry> entries = new ArrayList<>();
-        try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) {
-            for (final Path path : files) {
-                final GitDirectoryEntry.Type type = 
getGitDirectoryEntryType(path);
-                final byte[] rawObjectId;
-                if (type == GitDirectoryEntry.Type.DIRECTORY) {
-                    rawObjectId = gitTree(messageDigest, path, options);
-                } else {
-                    rawObjectId = gitBlob(messageDigest, path, options);
-                }
-                entries.add(new GitDirectoryEntry(path, type, rawObjectId));
-            }
-        }
-        return gitTree(messageDigest, entries);
-    }
-
-    private static byte[] gitTreePrefix(final long dataSize) {
-        return gitPrefix("tree ", dataSize);
-    }
-
     /**
      * Test whether the algorithm is supported.
      *
diff --git a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java 
b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
new file mode 100644
index 00000000..3cbf48b8
--- /dev/null
+++ b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.digest;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.OpenOption;
+import java.nio.file.Path;
+import java.security.MessageDigest;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.TreeSet;
+
+/**
+ * Operations for computing Git object identifiers and their generalizations 
described by the
+ * <a href="https://www.swhid.org/swhid-specification/">SWHID specification</a>.
+ *
+ * <p>When the hash algorithm is SHA-1, the identifiers produced by this class 
are identical to those used by Git.
+ * Other hash algorithms produce generalized identifiers as described by the 
SWHID specification.</p>
+ *
+ * <p>This class is immutable and thread-safe. However, the {@link 
MessageDigest} instances passed to it generally won't be.</p>
+ *
+ * @see <a href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">Git Internals – Git Objects</a>
+ * @see <a href="https://www.swhid.org/swhid-specification/">SWHID Specification</a>
+ * @since 1.22.0
+ */
+public class GitIdentifiers {
+
+    private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path 
path) {
+        // Symbolic links first
+        if (Files.isSymbolicLink(path)) {
+            return GitDirectoryEntry.Type.SYMBOLIC_LINK;
+        }
+        if (Files.isDirectory(path)) {
+            return GitDirectoryEntry.Type.DIRECTORY;
+        }
+        if (Files.isExecutable(path)) {
+            return GitDirectoryEntry.Type.EXECUTABLE;
+        }
+        return GitDirectoryEntry.Type.REGULAR;
+    }
+
+    /**
+     * Reads through a byte array and returns a generalized Git blob 
identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Data to digest.
+     * @return A generalized Git blob identifier.
+     */
+    public static byte[] blobId(final MessageDigest messageDigest, final 
byte[] data) {
+        messageDigest.reset();
+        DigestUtils.updateDigest(messageDigest, gitBlobPrefix(data.length));
+        return DigestUtils.digest(messageDigest, data);
+    }
+
+    /**
+     * Reads through a file and returns a generalized Git blob identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Path to the file to digest.
+     * @param options       Options how to open the file.
+     * @return A generalized Git blob identifier.
+     * @throws IOException On error accessing the file.
+     * @since 1.22.0
+     */
+    public static byte[] blobId(final MessageDigest messageDigest, final Path 
data, final OpenOption... options) throws IOException {
+        messageDigest.reset();
+        if (Files.isSymbolicLink(data)) {
+            final byte[] linkTarget = 
Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8);
+            DigestUtils.updateDigest(messageDigest, 
gitBlobPrefix(linkTarget.length));
+            return DigestUtils.digest(messageDigest, linkTarget);
+        }
+        DigestUtils.updateDigest(messageDigest, 
gitBlobPrefix(Files.size(data)));
+        return DigestUtils.updateDigest(messageDigest, data, options).digest();
+    }
+
+    private static byte[] gitBlobPrefix(final long dataSize) {
+        return gitPrefix("blob ", dataSize);
+    }
+
+    private static byte[] gitPrefix(final String prefix, final long dataSize) {
+        return (prefix + dataSize + "\0").getBytes(StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Returns a generalized Git tree identifier for a collection of directory 
entries.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param entries       The directory entries.
+     * @return A generalized Git tree identifier.
+     */
+    static byte[] treeId(final MessageDigest messageDigest, final 
Collection<GitDirectoryEntry> entries) {
+        final TreeSet<GitDirectoryEntry> treeSet = new TreeSet<>(entries);
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        for (final GitDirectoryEntry entry : treeSet) {
+            final byte[] treeEntryBytes = entry.toTreeEntryBytes();
+            baos.write(treeEntryBytes, 0, treeEntryBytes.length);
+        }
+        messageDigest.reset();
+        DigestUtils.updateDigest(messageDigest, gitTreePrefix(baos.size()));
+        return DigestUtils.updateDigest(messageDigest, 
baos.toByteArray()).digest();
+    }
+
+    /**
+     * Reads through a directory and returns a generalized Git tree identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Path to the directory to digest.
+     * @param options       Options how to open files within the directory.
+     * @return A generalized Git tree identifier.
+     * @throws IOException On error accessing the directory or its contents.
+     * @since 1.22.0
+     */
+    public static byte[] treeId(final MessageDigest messageDigest, final Path 
data, final OpenOption... options) throws IOException {
+        final List<GitDirectoryEntry> entries = new ArrayList<>();
+        try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) {
+            for (final Path path : files) {
+                final GitDirectoryEntry.Type type = 
getGitDirectoryEntryType(path);
+                final byte[] rawObjectId;
+                if (type == GitDirectoryEntry.Type.DIRECTORY) {
+                    rawObjectId = treeId(messageDigest, path, options);
+                } else {
+                    rawObjectId = blobId(messageDigest, path, options);
+                }
+                entries.add(new GitDirectoryEntry(path, type, rawObjectId));
+            }
+        }
+        return treeId(messageDigest, entries);
+    }
+
+    private static byte[] gitTreePrefix(final long dataSize) {
+        return gitPrefix("tree ", dataSize);
+    }
+
+    private GitIdentifiers() {
+        // utility class
+    }
+}
diff --git a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java 
b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
index 7d1e72b0..6f7160ba 100644
--- a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
+++ b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
@@ -32,14 +32,11 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.security.MessageDigest;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.List;
 import java.util.Locale;
 import java.util.Random;
 import java.util.stream.Stream;
@@ -50,14 +47,11 @@ import org.apache.commons.lang3.JavaVersion;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.SystemUtils;
 import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.Assumptions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
-import org.junit.jupiter.params.provider.ValueSource;
 
 /**
  * Tests {@link DigestUtils}.
@@ -244,31 +238,6 @@ class DigestUtilsTest {
             "CA 92 BF 0B E5 61 5E 96 95 9D 76 71 97 A0 BE EB";
     // @formatter:on
 
-    /**
-     * Binary body of the test tree object used in {@link 
#testGitTreeCollection}.
-     *
-     * <p>Each entry has the format {@code <mode> SP <name> NUL 
<20-byte-object-id>}.</p>
-     */
-    private static final String TREE_BODY_HEX =
-            // 100644 hello.txt\0 + objectId
-            "3130303634342068656c6c6f2e74787400" + 
"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" +
-            // 120000 link.txt\0 + objectId
-            "313230303030206c696e6b2e74787400" + 
"1234567890abcdef1234567890abcdef12345678" +
-            // 100755 run.sh\0 + objectId
-            "3130303735352072756e2e736800" + 
"f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" +
-            // 40000 src\0 + objectId
-            "34303030302073726300" + 
"deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
-
-    static Stream<Arguments> gitBlobProvider() {
-        return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", 
"5f4a83288e67f1be2d6fcdad84165a86c6a970d7"),
-                Arguments.of("DigestUtilsTest/greetings.txt", 
"6cf4f797455661e61d1ee6913fc29344f5897243"),
-                Arguments.of("DigestUtilsTest/subdir/nested.txt", 
"07a392ddb4dbff06a373a7617939f30b2dcfe719"));
-    }
-
-    private static Path resourcePath(final String resourceName) throws 
Exception {
-        return 
Paths.get(DigestUtilsTest.class.getClassLoader().getResource(resourceName).toURI());
-    }
-
     static Stream<Arguments> testShake128_256() {
         // @formatter:off
         return Stream.of(
@@ -506,67 +475,6 @@ class DigestUtilsTest {
         assertEquals(MessageDigestAlgorithms.MD5, 
digestUtils.getMessageDigest().getAlgorithm());
     }
 
-    @ParameterizedTest
-    @MethodSource("gitBlobProvider")
-    void testGitBlobByteArray(final String resourceName, final String 
expectedSha1Hex) throws Exception {
-        final byte[] data = Files.readAllBytes(resourcePath(resourceName));
-        assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
DigestUtils.gitBlob(DigestUtils.getSha1Digest(), data));
-    }
-
-    @ParameterizedTest
-    @MethodSource("gitBlobProvider")
-    void testGitBlobPath(final String resourceName, final String 
expectedSha1Hex) throws Exception {
-        assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
DigestUtils.gitBlob(DigestUtils.getSha1Digest(), resourcePath(resourceName)));
-    }
-
-    @Test
-    void testGitBlobSymlink(@TempDir final Path tempDir) throws Exception {
-        final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
-        Files.write(subDir.resolve("file.txt"), 
"hello".getBytes(StandardCharsets.UTF_8));
-        final Path linkToDir;
-        final Path linkToFile;
-        try {
-            linkToDir = 
Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
-            linkToFile = 
Files.createSymbolicLink(tempDir.resolve("link-to-file"), 
Paths.get("subdir/file.txt"));
-        } catch (final UnsupportedOperationException e) {
-            Assumptions.assumeTrue(false, "Symbolic links not supported on 
this filesystem");
-            return;
-        }
-        final MessageDigest sha1 = DigestUtils.getSha1Digest();
-        
assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), 
DigestUtils.gitBlob(sha1, linkToDir));
-        
assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), 
DigestUtils.gitBlob(sha1, linkToFile));
-    }
-
-    @ParameterizedTest
-    @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, 
MessageDigestAlgorithms.SHA_256})
-    void testGitTreeCollection(final String algorithm) throws Exception {
-        final byte[] helloId = 
Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0");
-        final byte[] runId = 
Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9");
-        final byte[] linkId = 
Hex.decodeHex("1234567890abcdef1234567890abcdef12345678");
-        final byte[] srcId = 
Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef");
-
-        // Entries are supplied out of order to verify that the method sorts 
them correctly.
-        final List<GitDirectoryEntry> entries = new ArrayList<>();
-        entries.add(new GitDirectoryEntry(Paths.get("src"), 
GitDirectoryEntry.Type.DIRECTORY, srcId));
-        entries.add(new GitDirectoryEntry(Paths.get("run.sh"), 
GitDirectoryEntry.Type.EXECUTABLE, runId));
-        entries.add(new GitDirectoryEntry(Paths.get("hello.txt"), 
GitDirectoryEntry.Type.REGULAR, helloId));
-        entries.add(new GitDirectoryEntry(Paths.get("link.txt"), 
GitDirectoryEntry.Type.SYMBOLIC_LINK, linkId));
-
-        // Compute expected value
-        final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX);
-        final MessageDigest md = DigestUtils.getDigest(algorithm);
-        DigestUtils.updateDigest(md, ("tree " + treeBody.length + 
"\0").getBytes(StandardCharsets.UTF_8));
-        final byte[] expected = DigestUtils.updateDigest(md, 
treeBody).digest();
-
-        assertArrayEquals(expected, DigestUtils.gitTree(md, entries));
-    }
-
-    @Test
-    void testGitTreePath() throws Exception {
-        
assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"),
-                DigestUtils.gitTree(DigestUtils.getSha1Digest(), 
resourcePath("DigestUtilsTest")));
-    }
-
     @Test
     void testInternalNoSuchAlgorithmException() {
         assertThrows(IllegalArgumentException.class, () -> 
DigestUtils.getDigest("Bogus Bogus"));
diff --git 
a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java 
b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
new file mode 100644
index 00000000..075e0823
--- /dev/null
+++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.digest;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.security.MessageDigest;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.apache.commons.codec.binary.Hex;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Tests {@link GitIdentifiers}.
+ */
+class GitIdentifiersTest {
+
+    /**
+     * Binary body of the test tree object used in {@link 
#testTreeIdCollection}.
+     *
+     * <p>Each entry has the format {@code <mode> SP <name> NUL 
<20-byte-object-id>}.</p>
+     */
+    private static final String TREE_BODY_HEX =
+            // 100644 hello.txt\0 + objectId
+            "3130303634342068656c6c6f2e74787400" + 
"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" +
+            // 120000 link.txt\0 + objectId
+            "313230303030206c696e6b2e74787400" + 
"1234567890abcdef1234567890abcdef12345678" +
+            // 100755 run.sh\0 + objectId
+            "3130303735352072756e2e736800" + 
"f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" +
+            // 40000 src\0 + objectId
+            "34303030302073726300" + 
"deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
+
+    static Stream<Arguments> blobIdProvider() {
+        return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", 
"5f4a83288e67f1be2d6fcdad84165a86c6a970d7"),
+                Arguments.of("DigestUtilsTest/greetings.txt", 
"6cf4f797455661e61d1ee6913fc29344f5897243"),
+                Arguments.of("DigestUtilsTest/subdir/nested.txt", 
"07a392ddb4dbff06a373a7617939f30b2dcfe719"));
+    }
+
+    private static Path resourcePath(final String resourceName) throws 
Exception {
+        return 
Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI());
+    }
+
+    @ParameterizedTest
+    @MethodSource("blobIdProvider")
+    void testBlobIdByteArray(final String resourceName, final String 
expectedSha1Hex) throws Exception {
+        final byte[] data = Files.readAllBytes(resourcePath(resourceName));
+        assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data));
+    }
+
+    @ParameterizedTest
+    @MethodSource("blobIdProvider")
+    void testBlobIdPath(final String resourceName, final String 
expectedSha1Hex) throws Exception {
+        assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
GitIdentifiers.blobId(DigestUtils.getSha1Digest(), resourcePath(resourceName)));
+    }
+
+    @Test
+    void testBlobIdSymlink(@TempDir final Path tempDir) throws Exception {
+        final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
+        Files.write(subDir.resolve("file.txt"), 
"hello".getBytes(StandardCharsets.UTF_8));
+        final Path linkToDir;
+        final Path linkToFile;
+        try {
+            linkToDir = 
Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
+            linkToFile = 
Files.createSymbolicLink(tempDir.resolve("link-to-file"), 
Paths.get("subdir/file.txt"));
+        } catch (final UnsupportedOperationException e) {
+            Assumptions.assumeTrue(false, "Symbolic links not supported on 
this filesystem");
+            return;
+        }
+        final MessageDigest sha1 = DigestUtils.getSha1Digest();
+        
assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), 
GitIdentifiers.blobId(sha1, linkToDir));
+        
assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), 
GitIdentifiers.blobId(sha1, linkToFile));
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, 
MessageDigestAlgorithms.SHA_256})
+    void testTreeIdCollection(final String algorithm) throws Exception {
+        final byte[] helloId = 
Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0");
+        final byte[] runId = 
Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9");
+        final byte[] linkId = 
Hex.decodeHex("1234567890abcdef1234567890abcdef12345678");
+        final byte[] srcId = 
Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef");
+
+        // Entries are supplied out of order to verify that the method sorts 
them correctly.
+        final List<GitDirectoryEntry> entries = new ArrayList<>();
+        entries.add(new GitDirectoryEntry(Paths.get("src"), 
GitDirectoryEntry.Type.DIRECTORY, srcId));
+        entries.add(new GitDirectoryEntry(Paths.get("run.sh"), 
GitDirectoryEntry.Type.EXECUTABLE, runId));
+        entries.add(new GitDirectoryEntry(Paths.get("hello.txt"), 
GitDirectoryEntry.Type.REGULAR, helloId));
+        entries.add(new GitDirectoryEntry(Paths.get("link.txt"), 
GitDirectoryEntry.Type.SYMBOLIC_LINK, linkId));
+
+        // Compute expected value
+        final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX);
+        final MessageDigest md = DigestUtils.getDigest(algorithm);
+        DigestUtils.updateDigest(md, ("tree " + treeBody.length + 
"\0").getBytes(StandardCharsets.UTF_8));
+        final byte[] expected = DigestUtils.updateDigest(md, 
treeBody).digest();
+
+        assertArrayEquals(expected, GitIdentifiers.treeId(md, entries));
+    }
+
+    @Test
+    void testTreeIdPath() throws Exception {
+        
assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"),
+                GitIdentifiers.treeId(DigestUtils.getSha1Digest(), 
resourcePath("DigestUtilsTest")));
+    }
+}

Reply via email to