This is an automated email from the ASF dual-hosted git repository.
pkarwasz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new 3019feb5 CODEC-335: Add `DigestUtils.gitBlob` and
`DigestUtils.gitTree` methods (#427)
3019feb5 is described below
commit 3019feb5fda4f40d2f5055f55a9e94d74eb0367b
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Sun Mar 29 20:15:50 2026 +0200
CODEC-335: Add `DigestUtils.gitBlob` and `DigestUtils.gitTree` methods
(#427)
This change adds two methods to `DigestUtils` that compute generalized Git
object
identifiers using an arbitrary `MessageDigest`, rather than being
restricted to SHA-1:
- `gitBlob(digest, input)`: computes a generalized
[Git blob object
identifier](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) for a
given file or byte content.
- `gitTree(digest, file)`: computes a generalized
[Git tree object
identifier](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) for a
given directory.
### Motivation
The standard Git object identifiers use SHA-1, which is
[in the process of being replaced by
SHA-256](https://git-scm.com/docs/hash-function-transition) in Git itself.
These methods generalize the identifier computation to support any
`MessageDigest`,
enabling both forward compatibility and use with external standards.
In particular, the `swh:1:cnt:` (content) and `swh:1:dir:` (directory)
identifier
types defined by [SWHID (ISO/IEC
18670)](https://www.swhid.org/specification/v1.2/5.Core_identifiers/) are
currently compatible with
Git blob and tree identifiers respectively (using SHA-1), and can be used
to generate
canonical, persistent identifiers for unpacked source and binary
distributions.
---
src/changes/changes.xml | 1 +
.../apache/commons/codec/digest/DigestUtils.java | 135 +++++++++++++++
.../commons/codec/digest/GitDirectoryEntry.java | 183 +++++++++++++++++++++
.../commons/codec/digest/DigestUtilsTest.java | 72 ++++++++
.../codec/digest/GitDirectoryEntryTest.java | 94 +++++++++++
src/test/resources/DigestUtilsTest/greetings.txt | 2 +
src/test/resources/DigestUtilsTest/hello.txt | 2 +
.../resources/DigestUtilsTest/subdir/nested.txt | 2 +
8 files changed, 491 insertions(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index ff3fb60c..70a136f1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -52,6 +52,7 @@ The <action> type attribute can be add,update,fix,remove.
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Inkeet, Gary Gregory, Wolff
Bock von Wuelfingen" issue="CODEC-326">Add Base58 support.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add
BaseNCodecInputStream.AbstracBuilder.setByteArray(byte[]).</action>
+ <action type="add" issue="CODEC-335" dev="pkarwasz" due-to="Piotr P.
Karwasz">Add DigestUtils.gitBlob() and DigestUtils.gitTree() to compute Git
blob and tree object identifiers.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump
org.apache.commons:commons-parent from 96 to 97.</action>
</release>
diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
index 786cc4e5..2b5f7cdb 100644
--- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
+++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java
@@ -18,17 +18,24 @@
package org.apache.commons.codec.digest;
import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.TreeSet;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
@@ -139,6 +146,134 @@ public class DigestUtils {
return updateDigest(messageDigest, data).digest();
}
+    /**
+     * Computes a generalized Git blob identifier for the content of a byte array.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1); it is reset before any data is fed to it.
+     * @param data          Data to digest.
+     * @return A generalized Git blob identifier.
+     * @since 1.22.0
+     */
+    public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) {
+        // Drop any state left in the digest by a previous computation.
+        messageDigest.reset();
+        // The object header is hashed first, followed by the raw content.
+        final byte[] header = gitBlobPrefix(data.length);
+        updateDigest(messageDigest, header);
+        return digest(messageDigest, data);
+    }
+
+    /**
+     * Reads through a file and returns a generalized Git blob identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.</p>
+     *
+     * <p>The size written to the object header is queried before the content is read, so the file must not change while it is being digested.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1); it is reset before any data is fed to it.
+     * @param data          The file whose content is digested.
+     * @param options       Options how to open the file.
+     * @return A generalized Git blob identifier.
+     * @throws IOException On error accessing the file.
+     * @since 1.22.0
+     */
+    public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
+        messageDigest.reset();
+        // The header carries the content length, which must be known before streaming the file.
+        final long contentLength = Files.size(data);
+        updateDigest(messageDigest, gitBlobPrefix(contentLength));
+        final MessageDigest updated = updateDigest(messageDigest, data, options);
+        return updated.digest();
+    }
+
+    /**
+     * Builds the header that precedes the content of a Git blob object: the word {@code blob}, a space, the decimal content size and a NUL byte.
+     *
+     * @param dataSize The size of the blob content in bytes.
+     * @return The UTF-8 encoded header.
+     */
+    private static byte[] gitBlobPrefix(final long dataSize) {
+        final String header = "blob " + dataSize + "\0";
+        return header.getBytes(StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Returns a generalized Git tree identifier for a collection of directory entries.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.</p>
+     *
+     * <p>The entries may be supplied in any order: they are sorted with {@link GitDirectoryEntry#compareTo} before being encoded.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1); it is reset before any data is fed to it.
+     * @param entries       The directory entries.
+     * @return A generalized Git tree identifier.
+     */
+    static byte[] gitTree(final MessageDigest messageDigest, final Collection<GitDirectoryEntry> entries) {
+        // A TreeSet iterates in the natural order of the entries; entries that compare as equal are kept only once.
+        final TreeSet<GitDirectoryEntry> sorted = new TreeSet<>(entries);
+        final ByteArrayOutputStream body = new ByteArrayOutputStream();
+        for (final GitDirectoryEntry entry : sorted) {
+            final byte[] encoded = entry.toTreeEntryBytes();
+            body.write(encoded, 0, encoded.length);
+        }
+        // The header needs the length of the complete body, so the body is materialized first.
+        final byte[] treeBody = body.toByteArray();
+        messageDigest.reset();
+        updateDigest(messageDigest, gitTreePrefix(treeBody.length));
+        return updateDigest(messageDigest, treeBody).digest();
+    }
+
+    /**
+     * Walks a directory and returns a generalized Git tree identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.</p>
+     *
+     * <p>Sub-directories are digested recursively. Symbolic links are not followed: as in Git, a link is digested as a blob whose content is the link
+     * target, so dangling links and links to directories are supported.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          The directory to digest.
+     * @param options       Options how to open the regular files found in the directory.
+     * @return A generalized Git tree identifier.
+     * @throws IOException On error accessing the directory or one of its entries.
+     * @since 1.22.0
+     */
+    public static byte[] gitTree(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
+        final List<GitDirectoryEntry> entries = new ArrayList<>();
+        try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) {
+            for (final Path path : files) {
+                final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path);
+                final byte[] rawObjectId;
+                if (type == GitDirectoryEntry.Type.DIRECTORY) {
+                    rawObjectId = gitTree(messageDigest, path, options);
+                } else if (type == GitDirectoryEntry.Type.SYMBOLIC_LINK) {
+                    // Opening the link would digest the content of the file it points to (or fail for a dangling link or a link to a directory).
+                    // NOTE(review): the target is encoded as UTF-8 from Path#toString(); confirm this matches Git on platforms whose separator is not '/'.
+                    final byte[] linkTarget = Files.readSymbolicLink(path).toString().getBytes(StandardCharsets.UTF_8);
+                    rawObjectId = gitBlob(messageDigest, linkTarget);
+                } else {
+                    rawObjectId = gitBlob(messageDigest, path, options);
+                }
+                entries.add(new GitDirectoryEntry(path, type, rawObjectId));
+            }
+        }
+        return gitTree(messageDigest, entries);
+    }
+
+    /**
+     * Classifies a file as one of the {@link GitDirectoryEntry.Type} values.
+     *
+     * <p>The checks are made in a fixed order: symbolic link, directory, executable file; anything else is a regular file.</p>
+     *
+     * @param path The file to check.
+     * @return A {@link GitDirectoryEntry.Type}
+     */
+    private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) {
+        final GitDirectoryEntry.Type type;
+        if (Files.isSymbolicLink(path)) {
+            // Must be tested first: the checks below follow links and would classify the link target instead.
+            type = GitDirectoryEntry.Type.SYMBOLIC_LINK;
+        } else if (Files.isDirectory(path)) {
+            type = GitDirectoryEntry.Type.DIRECTORY;
+        } else if (Files.isExecutable(path)) {
+            type = GitDirectoryEntry.Type.EXECUTABLE;
+        } else {
+            type = GitDirectoryEntry.Type.REGULAR;
+        }
+        return type;
+    }
+
+    /**
+     * Builds the header that precedes the body of a Git tree object: the word {@code tree}, a space, the decimal body size and a NUL byte.
+     *
+     * @param dataSize The size of the tree body in bytes.
+     * @return The UTF-8 encoded header.
+     */
+    private static byte[] gitTreePrefix(final long dataSize) {
+        final String header = "tree " + dataSize + "\0";
+        return header.getBytes(StandardCharsets.UTF_8);
+    }
+
/**
* Gets a {@code MessageDigest} for the given {@code algorithm}.
*
diff --git
a/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
new file mode 100644
index 00000000..e1073611
--- /dev/null
+++ b/src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.digest;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.util.Objects;
+
+/**
+ * Represents a single entry in a Git tree object.
+ *
+ * <p>A Git tree object encodes a directory snapshot. Each entry holds:</p>
+ * <ul>
+ * <li>a {@link Type} that determines the Unix file mode (e.g. {@code 100644} for a regular file),</li>
+ * <li>the entry name (file or directory name, without a path separator),</li>
+ * <li>the raw object id of the referenced blob or sub-tree.</li>
+ * </ul>
+ *
+ * <p>Entries are ordered by {@link #compareTo} using Git's tree-sort rule: names are compared as unsigned UTF-8 bytes, and directory names are compared
+ * as if they ended with {@code '/'}, so that the directory {@code foo/} sorts after {@code foo.txt} and before {@code foobar}.</p>
+ *
+ * <p>Equality and hash code are based on the entry name only, whereas the ordering also depends on whether the entry is a directory.</p>
+ *
+ * <p>Call {@link #toTreeEntryBytes()} to obtain the binary encoding that Git feeds to its hash function when computing the tree object identifier.</p>
+ *
+ * @see <a href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">Git Internals – Git Objects</a>
+ * @see <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID Directory Identifier</a>
+ */
+class GitDirectoryEntry implements Comparable<GitDirectoryEntry> {
+
+    /**
+     * The entry name (file or directory name, no path separator).
+     */
+    private final String name;
+
+    /**
+     * The key used for ordering entries within a tree object: the UTF-8 encoded entry name.
+     *
+     * <p>Git appends {@code '/'} to directory names before comparing.</p>
+     */
+    private final byte[] sortKey;
+
+    /**
+     * The Git object type, which determines the Unix file-mode prefix.
+     */
+    private final Type type;
+
+    /**
+     * The raw object id of the referenced blob or sub-tree.
+     */
+    private final byte[] rawObjectId;
+
+    /**
+     * Extracts the last name element of a path.
+     *
+     * @param path The path of the entry
+     * @return The file name as a string
+     * @throws IllegalArgumentException If the path has no file name, for example an empty or root path
+     */
+    private static String getFileName(final Path path) {
+        final Path fileName = path.getFileName();
+        if (fileName == null) {
+            throw new IllegalArgumentException(path.toString());
+        }
+        return fileName.toString();
+    }
+
+    /**
+     * Creates an entry
+     *
+     * @param name        The name of the entry
+     * @param type        The type of the entry
+     * @param rawObjectId The id of the entry
+     */
+    private GitDirectoryEntry(final String name, final Type type, final byte[] rawObjectId) {
+        this.name = name;
+        this.type = type;
+        // The key is kept as bytes because Git orders tree entries by byte value, not by UTF-16 code unit.
+        this.sortKey = (type == Type.DIRECTORY ? name + "/" : name).getBytes(StandardCharsets.UTF_8);
+        this.rawObjectId = rawObjectId;
+    }
+
+    /**
+     * Creates an entry
+     *
+     * @param path        The path of the entry; must not be an empty path. Only its last name element is used.
+     * @param type        The type of the entry
+     * @param rawObjectId The id of the entry
+     * @throws IllegalArgumentException If the path is empty
+     * @throws NullPointerException     If any argument is {@code null}
+     */
+    GitDirectoryEntry(final Path path, final Type type, final byte[] rawObjectId) {
+        this(getFileName(path), Objects.requireNonNull(type), Objects.requireNonNull(rawObjectId));
+    }
+
+    /**
+     * Returns the binary encoding of this entry as it appears inside a Git tree object.
+     *
+     * <p>The format follows the Git tree entry layout:</p>
+     * <pre>
+     *   {@code <mode> SP <name> NUL <raw-object-id>}
+     * </pre>
+     *
+     * <p>The raw object id has the length of the digest that produced it, for example 20 bytes for SHA-1.</p>
+     *
+     * @return the binary tree-entry encoding; never {@code null}
+     */
+    byte[] toTreeEntryBytes() {
+        final byte[] mode = type.mode;
+        final byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
+        // Two extra bytes: the space after the mode and the NUL after the name.
+        final byte[] result = new byte[mode.length + nameBytes.length + rawObjectId.length + 2];
+        int offset = 0;
+        System.arraycopy(mode, 0, result, offset, mode.length);
+        offset += mode.length;
+        result[offset++] = ' ';
+        System.arraycopy(nameBytes, 0, result, offset, nameBytes.length);
+        offset += nameBytes.length;
+        result[offset++] = '\0';
+        System.arraycopy(rawObjectId, 0, result, offset, rawObjectId.length);
+        return result;
+    }
+
+    /**
+     * Compares two entries by their sort keys, byte by byte, treating each byte as an unsigned value.
+     *
+     * <p>When one key is a prefix of the other, the shorter key sorts first.</p>
+     *
+     * @param o The entry to compare with
+     * @return a negative integer, zero, or a positive integer as this entry sorts before, with, or after the other entry
+     */
+    @Override
+    public int compareTo(final GitDirectoryEntry o) {
+        final byte[] otherKey = o.sortKey;
+        final int common = Math.min(sortKey.length, otherKey.length);
+        for (int i = 0; i < common; i++) {
+            // Java bytes are signed: mask them so that bytes above 0x7F sort after ASCII.
+            final int cmp = Integer.compare(sortKey[i] & 0xFF, otherKey[i] & 0xFF);
+            if (cmp != 0) {
+                return cmp;
+            }
+        }
+        return Integer.compare(sortKey.length, otherKey.length);
+    }
+
+    @Override
+    public int hashCode() {
+        return name.hashCode();
+    }
+
+    @Override
+    public boolean equals(final Object obj) {
+        if (obj == this) {
+            return true;
+        }
+        if (!(obj instanceof GitDirectoryEntry)) {
+            return false;
+        }
+        final GitDirectoryEntry other = (GitDirectoryEntry) obj;
+        return name.equals(other.name);
+    }
+
+    /**
+     * The type of a Git tree entry, which maps to a Unix file-mode string.
+     *
+     * <p>Git encodes the file type and permission bits as an ASCII octal string that precedes the entry name in the binary tree format. The values defined
+     * here cover the four entry types that Git itself produces.</p>
+     *
+     * <p>This enum is package-private. If it were made public, {@link #mode} would need to be wrapped in an immutable copy to prevent external mutation.</p>
+     */
+    enum Type {
+
+        /**
+         * A sub-directory (Git sub-tree)
+         */
+        DIRECTORY("40000"),
+
+        /**
+         * An executable file
+         */
+        EXECUTABLE("100755"),
+
+        /**
+         * A regular (non-executable) file
+         */
+        REGULAR("100644"),
+
+        /**
+         * A symbolic link
+         */
+        SYMBOLIC_LINK("120000");
+
+        /**
+         * The ASCII-encoded octal mode string as it appears in the binary tree entry.
+         */
+        private final byte[] mode;
+
+        Type(final String mode) {
+            this.mode = mode.getBytes(StandardCharsets.US_ASCII);
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
index b27705b5..01fcce06 100644
--- a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
+++ b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java
@@ -32,11 +32,14 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.stream.Stream;
@@ -52,6 +55,7 @@ import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
+import org.junit.jupiter.params.provider.ValueSource;
/**
* Tests {@link DigestUtils}.
@@ -238,6 +242,16 @@ class DigestUtilsTest {
"CA 92 BF 0B E5 61 5E 96 95 9D 76 71 97 A0 BE EB";
// @formatter:on
+    /**
+     * Provides pairs of a test resource name and the hexadecimal SHA-1 Git blob identifier expected for it.
+     *
+     * @return the arguments of the Git blob tests.
+     */
+    static Stream<Arguments> gitBlobProvider() {
+        final Arguments hello = Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7");
+        final Arguments greetings = Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243");
+        final Arguments nested = Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719");
+        return Stream.of(hello, greetings, nested);
+    }
+
+    /**
+     * Resolves a class path resource to a file system path.
+     *
+     * @param resourceName the resource name, relative to the class path root.
+     * @return the path of the resource.
+     * @throws Exception if the resource location cannot be converted to a path.
+     */
+    private static Path resourcePath(final String resourceName) throws Exception {
+        final ClassLoader classLoader = DigestUtilsTest.class.getClassLoader();
+        return Paths.get(classLoader.getResource(resourceName).toURI());
+    }
+
static Stream<Arguments> testShake128_256() {
// @formatter:off
return Stream.of(
@@ -475,6 +489,64 @@ class DigestUtilsTest {
assertEquals(MessageDigestAlgorithms.MD5,
digestUtils.getMessageDigest().getAlgorithm());
}
+    /**
+     * The SHA-1 blob identifier of in-memory content must match the expected Git blob identifier.
+     */
+    @ParameterizedTest
+    @MethodSource("gitBlobProvider")
+    void testGitBlobByteArray(final String resourceName, final String expectedSha1Hex) throws Exception {
+        final byte[] expected = Hex.decodeHex(expectedSha1Hex);
+        final byte[] content = Files.readAllBytes(resourcePath(resourceName));
+        final byte[] actual = DigestUtils.gitBlob(DigestUtils.getSha1Digest(), content);
+        assertArrayEquals(expected, actual);
+    }
+
+    /**
+     * The SHA-1 blob identifier of a file must match the expected Git blob identifier.
+     */
+    @ParameterizedTest
+    @MethodSource("gitBlobProvider")
+    void testGitBlobPath(final String resourceName, final String expectedSha1Hex) throws Exception {
+        final byte[] expected = Hex.decodeHex(expectedSha1Hex);
+        final byte[] actual = DigestUtils.gitBlob(DigestUtils.getSha1Digest(), resourcePath(resourceName));
+        assertArrayEquals(expected, actual);
+    }
+
+    /**
+     * Hexadecimal form of the tree object body expected in {@link #testGitTreeCollection}.
+     *
+     * <p>Each entry has the format {@code <mode> SP <name> NUL <20-byte-object-id>}; the entries appear in Git tree order.</p>
+     */
+    private static final String TREE_BODY_HEX =
+            // "100644 hello.txt\0" followed by the object id
+            "3130303634342068656c6c6f2e74787400a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"
+            // "120000 link.txt\0" followed by the object id
+            + "313230303030206c696e6b2e747874001234567890abcdef1234567890abcdef12345678"
+            // "100755 run.sh\0" followed by the object id
+            + "3130303735352072756e2e736800f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9"
+            // "40000 src\0" followed by the object id
+            + "34303030302073726300deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
+
+    /**
+     * The tree identifier of a collection of entries must be the digest of the {@code tree} header followed by the sorted, encoded entries.
+     */
+    @ParameterizedTest
+    @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, MessageDigestAlgorithms.SHA_256})
+    void testGitTreeCollection(final String algorithm) throws Exception {
+        // Entries are supplied out of order to verify that the method sorts them correctly.
+        final List<GitDirectoryEntry> entries = new ArrayList<>(Arrays.asList(
+                new GitDirectoryEntry(Paths.get("src"), GitDirectoryEntry.Type.DIRECTORY,
+                        Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")),
+                new GitDirectoryEntry(Paths.get("run.sh"), GitDirectoryEntry.Type.EXECUTABLE,
+                        Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9")),
+                new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR,
+                        Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0")),
+                new GitDirectoryEntry(Paths.get("link.txt"), GitDirectoryEntry.Type.SYMBOLIC_LINK,
+                        Hex.decodeHex("1234567890abcdef1234567890abcdef12345678"))));
+
+        // Compute the expected value from the known tree body.
+        final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX);
+        final byte[] treeHeader = ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8);
+        final MessageDigest md = DigestUtils.getDigest(algorithm);
+        DigestUtils.updateDigest(md, treeHeader);
+        final byte[] expected = DigestUtils.updateDigest(md, treeBody).digest();
+
+        assertArrayEquals(expected, DigestUtils.gitTree(md, entries));
+    }
+
+    /**
+     * The SHA-1 tree identifier of the test resource directory must match the expected Git tree identifier.
+     */
+    @Test
+    void testGitTreePath() throws Exception {
+        final byte[] expected = Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a");
+        final byte[] actual = DigestUtils.gitTree(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest"));
+        assertArrayEquals(expected, actual);
+    }
+
@Test
void testInternalNoSuchAlgorithmException() {
assertThrows(IllegalArgumentException.class, () ->
DigestUtils.getDigest("Bogus Bogus"));
diff --git
a/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java
b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java
new file mode 100644
index 00000000..ce37c0e1
--- /dev/null
+++ b/src/test/java/org/apache/commons/codec/digest/GitDirectoryEntryTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.digest;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests {@link GitDirectoryEntry}.
+ */
+class GitDirectoryEntryTest {
+
+    private static final byte[] ZERO_ID = new byte[20];
+
+    /**
+     * Creates a regular file entry with an all-zero object id.
+     *
+     * @param path the path of the entry, as a string.
+     * @return a new entry.
+     */
+    private static GitDirectoryEntry regular(final String path) {
+        return new GitDirectoryEntry(Paths.get(path), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
+    }
+
+    /**
+     * The Path constructor must extract the filename component.
+     */
+    @Test
+    void testPathConstructorUsesFilename() {
+        final GitDirectoryEntry fromLabel = regular("hello.txt");
+        final GitDirectoryEntry fromRelative = regular("subdir/hello.txt");
+        final GitDirectoryEntry fromAbsolute = new GitDirectoryEntry(Paths.get("hello.txt").toAbsolutePath(), GitDirectoryEntry.Type.REGULAR, ZERO_ID);
+        final byte[] expectedBytes = fromLabel.toTreeEntryBytes();
+
+        assertEquals(fromLabel, fromRelative);
+        assertEquals(fromLabel, fromAbsolute);
+        assertArrayEquals(expectedBytes, fromRelative.toTreeEntryBytes());
+        assertArrayEquals(expectedBytes, fromAbsolute.toTreeEntryBytes());
+    }
+
+    /**
+     * Equality and hash code are based solely on the entry name.
+     */
+    @Test
+    void testEqualityBasedOnNameOnly() {
+        final byte[] otherId = new byte[20];
+        Arrays.fill(otherId, (byte) 0xff);
+
+        final GitDirectoryEntry regular = regular("foo");
+        final GitDirectoryEntry executable = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.EXECUTABLE, otherId);
+        final GitDirectoryEntry differentName = regular("bar");
+
+        // Same name, different type and object id -> equal
+        assertEquals(regular, executable);
+        assertEquals(regular.hashCode(), executable.hashCode());
+
+        // Different name -> not equal
+        assertNotEquals(regular, differentName);
+
+        // Same reference -> equal
+        assertEquals(regular, regular);
+
+        // Not equal to null or unrelated type
+        assertNotEquals(regular, null);
+        assertNotEquals(regular, "foo");
+    }
+
+    /**
+     * Entries should be sorted by Git sort rule.
+     *
+     * <p>Git compares the names of the entries, but adds a {@code /} at the end of directory entries.</p>
+     */
+    @Test
+    void testSortOrder() {
+        final GitDirectoryEntry alpha = regular("alpha.txt");
+        final GitDirectoryEntry fooTxt = regular("foo.txt");
+        final GitDirectoryEntry fooDir = new GitDirectoryEntry(Paths.get("foo"), GitDirectoryEntry.Type.DIRECTORY, ZERO_ID);
+        final GitDirectoryEntry foobar = regular("foobar");
+        final GitDirectoryEntry zeta = regular("zeta.txt");
+
+        final List<GitDirectoryEntry> expected = Arrays.asList(alpha, fooTxt, fooDir, foobar, zeta);
+        final List<GitDirectoryEntry> shuffled = new ArrayList<>(Arrays.asList(zeta, foobar, fooDir, alpha, fooTxt));
+        shuffled.sort(GitDirectoryEntry::compareTo);
+
+        assertEquals(expected, shuffled);
+    }
+}
diff --git a/src/test/resources/DigestUtilsTest/greetings.txt
b/src/test/resources/DigestUtilsTest/greetings.txt
new file mode 100644
index 00000000..6cf4f797
--- /dev/null
+++ b/src/test/resources/DigestUtilsTest/greetings.txt
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: Apache-2.0
+Greetings!
diff --git a/src/test/resources/DigestUtilsTest/hello.txt
b/src/test/resources/DigestUtilsTest/hello.txt
new file mode 100644
index 00000000..5f4a8328
--- /dev/null
+++ b/src/test/resources/DigestUtilsTest/hello.txt
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: Apache-2.0
+Hello, World!
diff --git a/src/test/resources/DigestUtilsTest/subdir/nested.txt
b/src/test/resources/DigestUtilsTest/subdir/nested.txt
new file mode 100644
index 00000000..07a392dd
--- /dev/null
+++ b/src/test/resources/DigestUtilsTest/subdir/nested.txt
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: Apache-2.0
+Nested file.