This is an automated email from the ASF dual-hosted git repository.

pkarwasz pushed a commit to branch feat/git-identifiers
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit e2554aa5455f730eb15b61e5f7f467db9111d9bc
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Thu Apr 9 21:34:07 2026 +0200

    feat: add `GitIdBuilder`
    
    This change adds a `GitIdentifiers.TreeIdBuilder` class to allow the 
computation of a SWHID identifier from an archive.
---
 .../commons/codec/digest/GitIdentifiers.java       | 571 +++++++++++++--------
 .../commons/codec/digest/GitIdentifiersTest.java   | 240 ++++++---
 2 files changed, 522 insertions(+), 289 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java 
b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
index 39deccf1..72bba0a7 100644
--- a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
+++ b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java
@@ -19,15 +19,16 @@ package org.apache.commons.codec.digest;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.DirectoryStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.security.MessageDigest;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.TreeSet;
 
 /**
@@ -45,134 +46,57 @@ import java.util.TreeSet;
  */
 public class GitIdentifiers {
 
-    private static DirectoryEntry.Type getGitDirectoryEntryType(final Path 
path) {
-        // Symbolic links first
-        if (Files.isSymbolicLink(path)) {
-            return DirectoryEntry.Type.SYMBOLIC_LINK;
-        }
-        if (Files.isDirectory(path)) {
-            return DirectoryEntry.Type.DIRECTORY;
-        }
-        if (Files.isExecutable(path)) {
-            return DirectoryEntry.Type.EXECUTABLE;
-        }
-        return DirectoryEntry.Type.REGULAR;
-    }
-
     /**
-     * Reads through a byte array and returns a generalized Git blob 
identifier.
+     * The type of a Git tree entry, which maps to a Unix file-mode string.
      *
-     * <p>The identifier is computed in the way described by the
-     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID
 contents identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param data          Data to digest.
-     * @return A generalized Git blob identifier.
+     * <p>Git encodes the file type and permission bits as an ASCII octal 
string that precedes the entry name in the binary tree format. The values 
defined here
+     * cover the four entry types that Git itself produces.</p>
      */
-    public static byte[] blobId(final MessageDigest messageDigest, final 
byte[] data) {
-        messageDigest.reset();
-        DigestUtils.updateDigest(messageDigest, gitBlobPrefix(data.length));
-        return DigestUtils.digest(messageDigest, data);
-    }
+    public enum FileMode {
 
-    /**
-     * Reads through a file and returns a generalized Git blob identifier.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID
 contents identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param data          Path to the file to digest.
-     * @return A generalized Git blob identifier.
-     * @throws IOException On error accessing the file.
-     */
-    public static byte[] blobId(final MessageDigest messageDigest, final Path 
data) throws IOException {
-        messageDigest.reset();
-        if (Files.isSymbolicLink(data)) {
-            final byte[] linkTarget = 
Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8);
-            DigestUtils.updateDigest(messageDigest, 
gitBlobPrefix(linkTarget.length));
-            return DigestUtils.digest(messageDigest, linkTarget);
-        }
-        DigestUtils.updateDigest(messageDigest, 
gitBlobPrefix(Files.size(data)));
-        return DigestUtils.updateDigest(messageDigest, data).digest();
-    }
+        /**
+         * A sub-directory (Git sub-tree).
+         */
+        DIRECTORY("40000"),
 
-    private static byte[] gitBlobPrefix(final long dataSize) {
-        return gitPrefix("blob ", dataSize);
-    }
+        /**
+         * An executable file.
+         */
+        EXECUTABLE("100755"),
 
-    private static byte[] gitPrefix(final String prefix, final long dataSize) {
-        return (prefix + dataSize + "\0").getBytes(StandardCharsets.UTF_8);
-    }
+        /**
+         * A regular (non-executable) file.
+         */
+        REGULAR("100644"),
 
-    /**
-     * Returns a generalized Git tree identifier for a collection of directory 
entries.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID
 directory identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param entries       The directory entries.
-     * @return A generalized Git tree identifier.
-     */
-    static byte[] treeId(final MessageDigest messageDigest, final 
Collection<DirectoryEntry> entries) {
-        final TreeSet<DirectoryEntry> treeSet = new TreeSet<>(entries);
-        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        for (final DirectoryEntry entry : treeSet) {
-            final byte[] treeEntryBytes = entry.toTreeEntryBytes();
-            baos.write(treeEntryBytes, 0, treeEntryBytes.length);
-        }
-        messageDigest.reset();
-        DigestUtils.updateDigest(messageDigest, gitTreePrefix(baos.size()));
-        return DigestUtils.updateDigest(messageDigest, 
baos.toByteArray()).digest();
-    }
+        /**
+         * A symbolic link.
+         */
+        SYMBOLIC_LINK("120000");
 
-    /**
-     * Reads through a directory and returns a generalized Git tree identifier.
-     *
-     * <p>The identifier is computed in the way described by the
-     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID
 directory identifier</a>, but it can use any hash
-     * algorithm.</p>
-     *
-     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
-     *
-     * @param messageDigest The MessageDigest to use (for example SHA-1).
-     * @param data          Path to the directory to digest.
-     * @return A generalized Git tree identifier.
-     * @throws IOException On error accessing the directory or its contents.
-     */
-    public static byte[] treeId(final MessageDigest messageDigest, final Path 
data) throws IOException {
-        final List<DirectoryEntry> entries = new ArrayList<>();
-        try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) {
-            for (final Path path : files) {
-                final DirectoryEntry.Type type = 
getGitDirectoryEntryType(path);
-                final byte[] rawObjectId;
-                if (type == DirectoryEntry.Type.DIRECTORY) {
-                    rawObjectId = treeId(messageDigest, path);
-                } else {
-                    rawObjectId = blobId(messageDigest, path);
-                }
-                entries.add(new DirectoryEntry(path, type, rawObjectId));
-            }
-        }
-        return treeId(messageDigest, entries);
-    }
+        /**
+         * The octal mode as used by Git.
+         */
+        private final String mode;
 
-    private static byte[] gitTreePrefix(final long dataSize) {
-        return gitPrefix("tree ", dataSize);
-    }
+        /**
+         * Serialized {@code mode}: since this is mutable, it must remain 
private.
+         */
+        private final byte[] modeBytes;
 
-    private GitIdentifiers() {
-        // utility class
+        FileMode(final String mode) {
+            this.mode = mode;
+            this.modeBytes = mode.getBytes(StandardCharsets.US_ASCII);
+        }
+
+        /**
+         * Gets the octal mode as used by Git.
+         *
+         * @return The octal mode
+         */
+        public String getMode() {
+            return mode;
+        }
     }
 
     /**
@@ -180,7 +104,7 @@ public class GitIdentifiers {
      *
      * <p>A Git tree object encodes a directory snapshot. Each entry holds:</p>
      * <ul>
-     *   <li>a {@link Type} that determines the Unix file mode (e.g. {@code 
100644} for a regular file),</li>
+     *   <li>a {@link FileMode} that determines the Unix file mode (e.g. 
{@code 100644} for a regular file),</li>
      *   <li>the entry name (file or directory name, without a path 
separator),</li>
      *   <li>the raw object id of the referenced blob or sub-tree.</li>
      * </ul>
@@ -188,95 +112,29 @@ public class GitIdentifiers {
      * <p>Entries are ordered by {@link #compareTo} using Git's tree-sort 
rule: directory names are compared as if they ended with {@code '/'}, so that 
{@code foo/}
      * sorts after {@code foobar}.</p>
      *
-     * <p>Call {@link #toTreeEntryBytes()} to obtain the binary encoding that 
Git feeds to its hash function when computing the tree object identifier.</p>
-     *
      * @see <a 
href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">Git Internals – 
Git Objects</a>
      * @see <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID
 Directory Identifier</a>
      */
     static class DirectoryEntry implements Comparable<DirectoryEntry> {
 
-        /**
-         * The type of a Git tree entry, which maps to a Unix file-mode string.
-         *
-         * <p>Git encodes the file type and permission bits as an ASCII octal 
string that precedes the entry name in the binary tree format. The values 
defined here
-         * cover the four entry types that Git itself produces.</p>
-         *
-         * <p>This enum is package-private. If it were made public, {@link 
#mode} would need to be wrapped in an immutable copy to prevent external 
mutation.</p>
-         */
-        enum Type {
-
-            /**
-             * A sub-directory (Git sub-tree).
-             */
-            DIRECTORY("40000"),
-
-            /**
-             * An executable file.
-             */
-            EXECUTABLE("100755"),
-
-            /**
-             * A regular (non-executable) file.
-             */
-            REGULAR("100644"),
-
-            /**
-             * A symbolic link.
-             */
-            SYMBOLIC_LINK("120000");
-
-            /**
-             * The ASCII-encoded octal mode string as it appears in the binary 
tree entry.
-             */
-            private final byte[] mode;
-
-            Type(final String mode) {
-                this.mode = mode.getBytes(StandardCharsets.US_ASCII);
-            }
-        }
-
-        private static String getFileName(final Path path) {
-            final Path fileName = path.getFileName();
-            if (fileName == null) {
-                throw new IllegalArgumentException(path.toString());
-            }
-            return fileName.toString();
-        }
-
         /**
          * The entry name (file or directory name, no path separator).
          */
         private final String name;
-
+        /**
+         * The raw object id of the referenced blob or sub-tree.
+         */
+        private final byte[] rawObjectId;
         /**
          * The key used for ordering entries within a tree object.
          *
          * <p>>Git appends {@code '/'} to directory names before comparing.</p>
          */
         private final String sortKey;
-
         /**
          * The Git object type, which determines the Unix file-mode prefix.
          */
-        private final Type type;
-
-        /**
-         * The raw object id of the referenced blob or sub-tree.
-         */
-        private final byte[] rawObjectId;
-
-        /**
-         * Creates an entry.
-         *
-         * @param path The path of the entry; must not be an empty path.
-         * @param type The type of the entry.
-         * @param rawObjectId The id of the entry.
-         * @throws IllegalArgumentException If the path is empty.
-         * @throws NullPointerException If any argument is {@code null}.
-         */
-        DirectoryEntry(final Path path, final Type type, final byte[] 
rawObjectId) {
-            this(getFileName(path), type, rawObjectId);
-        }
+        private final FileMode type;
 
         /**
          * Creates an entry.
@@ -285,10 +143,13 @@ public class GitIdentifiers {
          * @param type The type of the entry
          * @param rawObjectId The id of the entry
          */
-        private DirectoryEntry(final String name, final Type type, final 
byte[] rawObjectId) {
+        DirectoryEntry(final String name, final FileMode type, final byte[] 
rawObjectId) {
+            if (Objects.requireNonNull(name).indexOf('/') >= 0) {
+                throw new IllegalArgumentException("Entry name must not 
contain '/': " + name);
+            }
             this.name = name;
             this.type = Objects.requireNonNull(type);
-            this.sortKey = type == Type.DIRECTORY ? name + "/" : name;
+            this.sortKey = type == FileMode.DIRECTORY ? name + "/" : name;
             this.rawObjectId = Objects.requireNonNull(rawObjectId);
         }
 
@@ -314,25 +175,315 @@ public class GitIdentifiers {
             return name.hashCode();
         }
 
+    }
+
+    /**
+     * Builds a Git tree identifier for a virtual directory structure, such as 
the contents of
+     * an archive.
+     */
+    public static class TreeIdBuilder {
+
+        /**
+         * A supplier of a blob identifier that may throw {@link IOException}.
+         */
+        @FunctionalInterface
+        private interface BlobIdSupplier {
+            byte[] get() throws IOException;
+        }
+
+        private static void checkPathComponent(String name) {
+            if (".".equals(name) || "..".equals(name)) {
+                throw new IllegalArgumentException("Path component not 
allowed: " + name);
+            }
+        }
+        private final Map<String, TreeIdBuilder> dirEntries = new HashMap<>();
+        private final Map<String, DirectoryEntry> fileEntries = new 
HashMap<>();
+        private final MessageDigest messageDigest;
+
+        TreeIdBuilder(final MessageDigest messageDigest) {
+            this.messageDigest = Objects.requireNonNull(messageDigest);
+        }
+
         /**
-         * Returns the binary encoding of this entry as it appears inside a 
Git tree object.
+         * Returns the {@link TreeIdBuilder} for the named subdirectory, 
creating it if absent.
          *
-         * <p>The format follows the Git tree entry layout:</p>
-         * <pre>
-         *   &lt;mode&gt; SP &lt;name&gt; NUL &lt;20-byte-object-id&gt;
-         * </pre>
+         * @param name The relative path of the subdirectory in normalized 
form (may contain {@code '/'}).
+         * @return The {@link TreeIdBuilder} for the subdirectory.
+         * @throws IllegalArgumentException If any path component is {@code 
"."} or {@code ".."}.
+         */
+        public TreeIdBuilder addDirectory(final String name) {
+            TreeIdBuilder current = this;
+            for (final String component : name.split("/", -1)) {
+                if (component.isEmpty()) {
+                    continue;
+                }
+                checkPathComponent(component);
+                current = current.dirEntries.computeIfAbsent(component, k -> 
new TreeIdBuilder(messageDigest));
+            }
+            return current;
+        }
+
+        /**
+         * Adds a file entry at the given path within this tree.
+         *
+         * <p>If {@code name} contains {@code '/'}, intermediate 
subdirectories are created automatically.</p>
+         *
+         * <p>The stream is eagerly drained.</p>
          *
-         * @return the binary tree-entry encoding; never {@code null}.
+         * <p>If the size of the stream is known in advance, consider using 
{@link #addFile(FileMode, String, long, InputStream)} instead.</p>
+         *
+         * @param mode The file mode (e.g. {@link FileMode#REGULAR}).
+         * @param name The relative path of the entry in normalized form (may 
contain {@code '/'}).
+         * @param data The file content.
+         * @throws IOException If the stream cannot be read.
+         * @throws IllegalArgumentException If any path component is {@code 
"."} or {@code ".."}.
          */
-        byte[] toTreeEntryBytes() {
-            final byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
-            final byte[] result = new byte[type.mode.length + nameBytes.length 
+ rawObjectId.length + 2];
-            System.arraycopy(type.mode, 0, result, 0, type.mode.length);
-            result[type.mode.length] = ' ';
-            System.arraycopy(nameBytes, 0, result, type.mode.length + 1, 
nameBytes.length);
-            result[type.mode.length + nameBytes.length + 1] = '\0';
-            System.arraycopy(rawObjectId, 0, result, type.mode.length + 
nameBytes.length + 2, rawObjectId.length);
-            return result;
+        public void addFile(final FileMode mode, final String name, final 
InputStream data) throws IOException {
+            addFile(mode, name, () -> blobId(messageDigest, 
readAllBytes(data)));
         }
+
+        /**
+         * Adds a file entry at the given path within this tree, streaming 
content without buffering.
+         *
+         * <p>If {@code name} contains {@code '/'}, intermediate 
subdirectories are created automatically.</p>
+         *
+         * <p>The stream is eagerly drained.</p>
+         *
+         * @param mode     The file mode (e.g. {@link FileMode#REGULAR}).
+         * @param name The relative path of the entry in normalized form (may 
contain {@code '/'}).
+         * @param dataSize The exact number of bytes in {@code data}.
+         * @param data     The file content.
+         * @throws IOException If the stream cannot be read.
+         * @throws IllegalArgumentException If any path component is {@code 
"."} or {@code ".."}.
+         */
+        public void addFile(final FileMode mode, final String name, final long 
dataSize, final InputStream data) throws IOException {
+            addFile(mode, name, () -> blobId(messageDigest, dataSize, data));
+        }
+
+        private void addFile(final FileMode mode, final String name, final 
BlobIdSupplier blobId) throws IOException {
+            final int slash = name.indexOf('/');
+            if (slash < 0) {
+                checkPathComponent(name);
+                fileEntries.put(name, new DirectoryEntry(name, mode, 
blobId.get()));
+            } else {
+                addDirectory(name.substring(0, slash)).addFile(mode, 
name.substring(slash + 1), blobId);
+            }
+        }
+
+        /**
+         * Adds a file entry at the given path within this tree.
+         *
+         * <p>If {@code name} contains {@code '/'}, intermediate 
subdirectories are created automatically.</p>
+         *
+         * @param mode The file mode (e.g. {@link FileMode#REGULAR}).
+         * @param name The relative path of the entry in normalized form (may 
contain {@code '/'}).
+         * @param data The file content.
+         * @throws IOException If an I/O error occurs.
+         * @throws IllegalArgumentException If any path component is {@code 
"."} or {@code ".."}.
+         */
+        public void addFile(final FileMode mode, final String name, final 
byte[] data) throws IOException {
+            addFile(mode, name, () -> blobId(messageDigest, data));
+        }
+
+        /**
+         * Computes the Git tree identifier for this directory and all its 
descendants.
+         *
+         * @return The raw tree identifier bytes.
+         * @throws IOException If a digest operation fails.
+         */
+        public byte[] build() throws IOException {
+            final Set<DirectoryEntry> entries = new 
TreeSet<>(fileEntries.values());
+            for (final Map.Entry<String, TreeIdBuilder> e : 
dirEntries.entrySet()) {
+                entries.add(new DirectoryEntry(e.getKey(), FileMode.DIRECTORY, 
e.getValue().build()));
+            }
+            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            for (final DirectoryEntry entry : entries) {
+                baos.write(entry.type.modeBytes);
+                baos.write(' ');
+                baos.write(entry.name.getBytes(StandardCharsets.UTF_8));
+                baos.write('\0');
+                baos.write(entry.rawObjectId);
+            }
+            messageDigest.reset();
+            DigestUtils.updateDigest(messageDigest, 
getGitTreePrefix(baos.size()));
+            return DigestUtils.updateDigest(messageDigest, 
baos.toByteArray()).digest();
+        }
+    }
+
+    /**
+     * Reads through a byte array and returns a generalized Git blob 
identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID
 contents identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Data to digest.
+     * @return A generalized Git blob identifier.
+     */
+    public static byte[] blobId(final MessageDigest messageDigest, final 
byte[] data) {
+        messageDigest.reset();
+        DigestUtils.updateDigest(messageDigest, getGitBlobPrefix(data.length));
+        return DigestUtils.digest(messageDigest, data);
+    }
+
+    /**
+     * Reads through a stream and returns a generalized Git blob identifier.
+     *
+     * <p>The stream is drained and its contents are buffered to determine the 
size before hashing. To avoid
+     * buffering, use {@link #blobId(MessageDigest, long, InputStream)} when 
the size is known in advance.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Stream to digest.
+     * @return A generalized Git blob identifier.
+     * @throws IOException On error reading the stream.
+     */
+    public static byte[] blobId(final MessageDigest messageDigest, final 
InputStream data) throws IOException {
+        return blobId(messageDigest, readAllBytes(data));
+    }
+
+    /**
+     * Reads through a stream of known size and returns a generalized Git blob 
identifier, without buffering.
+     *
+     * <p>When the size of the content is known in advance, this overload 
streams {@code data} directly through
+     * the digest without buffering the full content in memory.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param dataSize      The exact number of bytes in {@code data}.
+     * @param data          Stream to digest.
+     * @return A generalized Git blob identifier.
+     * @throws IOException On error reading the stream.
+     */
+    public static byte[] blobId(final MessageDigest messageDigest, final long 
dataSize, final InputStream data) throws IOException {
+        messageDigest.reset();
+        DigestUtils.updateDigest(messageDigest, getGitBlobPrefix(dataSize));
+        return DigestUtils.updateDigest(messageDigest, data).digest();
+    }
+
+    /**
+     * Reads through a file and returns a generalized Git blob identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID
 contents identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
blob identifier and SWHID contents identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Path to the file to digest.
+     * @return A generalized Git blob identifier.
+     * @throws IOException On error accessing the file.
+     */
+    public static byte[] blobId(final MessageDigest messageDigest, final Path 
data) throws IOException {
+        messageDigest.reset();
+        if (Files.isSymbolicLink(data)) {
+            final byte[] linkTarget = 
Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8);
+            DigestUtils.updateDigest(messageDigest, 
getGitBlobPrefix(linkTarget.length));
+            return DigestUtils.digest(messageDigest, linkTarget);
+        }
+        DigestUtils.updateDigest(messageDigest, 
getGitBlobPrefix(Files.size(data)));
+        return DigestUtils.updateDigest(messageDigest, data).digest();
+    }
+
+    private static FileMode getGitDirectoryEntryType(final Path path) {
+        // Symbolic links first
+        if (Files.isSymbolicLink(path)) {
+            return FileMode.SYMBOLIC_LINK;
+        }
+        if (Files.isDirectory(path)) {
+            return FileMode.DIRECTORY;
+        }
+        if (Files.isExecutable(path)) {
+            return FileMode.EXECUTABLE;
+        }
+        return FileMode.REGULAR;
+    }
+
+    private static byte[] getGitBlobPrefix(final long dataSize) {
+        return ("blob " + dataSize + "\0").getBytes(StandardCharsets.UTF_8);
+    }
+
+    private static byte[] getGitTreePrefix(final long dataSize) {
+        return ("tree " + dataSize + "\0").getBytes(StandardCharsets.UTF_8);
+    }
+
+    private static void populateFromPath(final TreeIdBuilder builder, final 
Path directory) throws IOException {
+        try (DirectoryStream<Path> files = 
Files.newDirectoryStream(directory)) {
+            for (final Path path : files) {
+                final String name = path.getFileName().toString();
+                final FileMode mode = getGitDirectoryEntryType(path);
+                switch (mode) {
+                    case DIRECTORY:
+                        populateFromPath(builder.addDirectory(name), path);
+                        break;
+                    case SYMBOLIC_LINK:
+                        final byte[] linkTarget = 
Files.readSymbolicLink(path).toString().getBytes(StandardCharsets.UTF_8);
+                        builder.addFile(FileMode.SYMBOLIC_LINK, name, 
linkTarget);
+                        break;
+                    default:
+                        try (InputStream is = Files.newInputStream(path)) {
+                            builder.addFile(mode, name, Files.size(path), is);
+                        }
+                        break;
+                }
+            }
+        }
+    }
+
+    private static byte[] readAllBytes(final InputStream in) throws 
IOException {
+        final ByteArrayOutputStream out = new ByteArrayOutputStream();
+        final byte[] buf = new byte[DigestUtils.BUFFER_SIZE];
+        int n;
+        while ((n = in.read(buf)) != -1) {
+            out.write(buf, 0, n);
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Reads through a directory and returns a generalized Git tree identifier.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID
 directory identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @param data          Path to the directory to digest.
+     * @return A generalized Git tree identifier.
+     * @throws IOException On error accessing the directory or its contents.
+     */
+    public static byte[] treeId(final MessageDigest messageDigest, final Path 
data) throws IOException {
+        final TreeIdBuilder builder = treeIdBuilder(messageDigest);
+        populateFromPath(builder, data);
+        return builder.build();
+    }
+
+    /**
+     * Returns a new {@link TreeIdBuilder} for constructing a generalized Git 
tree identifier from a virtual directory
+     * structure, such as the contents of an archive.
+     *
+     * <p>The identifier is computed in the way described by the
+     * <a 
href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID
 directory identifier</a>, but it can use any hash
+     * algorithm.</p>
+     *
+     * <p>When the hash algorithm is SHA-1, the identifier is identical to Git 
tree identifier and SWHID directory identifier.</p>
+     *
+     * @param messageDigest The MessageDigest to use (for example SHA-1).
+     * @return A new {@link TreeIdBuilder}.
+     */
+    public static TreeIdBuilder treeIdBuilder(final MessageDigest 
messageDigest) {
+        return new TreeIdBuilder(messageDigest);
+    }
+
+    private GitIdentifiers() {
+        // utility class
     }
 }
diff --git 
a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java 
b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
index 063ac682..a1604b6f 100644
--- a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
+++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java
@@ -19,9 +19,11 @@ package org.apache.commons.codec.digest;
 
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
+import java.io.ByteArrayInputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -47,20 +49,8 @@ import org.junit.jupiter.params.provider.ValueSource;
  */
 class GitIdentifiersTest {
 
-    /**
-     * Binary body of the test tree object used in {@link 
#testTreeIdCollection}.
-     *
-     * <p>Each entry has the format {@code <mode> SP <name> NUL 
<20-byte-object-id>}.</p>
-     */
-    private static final String TREE_BODY_HEX =
-            // 100644 hello.txt\0 + objectId
-            "3130303634342068656c6c6f2e74787400" + 
"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" +
-            // 120000 link.txt\0 + objectId
-            "313230303030206c696e6b2e74787400" + 
"1234567890abcdef1234567890abcdef12345678" +
-            // 100755 run.sh\0 + objectId
-            "3130303735352072756e2e736800" + 
"f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" +
-            // 40000 src\0 + objectId
-            "34303030302073726300" + 
"deadbeefdeadbeefdeadbeefdeadbeefdeadbeef";
+
+    private static final byte[] ZERO_ID = new byte[20];
 
     static Stream<Arguments> blobIdProvider() {
         return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", 
"5f4a83288e67f1be2d6fcdad84165a86c6a970d7"),
@@ -72,6 +62,20 @@ class GitIdentifiersTest {
         return 
Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI());
     }
 
+    static Stream<Arguments> testTreeIdBuilder() {
+        return Stream.of(
+                Arguments.of(MessageDigestAlgorithms.SHA_1,
+                        "ce013625030ba8dba906f756967f9e9ca394464a",  // blob 
id of "hello\n"
+                        "8bbe8a53790056316b23b7c270f10ab6bf6bb1b4",  // blob 
id of "subdir"
+                        "1a2485251c33a70432394c93fb89330ef214bfc9",  // blob 
id of "#!/bin/sh\n"
+                        "4b825dc642cb6eb9a060e54bf8d69288fbee4904"), // tree 
id of empty directory
+                Arguments.of(MessageDigestAlgorithms.SHA_256,
+                        
"2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4",
+                        
"33910dae80b0db75dbad7fa521dbbf1885a07edfab1228871c41a2e94ccd7edb",
+                        
"1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1",
+                        
"6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321"));
+    }
+
     @ParameterizedTest
     @MethodSource("blobIdProvider")
     void testBlobIdByteArray(final String resourceName, final String 
expectedSha1Hex) throws Exception {
@@ -79,21 +83,49 @@ class GitIdentifiersTest {
         assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data));
     }
 
+    @ParameterizedTest
+    @MethodSource("blobIdProvider")
+    void testBlobIdInputStream(final String resourceName, final String 
expectedSha1Hex) throws Exception {
+        final byte[] data = Files.readAllBytes(resourcePath(resourceName));
+        assertArrayEquals(Hex.decodeHex(expectedSha1Hex),
+                GitIdentifiers.blobId(DigestUtils.getSha1Digest(), new 
ByteArrayInputStream(data)));
+    }
+
+    @ParameterizedTest
+    @MethodSource("blobIdProvider")
+    void testBlobIdInputStreamWithSize(final String resourceName, final String 
expectedSha1Hex) throws Exception {
+        final byte[] data = Files.readAllBytes(resourcePath(resourceName));
+        assertArrayEquals(Hex.decodeHex(expectedSha1Hex),
+                GitIdentifiers.blobId(DigestUtils.getSha1Digest(), 
data.length, new ByteArrayInputStream(data)));
+    }
+
     @ParameterizedTest
     @MethodSource("blobIdProvider")
     void testBlobIdPath(final String resourceName, final String 
expectedSha1Hex) throws Exception {
         assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
GitIdentifiers.blobId(DigestUtils.getSha1Digest(), resourcePath(resourceName)));
     }
 
-
-    private static final byte[] ZERO_ID = new byte[20];
+    @Test
+    void testBlobIdSymlink(@TempDir final Path tempDir) throws Exception {
+        final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
+        Files.write(subDir.resolve("file.txt"), 
"hello".getBytes(StandardCharsets.UTF_8));
+        try {
+            final Path linkToDir = 
Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
+            final Path linkToFile = 
Files.createSymbolicLink(tempDir.resolve("link-to-file"), 
Paths.get("subdir/file.txt"));
+            final MessageDigest sha1 = DigestUtils.getSha1Digest();
+            
assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), 
GitIdentifiers.blobId(sha1, linkToDir));
+            
assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), 
GitIdentifiers.blobId(sha1, linkToFile));
+        } catch (final UnsupportedOperationException e) {
+            Assumptions.abort("Symbolic links not supported on this 
filesystem");
+        }
+    }
 
     @Test
     void testDirectoryEntryConstructor() {
-        assertThrows(NullPointerException.class, () -> new 
DirectoryEntry(null, DirectoryEntry.Type.REGULAR, ZERO_ID));
-        assertThrows(NullPointerException.class, () -> new 
DirectoryEntry(Paths.get("hello.txt"), null, ZERO_ID));
-        assertThrows(NullPointerException.class, () -> new 
DirectoryEntry(Paths.get("hello.txt"), DirectoryEntry.Type.REGULAR, null));
-        assertThrows(IllegalArgumentException.class, () -> new 
DirectoryEntry(Paths.get("/"), DirectoryEntry.Type.REGULAR, ZERO_ID));
+        assertThrows(NullPointerException.class, () -> new 
DirectoryEntry(null, GitIdentifiers.FileMode.REGULAR, ZERO_ID));
+        assertThrows(NullPointerException.class, () -> new 
DirectoryEntry("hello.txt", null, ZERO_ID));
+        assertThrows(NullPointerException.class, () -> new 
DirectoryEntry("hello.txt", GitIdentifiers.FileMode.REGULAR, null));
+        assertThrows(IllegalArgumentException.class, () -> new 
DirectoryEntry("/", GitIdentifiers.FileMode.REGULAR, ZERO_ID));
     }
 
     /**
@@ -103,32 +135,18 @@ class GitIdentifiersTest {
     void testDirectoryEntryEqualityBasedOnNameOnly() {
         final byte[] otherId = new byte[20];
         Arrays.fill(otherId, (byte) 0xff);
-        final DirectoryEntry regular = new DirectoryEntry(Paths.get("foo"), 
DirectoryEntry.Type.REGULAR, ZERO_ID);
-        final DirectoryEntry executable = new DirectoryEntry(Paths.get("foo"), 
DirectoryEntry.Type.EXECUTABLE, otherId);
+        final DirectoryEntry regular = new DirectoryEntry("foo", 
GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry executable = new DirectoryEntry("foo", 
GitIdentifiers.FileMode.EXECUTABLE, otherId);
         // Same name, different type and object id -> equal
         assertEquals(regular, executable);
         assertEquals(regular.hashCode(), executable.hashCode());
         // Different name -> not equal
-        assertNotEquals(regular, new DirectoryEntry(Paths.get("bar"), 
DirectoryEntry.Type.REGULAR, ZERO_ID));
+        assertNotEquals(regular, new DirectoryEntry("bar", 
GitIdentifiers.FileMode.REGULAR, ZERO_ID));
         // Same reference -> equal
         assertEquals(regular, regular);
         // Not equal to null or unrelated type
-        assertNotEquals(null, regular);
-        assertNotEquals("foo", regular);
-    }
-
-    /**
-     * The Path constructor must extract the filename component.
-     */
-    @Test
-    void testDirectoryEntryPathConstructorUsesFilename() {
-        final DirectoryEntry fromLabel = new 
DirectoryEntry(Paths.get("hello.txt"), DirectoryEntry.Type.REGULAR, ZERO_ID);
-        final DirectoryEntry fromRelative = new 
DirectoryEntry(Paths.get("subdir/hello.txt"), DirectoryEntry.Type.REGULAR, 
ZERO_ID);
-        final DirectoryEntry fromAbsolute = new 
DirectoryEntry(Paths.get("hello.txt").toAbsolutePath(), 
DirectoryEntry.Type.REGULAR, ZERO_ID);
-        assertEquals(fromLabel, fromRelative);
-        assertEquals(fromLabel, fromAbsolute);
-        assertArrayEquals(fromLabel.toTreeEntryBytes(), 
fromRelative.toTreeEntryBytes());
-        assertArrayEquals(fromLabel.toTreeEntryBytes(), 
fromAbsolute.toTreeEntryBytes());
+        assertFalse(regular.equals(null));
+        assertFalse(regular.equals("foo"));
     }
 
     /**
@@ -138,56 +156,120 @@ class GitIdentifiersTest {
      */
     @Test
     void testDirectoryEntrySortOrder() {
-        final DirectoryEntry alpha = new 
DirectoryEntry(Paths.get("alpha.txt"), DirectoryEntry.Type.REGULAR, ZERO_ID);
-        final DirectoryEntry fooTxt = new DirectoryEntry(Paths.get("foo.txt"), 
DirectoryEntry.Type.REGULAR, ZERO_ID);
-        final DirectoryEntry fooDir = new DirectoryEntry(Paths.get("foo"), 
DirectoryEntry.Type.DIRECTORY, ZERO_ID);
-        final DirectoryEntry foobar = new DirectoryEntry(Paths.get("foobar"), 
DirectoryEntry.Type.REGULAR, ZERO_ID);
-        final DirectoryEntry zeta = new DirectoryEntry(Paths.get("zeta.txt"), 
DirectoryEntry.Type.REGULAR, ZERO_ID);
+        final DirectoryEntry alpha = new DirectoryEntry("alpha.txt", 
GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry fooTxt = new DirectoryEntry("foo.txt", 
GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry fooDir = new DirectoryEntry("foo", 
GitIdentifiers.FileMode.DIRECTORY, ZERO_ID);
+        final DirectoryEntry foobar = new DirectoryEntry("foobar", 
GitIdentifiers.FileMode.REGULAR, ZERO_ID);
+        final DirectoryEntry zeta = new DirectoryEntry("zeta.txt", 
GitIdentifiers.FileMode.REGULAR, ZERO_ID);
         final List<DirectoryEntry> entries = new 
ArrayList<>(Arrays.asList(zeta, foobar, fooDir, alpha, fooTxt));
         entries.sort(DirectoryEntry::compareTo);
         assertEquals(Arrays.asList(alpha, fooTxt, fooDir, foobar, zeta), 
entries);
     }
 
+    @ParameterizedTest
+    @MethodSource
+    void testTreeIdBuilder(final String algorithm, final String helloHex, 
final String linkHex, final String runHex, final String srcHex) throws 
Exception {
+        final byte[] helloContent = "hello\n".getBytes(StandardCharsets.UTF_8);
+        final byte[] runContent = 
"#!/bin/sh\n".getBytes(StandardCharsets.UTF_8);
+        final byte[] linkTarget = "subdir".getBytes(StandardCharsets.UTF_8);
+        final MessageDigest md = DigestUtils.getDigest(algorithm);
+
+        // Verify individual blob IDs against pre-computed constants.
+        assertArrayEquals(Hex.decodeHex(helloHex), GitIdentifiers.blobId(md, 
helloContent));
+        assertArrayEquals(Hex.decodeHex(linkHex), GitIdentifiers.blobId(md, 
linkTarget));
+        assertArrayEquals(Hex.decodeHex(runHex), GitIdentifiers.blobId(md, 
runContent));
+
+        // Entries are supplied out of order to verify that the builder sorts 
them correctly.
+        final GitIdentifiers.TreeIdBuilder builder = 
GitIdentifiers.treeIdBuilder(md);
+        builder.addDirectory("src");
+        builder.addFile(GitIdentifiers.FileMode.EXECUTABLE, "run.sh", 
runContent);
+        builder.addFile(GitIdentifiers.FileMode.REGULAR, "hello.txt", 
helloContent);
+        builder.addFile(GitIdentifiers.FileMode.SYMBOLIC_LINK, "link.txt", 
linkTarget);
+
+        // Expected tree body: entries in Git sort order (hello.txt, link.txt, 
run.sh, src/).
+        // Each entry: hex-encoded "<mode> <name>\0" followed by the object id.
+        final byte[] treeBody = 
Hex.decodeHex("3130303634342068656c6c6f2e74787400" + helloHex +   // 100644 
hello.txt\0
+                "313230303030206c696e6b2e74787400" + linkHex +   // 120000 
link.txt\0
+                "3130303735352072756e2e736800" + runHex +   // 100755 run.sh\0
+                "34303030302073726300" + srcHex);   // 40000 src\0
+        md.reset();
+        DigestUtils.updateDigest(md, ("tree " + treeBody.length + 
"\0").getBytes(StandardCharsets.UTF_8));
+        assertArrayEquals(DigestUtils.updateDigest(md, treeBody).digest(), 
builder.build());
+    }
+
     @Test
-    void testBlobIdSymlink(@TempDir final Path tempDir) throws Exception {
-        final Path subDir = Files.createDirectory(tempDir.resolve("subdir"));
-        Files.write(subDir.resolve("file.txt"), 
"hello".getBytes(StandardCharsets.UTF_8));
-        final Path linkToDir;
-        final Path linkToFile;
-        try {
-            linkToDir = 
Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir"));
-            linkToFile = 
Files.createSymbolicLink(tempDir.resolve("link-to-file"), 
Paths.get("subdir/file.txt"));
-        } catch (final UnsupportedOperationException e) {
-            Assumptions.assumeTrue(false, "Symbolic links not supported on 
this filesystem");
-            return;
-        }
-        final MessageDigest sha1 = DigestUtils.getSha1Digest();
-        
assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), 
GitIdentifiers.blobId(sha1, linkToDir));
-        
assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), 
GitIdentifiers.blobId(sha1, linkToFile));
+    void testTreeIdBuilderAddFileInputStream() throws Exception {
+        final MessageDigest md = DigestUtils.getSha1Digest();
+        final byte[] content = "Hello, 
World!\n".getBytes(StandardCharsets.UTF_8);
+
+        final GitIdentifiers.TreeIdBuilder byteArrayBuilder = 
GitIdentifiers.treeIdBuilder(md);
+        byteArrayBuilder.addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", 
content);
+        final byte[] expected = byteArrayBuilder.build();
+
+        final GitIdentifiers.TreeIdBuilder streamBuilder = 
GitIdentifiers.treeIdBuilder(md);
+        streamBuilder.addFile(GitIdentifiers.FileMode.REGULAR, "file.txt", new 
ByteArrayInputStream(content));
+        assertArrayEquals(expected, streamBuilder.build());
+
+        final GitIdentifiers.TreeIdBuilder sizedStreamBuilder = 
GitIdentifiers.treeIdBuilder(md);
+        sizedStreamBuilder.addFile(GitIdentifiers.FileMode.REGULAR, 
"file.txt", content.length, new ByteArrayInputStream(content));
+        assertArrayEquals(expected, sizedStreamBuilder.build());
+    }
+
+    @Test
+    void testTreeIdBuilderEmptyPathSegments() throws Exception {
+        final MessageDigest md = DigestUtils.getSha1Digest();
+        final byte[] content = "hello\n".getBytes(StandardCharsets.UTF_8);
+
+        // Canonical form
+        final GitIdentifiers.TreeIdBuilder canonical = 
GitIdentifiers.treeIdBuilder(md);
+        canonical.addFile(GitIdentifiers.FileMode.REGULAR, "subdir/file.txt", 
content);
+        final byte[] expected = canonical.build();
+
+        // Leading slash
+        final GitIdentifiers.TreeIdBuilder withLeading = 
GitIdentifiers.treeIdBuilder(md);
+        withLeading.addFile(GitIdentifiers.FileMode.REGULAR, 
"/subdir/file.txt", content);
+        assertArrayEquals(expected, withLeading.build());
+
+        // Consecutive slashes
+        final GitIdentifiers.TreeIdBuilder withDouble = 
GitIdentifiers.treeIdBuilder(md);
+        withDouble.addFile(GitIdentifiers.FileMode.REGULAR, 
"subdir//file.txt", content);
+        assertArrayEquals(expected, withDouble.build());
+
+        // addDirectory with leading/trailing slashes
+        final GitIdentifiers.TreeIdBuilder viaDirectory = 
GitIdentifiers.treeIdBuilder(md);
+        
viaDirectory.addDirectory("/subdir/").addFile(GitIdentifiers.FileMode.REGULAR, 
"file.txt", content);
+        assertArrayEquals(expected, viaDirectory.build());
     }
 
     @ParameterizedTest
-    @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, 
MessageDigestAlgorithms.SHA_256})
-    void testTreeIdCollection(final String algorithm) throws Exception {
-        final byte[] helloId = 
Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0");
-        final byte[] runId = 
Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9");
-        final byte[] linkId = 
Hex.decodeHex("1234567890abcdef1234567890abcdef12345678");
-        final byte[] srcId = 
Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef");
-
-        // Entries are supplied out of order to verify that the method sorts 
them correctly.
-        final List<DirectoryEntry> entries = new ArrayList<>();
-        entries.add(new DirectoryEntry(Paths.get("src"), 
DirectoryEntry.Type.DIRECTORY, srcId));
-        entries.add(new DirectoryEntry(Paths.get("run.sh"), 
DirectoryEntry.Type.EXECUTABLE, runId));
-        entries.add(new DirectoryEntry(Paths.get("hello.txt"), 
DirectoryEntry.Type.REGULAR, helloId));
-        entries.add(new DirectoryEntry(Paths.get("link.txt"), 
DirectoryEntry.Type.SYMBOLIC_LINK, linkId));
-
-        // Compute expected value
-        final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX);
-        final MessageDigest md = DigestUtils.getDigest(algorithm);
-        DigestUtils.updateDigest(md, ("tree " + treeBody.length + 
"\0").getBytes(StandardCharsets.UTF_8));
-        final byte[] expected = DigestUtils.updateDigest(md, 
treeBody).digest();
+    @ValueSource(strings = {".", ".."})
+    void testTreeIdBuilderInvalidPathSegments(final String segment) {
+        final MessageDigest md = DigestUtils.getSha1Digest();
+        final byte[] data = new byte[0];
+        // Sole path component
+        assertThrows(IllegalArgumentException.class,
+                () -> 
GitIdentifiers.treeIdBuilder(md).addFile(GitIdentifiers.FileMode.REGULAR, 
segment, data));
+        assertThrows(IllegalArgumentException.class,
+                () -> GitIdentifiers.treeIdBuilder(md).addDirectory(segment));
+        // Embedded in a longer path
+        assertThrows(IllegalArgumentException.class,
+                () -> 
GitIdentifiers.treeIdBuilder(md).addFile(GitIdentifiers.FileMode.REGULAR, 
"subdir/" + segment + "/file.txt", data));
+        assertThrows(IllegalArgumentException.class,
+                () -> GitIdentifiers.treeIdBuilder(md).addDirectory("subdir/" 
+ segment));
+    }
+
+    @Test
+    void testTreeIdBuilderNestedFileEquivalentToDirectoryAndFile() throws 
Exception {
+        final MessageDigest md = DigestUtils.getSha1Digest();
+        final byte[] content = "hello\n".getBytes(StandardCharsets.UTF_8);
+
+        final GitIdentifiers.TreeIdBuilder direct = 
GitIdentifiers.treeIdBuilder(md);
+        direct.addFile(GitIdentifiers.FileMode.REGULAR, "nested/file.txt", 
content);
+
+        final GitIdentifiers.TreeIdBuilder indirect = 
GitIdentifiers.treeIdBuilder(md);
+        
indirect.addDirectory("nested").addFile(GitIdentifiers.FileMode.REGULAR, 
"file.txt", content);
 
-        assertArrayEquals(expected, GitIdentifiers.treeId(md, entries));
+        assertArrayEquals(direct.build(), indirect.build());
     }
 
     @Test

Reply via email to