This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch feat/git-identifiers in repository https://gitbox.apache.org/repos/asf/commons-codec.git
commit a7f54b5893863126ca8bb703b45cf399c74d2f5a Author: Piotr P. Karwasz <[email protected]> AuthorDate: Thu Apr 9 13:11:32 2026 +0200 feat: Refactor Git-related methods into `GitIdentifiers` This change moves `gitBlob` and `gitTree` from `DigestUtils` into a separate utility class, to prepare for an enhancement of the provided API. The git tree identifier can be computed for many objects: the most natural is a directory in a filesystem, but we can also compute the identifier on an archive containing this directory. Additional usages will require expanding the API, beyond what can be reasonably contained in `DigestUtils`. --- .../apache/commons/codec/digest/DigestUtils.java | 144 ---------------- .../commons/codec/digest/GitIdentifiers.java | 181 +++++++++++++++++++++ .../commons/codec/digest/DigestUtilsTest.java | 92 ----------- .../commons/codec/digest/GitIdentifiersTest.java | 130 +++++++++++++++ 4 files changed, 311 insertions(+), 236 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java index 7c84b0b0..8970a03d 100644 --- a/src/main/java/org/apache/commons/codec/digest/DigestUtils.java +++ b/src/main/java/org/apache/commons/codec/digest/DigestUtils.java @@ -18,24 +18,17 @@ package org.apache.commons.codec.digest; import java.io.BufferedInputStream; -import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.charset.StandardCharsets; -import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.OpenOption; import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.TreeSet; import org.apache.commons.codec.binary.Hex; 
import org.apache.commons.codec.binary.StringUtils; @@ -191,26 +184,6 @@ public class DigestUtils { } } - /** - * Returns the {@link GitDirectoryEntry.Type} of a file. - * - * @param path The file to check. - * @return A {@link GitDirectoryEntry.Type} - */ - private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) { - // Symbolic links first - if (Files.isSymbolicLink(path)) { - return GitDirectoryEntry.Type.SYMBOLIC_LINK; - } - if (Files.isDirectory(path)) { - return GitDirectoryEntry.Type.DIRECTORY; - } - if (Files.isExecutable(path)) { - return GitDirectoryEntry.Type.EXECUTABLE; - } - return GitDirectoryEntry.Type.REGULAR; - } - /** * Gets an MD2 MessageDigest. * @@ -407,123 +380,6 @@ public class DigestUtils { return getDigest(MessageDigestAlgorithms.SHAKE256_512); } - /** - * Reads through a byte array and return a generalized Git blob identifier. - * - * <p>The identifier is computed in the way described by the - * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash - * algorithm.</p> - * - * <p>When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.</p> - * - * @param messageDigest The MessageDigest to use (for example SHA-1). - * @param data Data to digest. - * @return A generalized Git blob identifier. - * @since 1.22.0 - */ - public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) { - messageDigest.reset(); - updateDigest(messageDigest, gitBlobPrefix(data.length)); - return digest(messageDigest, data); - } - - /** - * Reads through a byte array and return a generalized Git blob identifier. 
- * - * <p>The identifier is computed in the way described by the - * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash - * algorithm.</p> - * - * <p>When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.</p> - * - * @param messageDigest The MessageDigest to use (for example SHA-1). - * @param data Data to digest. - * @param options Options how to open the file. - * @return A generalized Git blob identifier. - * @throws IOException On error accessing the file. - * @since 1.22.0 - */ - public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException { - messageDigest.reset(); - if (Files.isSymbolicLink(data)) { - final byte[] linkTarget = Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8); - updateDigest(messageDigest, gitBlobPrefix(linkTarget.length)); - return digest(messageDigest, linkTarget); - } - updateDigest(messageDigest, gitBlobPrefix(Files.size(data))); - return updateDigest(messageDigest, data, options).digest(); - } - - private static byte[] gitBlobPrefix(final long dataSize) { - return gitPrefix("blob ", dataSize); - } - - private static byte[] gitPrefix(final String prefix, final long dataSize) { - return (prefix + dataSize + "\0").getBytes(StandardCharsets.UTF_8); - } - - /** - * Returns a generalized Git tree identifier. - * - * <p>The identifier is computed in the way described by the - * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash - * algorithm.</p> - * - * <p>When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.</p> - * - * @param messageDigest The MessageDigest to use (for example SHA-1). - * @param entries The directory entries. 
- * @return A generalized Git tree identifier. - */ - static byte[] gitTree(final MessageDigest messageDigest, final Collection<GitDirectoryEntry> entries) { - final TreeSet<GitDirectoryEntry> treeSet = new TreeSet<>(entries); - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - for (final GitDirectoryEntry entry : treeSet) { - final byte[] treeEntryBytes = entry.toTreeEntryBytes(); - baos.write(treeEntryBytes, 0, treeEntryBytes.length); - } - messageDigest.reset(); - updateDigest(messageDigest, gitTreePrefix(baos.size())); - return updateDigest(messageDigest, baos.toByteArray()).digest(); - } - - /** - * Reads through a byte array and return a generalized Git tree identifier. - * - * <p>The identifier is computed in the way described by the - * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash - * algorithm.</p> - * - * <p>When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.</p> - * - * @param messageDigest The MessageDigest to use (for example SHA-1). - * @param data Data to digest. - * @param options Options how to open the file. - * @return A generalized Git tree identifier. - * @throws IOException On error accessing the file. - * @since 1.22.0 - */ - public static byte[] gitTree(final MessageDigest messageDigest, final Path data, final OpenOption... 
options) throws IOException { - final List<GitDirectoryEntry> entries = new ArrayList<>(); - try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) { - for (final Path path : files) { - final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path); - final byte[] rawObjectId; - if (type == GitDirectoryEntry.Type.DIRECTORY) { - rawObjectId = gitTree(messageDigest, path, options); - } else { - rawObjectId = gitBlob(messageDigest, path, options); - } - entries.add(new GitDirectoryEntry(path, type, rawObjectId)); - } - } - return gitTree(messageDigest, entries); - } - - private static byte[] gitTreePrefix(final long dataSize) { - return gitPrefix("tree ", dataSize); - } - /** * Test whether the algorithm is supported. * diff --git a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java new file mode 100644 index 00000000..3cbf48b8 --- /dev/null +++ b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.codec.digest; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.TreeSet; + +/** + * Operations for computing Git object identifiers and their generalizations described by the + * <a href="https://www.swhid.org/swhid-specification/">SWHID specification</a>. + * + * <p>When the hash algorithm is SHA-1, the identifiers produced by this class are identical to those used by Git. + * Other hash algorithms produce generalized identifiers as described by the SWHID specification.</p> + * + * <p>This class is immutable and thread-safe. However, the {@link MessageDigest} instances passed to it generally won't be.</p> + * + * @see <a href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">Git Internals – Git Objects</a> + * @see <a href="https://www.swhid.org/swhid-specification/">SWHID Specification</a> + * @since 1.22.0 + */ +public class GitIdentifiers { + + private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) { + // Symbolic links first + if (Files.isSymbolicLink(path)) { + return GitDirectoryEntry.Type.SYMBOLIC_LINK; + } + if (Files.isDirectory(path)) { + return GitDirectoryEntry.Type.DIRECTORY; + } + if (Files.isExecutable(path)) { + return GitDirectoryEntry.Type.EXECUTABLE; + } + return GitDirectoryEntry.Type.REGULAR; + } + + /** + * Reads through a byte array and returns a generalized Git blob identifier. 
+ * + * <p>The identifier is computed in the way described by the + * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash + * algorithm.</p> + * + * <p>When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.</p> + * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param data Data to digest. + * @return A generalized Git blob identifier. + */ + public static byte[] blobId(final MessageDigest messageDigest, final byte[] data) { + messageDigest.reset(); + DigestUtils.updateDigest(messageDigest, gitBlobPrefix(data.length)); + return DigestUtils.digest(messageDigest, data); + } + + /** + * Reads through a file and returns a generalized Git blob identifier. + * + * <p>The identifier is computed in the way described by the + * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash + * algorithm.</p> + * + * <p>When the hash algorithm is SHA-1, the identifier is identical to Git blob identifier and SWHID contents identifier.</p> + * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param data Path to the file to digest. + * @param options Options how to open the file. + * @return A generalized Git blob identifier. + * @throws IOException On error accessing the file. + * @since 1.22.0 + */ + public static byte[] blobId(final MessageDigest messageDigest, final Path data, final OpenOption... 
options) throws IOException { + messageDigest.reset(); + if (Files.isSymbolicLink(data)) { + final byte[] linkTarget = Files.readSymbolicLink(data).toString().getBytes(StandardCharsets.UTF_8); + DigestUtils.updateDigest(messageDigest, gitBlobPrefix(linkTarget.length)); + return DigestUtils.digest(messageDigest, linkTarget); + } + DigestUtils.updateDigest(messageDigest, gitBlobPrefix(Files.size(data))); + return DigestUtils.updateDigest(messageDigest, data, options).digest(); + } + + private static byte[] gitBlobPrefix(final long dataSize) { + return gitPrefix("blob ", dataSize); + } + + private static byte[] gitPrefix(final String prefix, final long dataSize) { + return (prefix + dataSize + "\0").getBytes(StandardCharsets.UTF_8); + } + + /** + * Returns a generalized Git tree identifier for a collection of directory entries. + * + * <p>The identifier is computed in the way described by the + * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash + * algorithm.</p> + * + * <p>When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.</p> + * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param entries The directory entries. + * @return A generalized Git tree identifier. 
+ */ + static byte[] treeId(final MessageDigest messageDigest, final Collection<GitDirectoryEntry> entries) { + final TreeSet<GitDirectoryEntry> treeSet = new TreeSet<>(entries); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for (final GitDirectoryEntry entry : treeSet) { + final byte[] treeEntryBytes = entry.toTreeEntryBytes(); + baos.write(treeEntryBytes, 0, treeEntryBytes.length); + } + messageDigest.reset(); + DigestUtils.updateDigest(messageDigest, gitTreePrefix(baos.size())); + return DigestUtils.updateDigest(messageDigest, baos.toByteArray()).digest(); + } + + /** + * Reads through a directory and returns a generalized Git tree identifier. + * + * <p>The identifier is computed in the way described by the + * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash + * algorithm.</p> + * + * <p>When the hash algorithm is SHA-1, the identifier is identical to Git tree identifier and SWHID directory identifier.</p> + * + * @param messageDigest The MessageDigest to use (for example SHA-1). + * @param data Path to the directory to digest. + * @param options Options how to open files within the directory. + * @return A generalized Git tree identifier. + * @throws IOException On error accessing the directory or its contents. + * @since 1.22.0 + */ + public static byte[] treeId(final MessageDigest messageDigest, final Path data, final OpenOption... 
options) throws IOException { + final List<GitDirectoryEntry> entries = new ArrayList<>(); + try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) { + for (final Path path : files) { + final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path); + final byte[] rawObjectId; + if (type == GitDirectoryEntry.Type.DIRECTORY) { + rawObjectId = treeId(messageDigest, path, options); + } else { + rawObjectId = blobId(messageDigest, path, options); + } + entries.add(new GitDirectoryEntry(path, type, rawObjectId)); + } + } + return treeId(messageDigest, entries); + } + + private static byte[] gitTreePrefix(final long dataSize) { + return gitPrefix("tree ", dataSize); + } + + private GitIdentifiers() { + // utility class + } +} diff --git a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java index 7d1e72b0..6f7160ba 100644 --- a/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java +++ b/src/test/java/org/apache/commons/codec/digest/DigestUtilsTest.java @@ -32,14 +32,11 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.RandomAccessFile; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.security.MessageDigest; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; import java.util.Locale; import java.util.Random; import java.util.stream.Stream; @@ -50,14 +47,11 @@ import org.apache.commons.lang3.JavaVersion; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.SystemUtils; import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import 
org.junit.jupiter.params.provider.MethodSource; -import org.junit.jupiter.params.provider.ValueSource; /** * Tests {@link DigestUtils}. @@ -244,31 +238,6 @@ class DigestUtilsTest { "CA 92 BF 0B E5 61 5E 96 95 9D 76 71 97 A0 BE EB"; // @formatter:on - /** - * Binary body of the test tree object used in {@link #testGitTreeCollection}. - * - * <p>Each entry has the format {@code <mode> SP <name> NUL <20-byte-object-id>}.</p> - */ - private static final String TREE_BODY_HEX = - // 100644 hello.txt\0 + objectId - "3130303634342068656c6c6f2e74787400" + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" + - // 120000 link.txt\0 + objectId - "313230303030206c696e6b2e74787400" + "1234567890abcdef1234567890abcdef12345678" + - // 100755 run.sh\0 + objectId - "3130303735352072756e2e736800" + "f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" + - // 40000 src\0 + objectId - "34303030302073726300" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"; - - static Stream<Arguments> gitBlobProvider() { - return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"), - Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"), - Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719")); - } - - private static Path resourcePath(final String resourceName) throws Exception { - return Paths.get(DigestUtilsTest.class.getClassLoader().getResource(resourceName).toURI()); - } - static Stream<Arguments> testShake128_256() { // @formatter:off return Stream.of( @@ -506,67 +475,6 @@ class DigestUtilsTest { assertEquals(MessageDigestAlgorithms.MD5, digestUtils.getMessageDigest().getAlgorithm()); } - @ParameterizedTest - @MethodSource("gitBlobProvider") - void testGitBlobByteArray(final String resourceName, final String expectedSha1Hex) throws Exception { - final byte[] data = Files.readAllBytes(resourcePath(resourceName)); - assertArrayEquals(Hex.decodeHex(expectedSha1Hex), 
DigestUtils.gitBlob(DigestUtils.getSha1Digest(), data)); - } - - @ParameterizedTest - @MethodSource("gitBlobProvider") - void testGitBlobPath(final String resourceName, final String expectedSha1Hex) throws Exception { - assertArrayEquals(Hex.decodeHex(expectedSha1Hex), DigestUtils.gitBlob(DigestUtils.getSha1Digest(), resourcePath(resourceName))); - } - - @Test - void testGitBlobSymlink(@TempDir final Path tempDir) throws Exception { - final Path subDir = Files.createDirectory(tempDir.resolve("subdir")); - Files.write(subDir.resolve("file.txt"), "hello".getBytes(StandardCharsets.UTF_8)); - final Path linkToDir; - final Path linkToFile; - try { - linkToDir = Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir")); - linkToFile = Files.createSymbolicLink(tempDir.resolve("link-to-file"), Paths.get("subdir/file.txt")); - } catch (final UnsupportedOperationException e) { - Assumptions.assumeTrue(false, "Symbolic links not supported on this filesystem"); - return; - } - final MessageDigest sha1 = DigestUtils.getSha1Digest(); - assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), DigestUtils.gitBlob(sha1, linkToDir)); - assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), DigestUtils.gitBlob(sha1, linkToFile)); - } - - @ParameterizedTest - @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, MessageDigestAlgorithms.SHA_256}) - void testGitTreeCollection(final String algorithm) throws Exception { - final byte[] helloId = Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"); - final byte[] runId = Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9"); - final byte[] linkId = Hex.decodeHex("1234567890abcdef1234567890abcdef12345678"); - final byte[] srcId = Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"); - - // Entries are supplied out of order to verify that the method sorts them correctly. 
- final List<GitDirectoryEntry> entries = new ArrayList<>(); - entries.add(new GitDirectoryEntry(Paths.get("src"), GitDirectoryEntry.Type.DIRECTORY, srcId)); - entries.add(new GitDirectoryEntry(Paths.get("run.sh"), GitDirectoryEntry.Type.EXECUTABLE, runId)); - entries.add(new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, helloId)); - entries.add(new GitDirectoryEntry(Paths.get("link.txt"), GitDirectoryEntry.Type.SYMBOLIC_LINK, linkId)); - - // Compute expected value - final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX); - final MessageDigest md = DigestUtils.getDigest(algorithm); - DigestUtils.updateDigest(md, ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8)); - final byte[] expected = DigestUtils.updateDigest(md, treeBody).digest(); - - assertArrayEquals(expected, DigestUtils.gitTree(md, entries)); - } - - @Test - void testGitTreePath() throws Exception { - assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"), - DigestUtils.gitTree(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest"))); - } - @Test void testInternalNoSuchAlgorithmException() { assertThrows(IllegalArgumentException.class, () -> DigestUtils.getDigest("Bogus Bogus")); diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java new file mode 100644 index 00000000..075e0823 --- /dev/null +++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.digest; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.apache.commons.codec.binary.Hex; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +/** + * Tests {@link GitIdentifiers}. + */ +class GitIdentifiersTest { + + /** + * Binary body of the test tree object used in {@link #testTreeIdCollection}. 
+ * + * <p>Each entry has the format {@code <mode> SP <name> NUL <20-byte-object-id>}.</p> + */ + private static final String TREE_BODY_HEX = + // 100644 hello.txt\0 + objectId + "3130303634342068656c6c6f2e74787400" + "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" + + // 120000 link.txt\0 + objectId + "313230303030206c696e6b2e74787400" + "1234567890abcdef1234567890abcdef12345678" + + // 100755 run.sh\0 + objectId + "3130303735352072756e2e736800" + "f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9" + + // 40000 src\0 + objectId + "34303030302073726300" + "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"; + + static Stream<Arguments> blobIdProvider() { + return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"), + Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"), + Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719")); + } + + private static Path resourcePath(final String resourceName) throws Exception { + return Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI()); + } + + @ParameterizedTest + @MethodSource("blobIdProvider") + void testBlobIdByteArray(final String resourceName, final String expectedSha1Hex) throws Exception { + final byte[] data = Files.readAllBytes(resourcePath(resourceName)); + assertArrayEquals(Hex.decodeHex(expectedSha1Hex), GitIdentifiers.blobId(DigestUtils.getSha1Digest(), data)); + } + + @ParameterizedTest + @MethodSource("blobIdProvider") + void testBlobIdPath(final String resourceName, final String expectedSha1Hex) throws Exception { + assertArrayEquals(Hex.decodeHex(expectedSha1Hex), GitIdentifiers.blobId(DigestUtils.getSha1Digest(), resourcePath(resourceName))); + } + + @Test + void testBlobIdSymlink(@TempDir final Path tempDir) throws Exception { + final Path subDir = Files.createDirectory(tempDir.resolve("subdir")); + Files.write(subDir.resolve("file.txt"), 
"hello".getBytes(StandardCharsets.UTF_8)); + final Path linkToDir; + final Path linkToFile; + try { + linkToDir = Files.createSymbolicLink(tempDir.resolve("link-to-dir"), Paths.get("subdir")); + linkToFile = Files.createSymbolicLink(tempDir.resolve("link-to-file"), Paths.get("subdir/file.txt")); + } catch (final UnsupportedOperationException e) { + Assumptions.assumeTrue(false, "Symbolic links not supported on this filesystem"); + return; + } + final MessageDigest sha1 = DigestUtils.getSha1Digest(); + assertArrayEquals(Hex.decodeHex("8bbe8a53790056316b23b7c270f10ab6bf6bb1b4"), GitIdentifiers.blobId(sha1, linkToDir)); + assertArrayEquals(Hex.decodeHex("dfe6ef8392ae13a11ff85419b4fd906d997b6cb7"), GitIdentifiers.blobId(sha1, linkToFile)); + } + + @ParameterizedTest + @ValueSource(strings = {MessageDigestAlgorithms.SHA_1, MessageDigestAlgorithms.SHA_256}) + void testTreeIdCollection(final String algorithm) throws Exception { + final byte[] helloId = Hex.decodeHex("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0"); + final byte[] runId = Hex.decodeHex("f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9"); + final byte[] linkId = Hex.decodeHex("1234567890abcdef1234567890abcdef12345678"); + final byte[] srcId = Hex.decodeHex("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"); + + // Entries are supplied out of order to verify that the method sorts them correctly. 
+ final List<GitDirectoryEntry> entries = new ArrayList<>(); + entries.add(new GitDirectoryEntry(Paths.get("src"), GitDirectoryEntry.Type.DIRECTORY, srcId)); + entries.add(new GitDirectoryEntry(Paths.get("run.sh"), GitDirectoryEntry.Type.EXECUTABLE, runId)); + entries.add(new GitDirectoryEntry(Paths.get("hello.txt"), GitDirectoryEntry.Type.REGULAR, helloId)); + entries.add(new GitDirectoryEntry(Paths.get("link.txt"), GitDirectoryEntry.Type.SYMBOLIC_LINK, linkId)); + + // Compute expected value + final byte[] treeBody = Hex.decodeHex(TREE_BODY_HEX); + final MessageDigest md = DigestUtils.getDigest(algorithm); + DigestUtils.updateDigest(md, ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8)); + final byte[] expected = DigestUtils.updateDigest(md, treeBody).digest(); + + assertArrayEquals(expected, GitIdentifiers.treeId(md, entries)); + } + + @Test + void testTreeIdPath() throws Exception { + assertArrayEquals(Hex.decodeHex("e4b21f6d78ceba6eb7c211ac15e3337ec4614e8a"), + GitIdentifiers.treeId(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest"))); + } +}
