This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push: new c9431e0 COMPRESS-602 - Migrate zip package to use NIO #236 c9431e0 is described below commit c9431e06a61bb26b777af4c0e57d9929e0333c06 Author: Gary Gregory <garydgreg...@gmail.com> AuthorDate: Sun Dec 26 11:02:54 2021 -0500 COMPRESS-602 - Migrate zip package to use NIO #236 Update GitHub patch #236 from Postelnicu George with all of my comments from today addressed which was simpler than going round and round and re-reviewing a large PR. Credit is in changes.xml. --- src/changes/changes.xml | 3 + .../archivers/zip/ParallelScatterZipCreator.java | 5 +- .../archivers/zip/ScatterZipOutputStream.java | 28 +- .../archivers/zip/ZipArchiveOutputStream.java | 32 +- .../commons/compress/archivers/zip/ZipFile.java | 77 +++- .../archivers/zip/ZipSplitOutputStream.java | 46 +- .../zip/ZipSplitReadOnlySeekableByteChannel.java | 107 ++++- .../FileBasedScatterGatherBackingStore.java | 29 +- .../commons/compress/utils/FileNameUtils.java | 81 +++- .../utils/MultiReadOnlySeekableByteChannel.java | 26 +- .../archivers/zip/ZipMemoryFileSystemTest.java | 503 +++++++++++++++++++++ .../commons/compress/utils/FileNameUtilsTest.java | 57 ++- .../ZipSplitReadOnlySeekableByteChannelTest.java | 29 ++ 13 files changed, 920 insertions(+), 103 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 82c721d..ab08f47 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -62,6 +62,9 @@ The <action> type attribute can be add,update,fix,remove. Github Pull Request #214. </action> <!-- ADD --> + <action issue="COMPRESS-602" type="add" dev="ggregory" due-to="Postelnicu George, Gary Gregory"> + Migrate zip package to use NIO #236. 
+ </action> <!-- none yet --> <!-- UPDATE --> <action type="update" dev="ggregory" due-to="Dependabot"> diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java b/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java index 3bced78..5ed7eb3 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ParallelScatterZipCreator.java @@ -22,8 +22,9 @@ import org.apache.commons.compress.parallel.InputStreamSupplier; import org.apache.commons.compress.parallel.ScatterGatherBackingStore; import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier; -import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Deque; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedDeque; @@ -66,7 +67,7 @@ public class ParallelScatterZipCreator { @Override public ScatterGatherBackingStore get() throws IOException { - final File tempFile = File.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet()); + final Path tempFile = Files.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet()); return new FileBasedScatterGatherBackingStore(tempFile); } } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java index 3e61666..5e78ac9 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ScatterZipOutputStream.java @@ -27,6 +27,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; import java.util.Iterator; import java.util.Queue; import 
java.util.concurrent.ConcurrentLinkedQueue; @@ -186,7 +187,18 @@ public class ScatterZipOutputStream implements Closeable { * @throws FileNotFoundException if the file cannot be found */ public static ScatterZipOutputStream fileBased(final File file) throws FileNotFoundException { - return fileBased(file, Deflater.DEFAULT_COMPRESSION); + return pathBased(file.toPath(), Deflater.DEFAULT_COMPRESSION); + } + + /** + * Create a {@link ScatterZipOutputStream} with default compression level that is backed by a file + * @param path The path to offload compressed data into. + * @return A ScatterZipOutputStream that is ready for use. + * @throws FileNotFoundException if the path cannot be found + * @since 1.22 + */ + public static ScatterZipOutputStream pathBased(final Path path) throws FileNotFoundException { + return pathBased(path, Deflater.DEFAULT_COMPRESSION); } /** @@ -198,7 +210,19 @@ public class ScatterZipOutputStream implements Closeable { * @throws FileNotFoundException if the file cannot be found */ public static ScatterZipOutputStream fileBased(final File file, final int compressionLevel) throws FileNotFoundException { - final ScatterGatherBackingStore bs = new FileBasedScatterGatherBackingStore(file); + return pathBased(file.toPath(), compressionLevel); + } + + /** + * Create a {@link ScatterZipOutputStream} that is backed by a file + * @param path The path to offload compressed data into. + * @param compressionLevel The compression level to use, @see #Deflater + * @return A ScatterZipOutputStream that is ready for use. 
+ * @throws FileNotFoundException if the path cannot be found + * @since 1.22 + */ + public static ScatterZipOutputStream pathBased(final Path path, final int compressionLevel) throws FileNotFoundException { + final ScatterGatherBackingStore bs = new FileBasedScatterGatherBackingStore(path); // lifecycle is bound to the ScatterZipOutputStream returned final StreamCompressor sc = StreamCompressor.create(compressionLevel, bs); //NOSONAR return new ScatterZipOutputStream(bs, sc); diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java index 7b6558e..d3784ae 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java @@ -365,8 +365,28 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { * @since 1.20 */ public ZipArchiveOutputStream(final File file, final long zipSplitSize) throws IOException { + this(file.toPath(), zipSplitSize); + } + + /** + * Creates a split ZIP Archive. + * <p>The files making up the archive will use Z01, Z02, + * ... extensions and the last part of it will be the given {@code + * file}.</p> + * <p>Even though the stream writes to a file this stream will + * behave as if no random access was possible. This means the + * sizes of stored entries need to be known before the actual + * entry data is written.</p> + * @param path the path to the file that will become the last part of the split archive + * @param zipSplitSize maximum size of a single part of the split + * archive created by this stream. Must be between 64kB and about 4GB. 
+ * @throws IOException on error + * @throws IllegalArgumentException if zipSplitSize is not in the required range + * @since 1.22 + */ + public ZipArchiveOutputStream(final Path path, final long zipSplitSize) throws IOException { def = new Deflater(level, true); - this.out = new ZipSplitOutputStream(file, zipSplitSize); + this.out = new ZipSplitOutputStream(path, zipSplitSize); streamCompressor = StreamCompressor.create(this.out, def); channel = null; isSplitZip = true; @@ -519,6 +539,16 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { } /** + * Returns the total number of bytes written to this stream. + * @return the number of written bytes + * @since 1.22 + */ + @Override + public long getBytesWritten() { + return streamCompressor.getTotalBytesWritten(); + } + + /** * {@inheritDoc} * @throws Zip64RequiredException if the archive's size exceeds 4 * GByte or there are more than 65535 entries inside the archive diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index 8165b0f..d80a9d8 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -29,6 +29,7 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.Collections; @@ -175,6 +176,16 @@ public class ZipFile implements Closeable { } /** + * Opens the given path for reading, assuming "UTF8" for file names. + * @param path path to the archive. + * @throws IOException if an error occurs while reading the file. + * @since 1.22 + */ + public ZipFile(final Path path) throws IOException { + this(path, ZipEncodingHelper.UTF8); + } + + /** * Opens the given file for reading, assuming "UTF8". 
* * @param name name of the archive. @@ -182,7 +193,7 @@ public class ZipFile implements Closeable { * @throws IOException if an error occurs while reading the file. */ public ZipFile(final String name) throws IOException { - this(new File(name), ZipEncodingHelper.UTF8); + this(new File(name).toPath(), ZipEncodingHelper.UTF8); } /** @@ -196,7 +207,7 @@ public class ZipFile implements Closeable { * @throws IOException if an error occurs while reading the file. */ public ZipFile(final String name, final String encoding) throws IOException { - this(new File(name), encoding, true); + this(new File(name).toPath(), encoding, true); } /** @@ -210,7 +221,20 @@ public class ZipFile implements Closeable { * @throws IOException if an error occurs while reading the file. */ public ZipFile(final File f, final String encoding) throws IOException { - this(f, encoding, true); + this(f.toPath(), encoding, true); + } + + /** + * Opens the given path for reading, assuming the specified + * encoding for file names and scanning for unicode extra fields. + * @param path path to the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @throws IOException if an error occurs while reading the file. + * @since 1.22 + */ + public ZipFile(final Path path, final String encoding) throws IOException { + this(path, encoding, true); } /** @@ -227,7 +251,23 @@ public class ZipFile implements Closeable { */ public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) throws IOException { - this(f, encoding, useUnicodeExtraFields, false); + this(f.toPath(), encoding, useUnicodeExtraFields, false); + } + + /** + * Opens the given path for reading, assuming the specified + * encoding for file names. + * @param path path to the archive. 
+ * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + * @throws IOException if an error occurs while reading the file. + * @since 1.22 + */ + public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) + throws IOException { + this(path, encoding, useUnicodeExtraFields, false); } /** @@ -263,6 +303,35 @@ public class ZipFile implements Closeable { } /** + * Opens the given path for reading, assuming the specified + * encoding for file names. + * <p>By default the central directory record and all local file headers of the archive will be read immediately + * which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter + * can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header + * may contain information not present inside of the central directory which will not be available when the argument + * is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code + * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. Also + * {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code + * true}.</p> + * @param path path to the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param useUnicodeExtraFields whether to use InfoZIP Unicode + * Extra Fields (if present) to set the file names. + * @param ignoreLocalFileHeader whether to ignore information + * stored inside the local file header (see the notes in this method's javadoc) + * @throws IOException if an error occurs while reading the file. 
+ * @since 1.22 + */ + public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, + final boolean ignoreLocalFileHeader) + throws IOException { + this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)), + path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, + true, ignoreLocalFileHeader); + } + + /** * Opens the given channel for reading, assuming "UTF8" for file names. * * <p>{@link diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java index bd7f235..5b48fca 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitOutputStream.java @@ -23,6 +23,9 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.Objects; /** * Used internally by {@link ZipArchiveOutputStream} when creating a split archive. @@ -31,7 +34,7 @@ import java.nio.file.Files; */ class ZipSplitOutputStream extends OutputStream { private OutputStream outputStream; - private File zipFile; + private Path zipFile; private final long splitSize; private int currentSplitSegmentIndex; private long currentSplitSegmentBytesWritten; @@ -53,19 +56,28 @@ class ZipSplitOutputStream extends OutputStream { * Create a split zip. If the zip file is smaller than the split size, * then there will only be one split zip, and its suffix is .zip, * otherwise the split segments should be like .z01, .z02, ... .z(N-1), .zip - * * @param zipFile the zip file to write to * @param splitSize the split size */ public ZipSplitOutputStream(final File zipFile, final long splitSize) throws IllegalArgumentException, IOException { + this(zipFile.toPath(), splitSize); + } + + /** + * Create a split zip. 
If the zip file is smaller than the split size, + * then there will only be one split zip, and its suffix is .zip, + * otherwise the split segments should be like .z01, .z02, ... .z(N-1), .zip + * @param zipFile the path to zip file to write to + * @param splitSize the split size + * @since 1.22 + */ + public ZipSplitOutputStream(final Path zipFile, final long splitSize) throws IllegalArgumentException, IOException { if (splitSize < ZIP_SEGMENT_MIN_SIZE || splitSize > ZIP_SEGMENT_MAX_SIZE) { throw new IllegalArgumentException("zip split segment size should between 64K and 4,294,967,295"); } - this.zipFile = zipFile; this.splitSize = splitSize; - - this.outputStream = Files.newOutputStream(zipFile.toPath()); + this.outputStream = Files.newOutputStream(zipFile); // write the zip split signature 0x08074B50 to the zip file writeZipSplitSignature(); } @@ -149,12 +161,9 @@ class ZipSplitOutputStream extends OutputStream { throw new IOException("This archive has already been finished"); } - final String zipFileBaseName = FileNameUtils.getBaseName(zipFile.getName()); - final File lastZipSplitSegmentFile = new File(zipFile.getParentFile(), zipFileBaseName + ".zip"); + final String zipFileBaseName = FileNameUtils.getBaseName(zipFile); outputStream.close(); - if (!zipFile.renameTo(lastZipSplitSegmentFile)) { - throw new IOException("Failed to rename " + zipFile + " to " + lastZipSplitSegmentFile); - } + Files.move(zipFile, zipFile.resolveSibling(zipFileBaseName + ".zip"), StandardCopyOption.ATOMIC_MOVE); finished = true; } @@ -164,19 +173,17 @@ class ZipSplitOutputStream extends OutputStream { * @throws IOException */ private void openNewSplitSegment() throws IOException { - File newFile; + Path newFile; if (currentSplitSegmentIndex == 0) { outputStream.close(); newFile = createNewSplitSegmentFile(1); - if (!zipFile.renameTo(newFile)) { - throw new IOException("Failed to rename " + zipFile + " to " + newFile); - } + Files.move(zipFile, newFile, 
StandardCopyOption.ATOMIC_MOVE); } newFile = createNewSplitSegmentFile(null); outputStream.close(); - outputStream = Files.newOutputStream(newFile.toPath()); + outputStream = Files.newOutputStream(newFile); currentSplitSegmentBytesWritten = 0; zipFile = newFile; currentSplitSegmentIndex++; @@ -215,9 +222,9 @@ class ZipSplitOutputStream extends OutputStream { * @return * @throws IOException */ - private File createNewSplitSegmentFile(final Integer zipSplitSegmentSuffixIndex) throws IOException { + private Path createNewSplitSegmentFile(final Integer zipSplitSegmentSuffixIndex) throws IOException { final int newZipSplitSegmentSuffixIndex = zipSplitSegmentSuffixIndex == null ? (currentSplitSegmentIndex + 2) : zipSplitSegmentSuffixIndex; - final String baseName = FileNameUtils.getBaseName(zipFile.getName()); + final String baseName = FileNameUtils.getBaseName(zipFile); String extension = ".z"; if (newZipSplitSegmentSuffixIndex <= 9) { extension += "0" + newZipSplitSegmentSuffixIndex; @@ -225,9 +232,10 @@ class ZipSplitOutputStream extends OutputStream { extension += newZipSplitSegmentSuffixIndex; } - final File newFile = new File(zipFile.getParent(), baseName + extension); + String dir = Objects.nonNull(zipFile.getParent()) ? 
zipFile.getParent().toAbsolutePath().toString() : "."; + final Path newFile = zipFile.getFileSystem().getPath(dir, baseName + extension); - if (newFile.exists()) { + if (Files.exists(newFile)) { throw new IOException("split zip segment " + baseName + extension + " already exists"); } return newFile; diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitReadOnlySeekableByteChannel.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitReadOnlySeekableByteChannel.java index b37daff..27f1145 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitReadOnlySeekableByteChannel.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipSplitReadOnlySeekableByteChannel.java @@ -28,6 +28,7 @@ import java.io.Serializable; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; @@ -35,6 +36,8 @@ import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like. 
@@ -45,6 +48,8 @@ import java.util.regex.Pattern; * @since 1.20 */ public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel { + + private static final Path[] EMPTY_PATH_ARRAY = {}; private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4; private final ByteBuffer zipSplitSignatureByteBuffer = ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH); @@ -155,30 +160,40 @@ public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableBy * the beginning of a split archive */ public static SeekableByteChannel buildFromLastSplitSegment(final File lastSegmentFile) throws IOException { - final String extension = FileNameUtils.getExtension(lastSegmentFile.getCanonicalPath()); + return buildFromLastSplitSegment(lastSegmentFile.toPath()); + } + + /** + * Concatenates zip split files from the last segment (the extension MUST be .zip) + * @param lastSegmentPath the last segment of zip split files, note that the extension MUST be .zip + * @return SeekableByteChannel that concatenates all zip split files + * @throws IllegalArgumentException if the lastSegmentPath's extension is NOT .zip + * @throws IOException if the first channel doesn't seem to hold + * the beginning of a split archive + * @since 1.22 + */ + public static SeekableByteChannel buildFromLastSplitSegment(final Path lastSegmentPath) throws IOException { + final String extension = FileNameUtils.getExtension(lastSegmentPath); if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) { throw new IllegalArgumentException("The extension of last zip split segment should be .zip"); } - final File parent = lastSegmentFile.getParentFile(); - final String fileBaseName = FileNameUtils.getBaseName(lastSegmentFile.getCanonicalPath()); - final ArrayList<File> splitZipSegments = new ArrayList<>(); + final Path parent = Objects.nonNull(lastSegmentPath.getParent()) ? 
lastSegmentPath.getParent() + : lastSegmentPath.getFileSystem().getPath("."); + final String fileBaseName = FileNameUtils.getBaseName(lastSegmentPath); + final ArrayList<Path> splitZipSegments; // zip split segments should be like z01,z02....z(n-1) based on the zip specification final Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+"); - final File[] children = parent.listFiles(); - if (children != null) { - for (final File file : children) { - if (!pattern.matcher(file.getName()).matches()) { - continue; - } - - splitZipSegments.add(file); - } + try (Stream<Path> walk = Files.walk(parent, 1)) { + splitZipSegments = walk + .filter(Files::isRegularFile) + .filter(path -> pattern.matcher(path.getFileName().toString()).matches()) + .sorted(new ZipSplitSegmentComparator()) + .collect(Collectors.toCollection(ArrayList::new)); } - splitZipSegments.sort(new ZipSplitSegmentComparator()); - return forFiles(lastSegmentFile, splitZipSegments); + return forPaths(lastSegmentPath, splitZipSegments); } /** @@ -193,9 +208,29 @@ public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableBy * the beginning of a split archive */ public static SeekableByteChannel forFiles(final File... files) throws IOException { - final List<SeekableByteChannel> channels = new ArrayList<>(); + final List<Path> paths = new ArrayList<>(); for (final File f : Objects.requireNonNull(files, "files must not be null")) { - channels.add(Files.newByteChannel(f.toPath(), StandardOpenOption.READ)); + paths.add(f.toPath()); + } + + return forPaths(paths.toArray(EMPTY_PATH_ARRAY)); + } + + /** + * Concatenates the given file paths. + * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) + * and these files should be added in correct order (e.g.: .z01, .z02... 
.z99, .zip) + * @return SeekableByteChannel that concatenates all provided files + * @throws NullPointerException if files is null + * @throws IOException if opening a channel for one of the files fails + * @throws IOException if the first channel doesn't seem to hold + * the beginning of a split archive + * @since 1.22 + */ + public static SeekableByteChannel forPaths(final Path... paths) throws IOException { + final List<SeekableByteChannel> channels = new ArrayList<>(); + for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) { + channels.add(Files.newByteChannel(path, StandardOpenOption.READ)); } if (channels.size() == 1) { return channels.get(0); @@ -218,21 +253,45 @@ public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableBy Objects.requireNonNull(files, "files"); Objects.requireNonNull(lastSegmentFile, "lastSegmentFile"); - final List<File> filesList = new ArrayList<>(); + final List<Path> filesList = new ArrayList<>(); for (final File f : files) { + filesList.add(f.toPath()); + } + + return forPaths(lastSegmentFile.toPath(), filesList); + } + + /** + * Concatenates the given file paths. + * @param lastSegmentPath the last segment path of split zip segments, its extension must be .zip + * @param paths the file paths to concatenate except for the last segment, + * note these files should be added in correct order (e.g.: .z01, .z02... 
.z99) + * @return SeekableByteChannel that concatenates all provided files + * @throws IOException if the first channel doesn't seem to hold + * the beginning of a split archive + * @throws NullPointerException if files or lastSegmentPath is null + * @since 1.22 + */ + public static SeekableByteChannel forPaths(final Path lastSegmentPath, final Iterable<Path> paths) throws IOException { + Objects.requireNonNull(paths, "paths"); + Objects.requireNonNull(lastSegmentPath, "lastSegmentPath"); + + final List<Path> filesList = new ArrayList<>(); + for (final Path f : paths) { filesList.add(f); } - filesList.add(lastSegmentFile); + filesList.add(lastSegmentPath); - return forFiles(filesList.toArray(new File[0])); + return forPaths(filesList.toArray(EMPTY_PATH_ARRAY)); } - private static class ZipSplitSegmentComparator implements Comparator<File>, Serializable { + private static class ZipSplitSegmentComparator implements Comparator<Path>, Serializable { private static final long serialVersionUID = 20200123L; + @Override - public int compare(final File file1, final File file2) { - final String extension1 = FileNameUtils.getExtension(file1.getPath()); - final String extension2 = FileNameUtils.getExtension(file2.getPath()); + public int compare(final Path file1, final Path file2) { + final String extension1 = FileNameUtils.getExtension(file1); + final String extension2 = FileNameUtils.getExtension(file2); if (!extension1.startsWith("z")) { return -1; diff --git a/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java b/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java index 5d0d628..362afe9 100644 --- a/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java +++ b/src/main/java/org/apache/commons/compress/parallel/FileBasedScatterGatherBackingStore.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import 
java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; /** * ScatterGatherBackingStore that is backed by a file. @@ -30,14 +32,24 @@ import java.nio.file.Files; * @since 1.10 */ public class FileBasedScatterGatherBackingStore implements ScatterGatherBackingStore { - private final File target; - private final OutputStream os; + private final Path target; + private final OutputStream outputStream; private boolean closed; public FileBasedScatterGatherBackingStore(final File target) throws FileNotFoundException { + this(target.toPath()); + } + + /** + * ScatterGatherBackingStore that is backed by a path. + * @param target The path to offload compressed data into. + * @throws FileNotFoundException if the file doesn't exist + * @since 1.22 + */ + public FileBasedScatterGatherBackingStore(final Path target) throws FileNotFoundException { this.target = target; try { - os = Files.newOutputStream(target.toPath()); + outputStream = Files.newOutputStream(target); } catch (final FileNotFoundException ex) { throw ex; } catch (final IOException ex) { @@ -48,21 +60,20 @@ public class FileBasedScatterGatherBackingStore implements ScatterGatherBackingS @Override public InputStream getInputStream() throws IOException { - return Files.newInputStream(target.toPath()); + return Files.newInputStream(target); } @Override - @SuppressWarnings("ResultOfMethodCallIgnored") public void closeForWriting() throws IOException { if (!closed) { - os.close(); + outputStream.close(); closed = true; } } @Override public void writeOut(final byte[] data, final int offset, final int length) throws IOException { - os.write(data, offset, length); + outputStream.write(data, offset, length); } @Override @@ -70,9 +81,7 @@ public class FileBasedScatterGatherBackingStore implements ScatterGatherBackingS try { closeForWriting(); } finally { - if (target.exists() && !target.delete()) { - target.deleteOnExit(); - } + Files.deleteIfExists(target); } } } diff --git 
a/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java b/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java index e77aee1..127de93 100644 --- a/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java +++ b/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java @@ -19,6 +19,7 @@ package org.apache.commons.compress.utils; import java.io.File; +import java.nio.file.Path; /** * Generic file name utilities. @@ -26,32 +27,35 @@ import java.io.File; */ public class FileNameUtils { + private static String fileNameToBaseName(final String name) { + final int extensionIndex = name.lastIndexOf('.'); + return extensionIndex < 0 ? name : name.substring(0, extensionIndex); + } + + private static String fileNameToExtension(final String name) { + final int extensionIndex = name.lastIndexOf('.'); + return extensionIndex < 0 ? "" : name.substring(extensionIndex + 1); + } + /** - * Returns the extension (i.e. the part after the last ".") of a file. - * - * <p>Will return an empty string if the file name doesn't contain - * any dots. Only the last segment of a the file name is consulted - * - i.e. all leading directories of the {@code filename} - * parameter are skipped.</p> - * - * @return the extension of filename - * @param filename the name of the file to obtain the extension of. + * Gets the basename (i.e. the part up to and not including the + * last ".") of the last path segment of a filename. + * <p>Will return the file name itself if it doesn't contain any + * dots. All leading directories of the {@code filename} parameter + * are skipped.</p> + * @return the basename of filename + * @param path the path of the file to obtain the basename of. 
+ * @since 1.22 */ - public static String getExtension(final String filename) { - if (filename == null) { + public static String getBaseName(final Path path) { + if (path == null) { return null; } - - final String name = new File(filename).getName(); - final int extensionPosition = name.lastIndexOf('.'); - if (extensionPosition < 0) { - return ""; - } - return name.substring(extensionPosition + 1); + return fileNameToBaseName(path.getFileName().toString()); } /** - * Returns the basename (i.e. the part up to and not including the + * Gets the basename (i.e. the part up to and not including the * last ".") of the last path segment of a filename. * * <p>Will return the file name itself if it doesn't contain any @@ -65,14 +69,41 @@ public class FileNameUtils { if (filename == null) { return null; } + return fileNameToBaseName(new File(filename).getName()); + } - final String name = new File(filename).getName(); - - final int extensionPosition = name.lastIndexOf('.'); - if (extensionPosition < 0) { - return name; + /** + * Gets the extension (i.e. the part after the last ".") of a file. + * <p>Will return an empty string if the file name doesn't contain + * any dots. Only the last segment of a the file name is consulted + * - i.e. all leading directories of the {@code filename} + * parameter are skipped.</p> + * @return the extension of filename + * @param path the path of the file to obtain the extension of. + * @since 1.22 + */ + public static String getExtension(final Path path) { + if (path == null) { + return null; } + return fileNameToExtension(path.getFileName().toString()); + } - return name.substring(0, extensionPosition); + /** + * Gets the extension (i.e. the part after the last ".") of a file. + * + * <p>Will return an empty string if the file name doesn't contain + * any dots. Only the last segment of a the file name is consulted + * - i.e. 
all leading directories of the {@code filename} + * parameter are skipped.</p> + * + * @return the extension of filename + * @param filename the name of the file to obtain the extension of. + */ + public static String getExtension(final String filename) { + if (filename == null) { + return null; + } + return fileNameToExtension(new File(filename).getName()); } } diff --git a/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java b/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java index b2233eb..f25c697 100644 --- a/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java +++ b/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java @@ -25,6 +25,7 @@ import java.nio.channels.ClosedChannelException; import java.nio.channels.NonWritableChannelException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; @@ -44,6 +45,7 @@ import java.util.Objects; */ public class MultiReadOnlySeekableByteChannel implements SeekableByteChannel { + private static final Path[] EMPTY_PATH_ARRAY = {}; private final List<SeekableByteChannel> channels; private long globalPosition; private int currentChannelIdx; @@ -241,9 +243,29 @@ public class MultiReadOnlySeekableByteChannel implements SeekableByteChannel { * @return SeekableByteChannel that concatenates all provided files */ public static SeekableByteChannel forFiles(final File... 
files) throws IOException { - final List<SeekableByteChannel> channels = new ArrayList<>(); + final List<Path> paths = new ArrayList<>(); for (final File f : Objects.requireNonNull(files, "files must not be null")) { - channels.add(Files.newByteChannel(f.toPath(), StandardOpenOption.READ)); + paths.add(f.toPath()); + } + + return forPaths(paths.toArray(EMPTY_PATH_ARRAY)); + } + + /** + * Concatenates the given file paths. + * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) + * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip) + * @return SeekableByteChannel that concatenates all provided files + * @throws NullPointerException if paths is null + * @throws IOException if opening a channel for one of the files fails + * @throws IOException if the first channel doesn't seem to hold + * the beginning of a split archive + * @since 1.22 + */ + public static SeekableByteChannel forPaths(final Path... paths) throws IOException { + final List<SeekableByteChannel> channels = new ArrayList<>(); + for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) { + channels.add(Files.newByteChannel(path, StandardOpenOption.READ)); } if (channels.size() == 1) { return channels.get(0); diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipMemoryFileSystemTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipMemoryFileSystemTest.java new file mode 100644 index 0000000..fee9de5 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipMemoryFileSystemTest.java @@ -0,0 +1,503 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.archivers.zip; + +import com.github.marschall.memoryfilesystem.MemoryFileSystemBuilder; +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.ArchiveStreamFactory; +import org.apache.commons.compress.parallel.InputStreamSupplier; +import org.apache.commons.compress.utils.IOUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.zip.Deflater; +import java.util.zip.ZipEntry; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.commons.compress.AbstractTestCase.getPath; +import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest; +import static org.junit.Assert.*; +import static org.junit.Assert.assertArrayEquals; + +public class 
ZipMemoryFileSystemTest { + private Path dir; + + @Before + public void setup() throws IOException { + dir = Files.createTempDirectory(UUID.randomUUID().toString()); + } + + @After + public void tearDown() throws IOException { + try (Stream<Path> walk = Files.walk(dir)) { + walk.sorted(Comparator.reverseOrder()) + .peek(path -> System.out.println("Deleting: " + path.toAbsolutePath())) + .forEach(path -> { + try { + Files.deleteIfExists(path); + } catch (IOException ignore) { + } + }); + } + } + + @Test + public void zipFromMemoryFileSystemOutputStream() throws IOException, ArchiveException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path p = fileSystem.getPath("test.txt"); + Files.write(p, "Test".getBytes(UTF_8)); + + final Path f = Files.createTempFile(dir, "commons-compress-memoryfs", ".zip"); + try (final OutputStream out = Files.newOutputStream(f); + final ArchiveOutputStream zipOut = ArchiveStreamFactory.DEFAULT.createArchiveOutputStream(ArchiveStreamFactory.ZIP, out)) { + final ZipArchiveEntry entry = new ZipArchiveEntry(p, p.getFileName().toString()); + entry.setSize(Files.size(p)); + zipOut.putArchiveEntry(entry); + + Files.copy(p, zipOut); + zipOut.closeArchiveEntry(); + assertEquals(Files.size(f), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipFromMemoryFileSystemSplitFile() throws IOException, NoSuchAlgorithmException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path textFileInMemSys = fileSystem.getPath("test.txt"); + byte[] bytes = new byte[100 * 1024]; + SecureRandom.getInstanceStrong().nextBytes(bytes); + Files.write(textFileInMemSys, bytes); + + final Path zipInLocalSys = Files.createTempFile(dir, "commons-compress-memoryfs", ".zip"); + try (final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(zipInLocalSys.toFile(), 64 * 1024L)) { + final ZipArchiveEntry entry = new ZipArchiveEntry(textFileInMemSys, 
textFileInMemSys.getFileName().toString()); + entry.setSize(Files.size(textFileInMemSys)); + zipOut.putArchiveEntry(entry); + + Files.copy(textFileInMemSys, zipOut); + zipOut.closeArchiveEntry(); + zipOut.finish(); + List<Path> splitZips; + try (Stream<Path> paths = Files.walk(dir, 1)) { + splitZips = paths + .filter(Files::isRegularFile) + .peek(path -> System.out.println("Found: " + path.toAbsolutePath())) + .collect(Collectors.toList()); + } + assertEquals(splitZips.size(), 2); + assertEquals(Files.size(splitZips.get(0)) + + Files.size(splitZips.get(1)) - 4, zipOut.getBytesWritten()); + } + } + + } + + @Test + public void zipFromMemoryFileSystemFile() throws IOException, NoSuchAlgorithmException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path textFileInMemSys = fileSystem.getPath("test.txt"); + byte[] bytes = new byte[100 * 1024]; + SecureRandom.getInstanceStrong().nextBytes(bytes); + Files.write(textFileInMemSys, bytes); + + final Path zipInLocalSys = Files.createTempFile(dir, "commons-compress-memoryfs", ".zip"); + try (final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(zipInLocalSys.toFile())) { + final ZipArchiveEntry entry = new ZipArchiveEntry(textFileInMemSys, textFileInMemSys.getFileName().toString()); + entry.setSize(Files.size(textFileInMemSys)); + zipOut.putArchiveEntry(entry); + + Files.copy(textFileInMemSys, zipOut); + zipOut.closeArchiveEntry(); + zipOut.finish(); + assertEquals(Files.size(zipInLocalSys), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipFromMemoryFileSystemPath() throws IOException, NoSuchAlgorithmException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path textFileInMemSys = fileSystem.getPath("test.txt"); + byte[] bytes = new byte[100 * 1024]; + SecureRandom.getInstanceStrong().nextBytes(bytes); + Files.write(textFileInMemSys, bytes); + + final Path zipInLocalSys = Files.createTempFile(dir, "commons-compress-memoryfs", 
".zip"); + try (final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(zipInLocalSys)) { + final ZipArchiveEntry entry = new ZipArchiveEntry(textFileInMemSys, textFileInMemSys.getFileName().toString()); + entry.setSize(Files.size(textFileInMemSys)); + zipOut.putArchiveEntry(entry); + + Files.copy(textFileInMemSys, zipOut); + zipOut.closeArchiveEntry(); + zipOut.finish(); + assertEquals(Files.size(zipInLocalSys), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipFromMemoryFileSystemSeekableByteChannel() throws IOException, NoSuchAlgorithmException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path textFileInMemSys = fileSystem.getPath("test.txt"); + byte[] bytes = new byte[100 * 1024]; + SecureRandom.getInstanceStrong().nextBytes(bytes); + Files.write(textFileInMemSys, bytes); + + final Path zipInLocalSys = Files.createTempFile(dir, "commons-compress-memoryfs", ".zip"); + try (final SeekableByteChannel byteChannel = Files.newByteChannel(zipInLocalSys, + EnumSet.of(StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)); + final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(byteChannel)) { + final ZipArchiveEntry entry = new ZipArchiveEntry(textFileInMemSys, textFileInMemSys.getFileName().toString()); + entry.setSize(Files.size(textFileInMemSys)); + zipOut.putArchiveEntry(entry); + + Files.copy(textFileInMemSys, zipOut); + zipOut.closeArchiveEntry(); + zipOut.finish(); + assertEquals(Files.size(zipInLocalSys), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipToMemoryFileSystemOutputStream() throws IOException, ArchiveException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path p = fileSystem.getPath("target.zip"); + + try (final OutputStream out = Files.newOutputStream(p); + final ArchiveOutputStream zipOut = ArchiveStreamFactory.DEFAULT.createArchiveOutputStream(ArchiveStreamFactory.ZIP, out)) { + final 
String content = "Test"; + final ZipArchiveEntry entry = new ZipArchiveEntry("test.txt"); + entry.setSize(content.length()); + zipOut.putArchiveEntry(entry); + + zipOut.write("Test".getBytes(UTF_8)); + zipOut.closeArchiveEntry(); + + assertTrue(Files.exists(p)); + assertEquals(Files.size(p), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipToMemoryFileSystemPath() throws IOException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path zipInMemSys = fileSystem.getPath("target.zip"); + + try (final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(zipInMemSys)) { + final String content = "Test"; + final ZipArchiveEntry entry = new ZipArchiveEntry("test.txt"); + entry.setSize(content.length()); + zipOut.putArchiveEntry(entry); + + zipOut.write("Test".getBytes(UTF_8)); + zipOut.closeArchiveEntry(); + + assertTrue(Files.exists(zipInMemSys)); + assertEquals(Files.size(zipInMemSys), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipToMemoryFileSystemSeekableByteChannel() throws IOException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path zipInMemSys = fileSystem.getPath("target.zip"); + + try (final SeekableByteChannel byteChannel = Files.newByteChannel(zipInMemSys, + EnumSet.of(StandardOpenOption.READ, StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE_NEW)); + final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(byteChannel)) { + final String content = "Test"; + final ZipArchiveEntry entry = new ZipArchiveEntry("test.txt"); + entry.setSize(content.length()); + zipOut.putArchiveEntry(entry); + + zipOut.write("Test".getBytes(UTF_8)); + zipOut.closeArchiveEntry(); + + assertTrue(Files.exists(zipInMemSys)); + assertEquals(Files.size(zipInMemSys), zipOut.getBytesWritten()); + } + } + } + + @Test + public void zipToMemoryFileSystemSplitPath() throws IOException, NoSuchAlgorithmException { + try (FileSystem 
fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + final Path zipInMemSys = fileSystem.getPath("target.zip"); + byte[] bytes = new byte[100 * 1024]; + SecureRandom.getInstanceStrong().nextBytes(bytes); + + try (final ArchiveOutputStream zipOut = new ZipArchiveOutputStream(zipInMemSys, 64 * 1024L)) { + final ZipArchiveEntry entry = new ZipArchiveEntry("test.txt"); + entry.setSize(bytes.length); + zipOut.putArchiveEntry(entry); + + zipOut.write(bytes); + + zipOut.closeArchiveEntry(); + zipOut.finish(); + + List<Path> splitZips; + try (Stream<Path> paths = Files.walk(fileSystem.getPath("."), 1)) { + splitZips = paths + .filter(Files::isRegularFile) + .peek(path -> System.out.println("Found: " + path.toAbsolutePath())) + .collect(Collectors.toList()); + } + assertEquals(splitZips.size(), 2); + assertEquals(Files.size(splitZips.get(0)) + + Files.size(splitZips.get(1)) - 4, zipOut.getBytesWritten()); + } + } + + } + + @Test + public void scatterFileInMemory() throws IOException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + Path scatterFile = fileSystem.getPath("scattertest.notzip"); + final ScatterZipOutputStream scatterZipOutputStream = ScatterZipOutputStream.pathBased(scatterFile); + final byte[] B_PAYLOAD = "RBBBBBBS".getBytes(); + final byte[] A_PAYLOAD = "XAAY".getBytes(); + + final ZipArchiveEntry zab = new ZipArchiveEntry("b.txt"); + zab.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload = new ByteArrayInputStream(B_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zab, createPayloadSupplier(payload))); + + final ZipArchiveEntry zae = new ZipArchiveEntry("a.txt"); + zae.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload1 = new ByteArrayInputStream(A_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zae, createPayloadSupplier(payload1))); + + Path target = Files.createTempFile(dir, "scattertest", ".zip"); + final 
ZipArchiveOutputStream outputStream = new ZipArchiveOutputStream(target); + scatterZipOutputStream.writeTo(outputStream); + outputStream.close(); + scatterZipOutputStream.close(); + + final ZipFile zf = new ZipFile(target.toFile()); + final ZipArchiveEntry b_entry = zf.getEntries("b.txt").iterator().next(); + assertEquals(8, b_entry.getSize()); + assertArrayEquals(B_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(b_entry))); + + final ZipArchiveEntry a_entry = zf.getEntries("a.txt").iterator().next(); + assertEquals(4, a_entry.getSize()); + assertArrayEquals(A_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(a_entry))); + zf.close(); + } + + } + + @Test + public void scatterFileWithCompressionInMemory() throws IOException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + Path scatterFile = fileSystem.getPath("scattertest.notzip"); + final ScatterZipOutputStream scatterZipOutputStream = ScatterZipOutputStream.pathBased(scatterFile, + Deflater.BEST_COMPRESSION); + final byte[] B_PAYLOAD = "RBBBBBBS".getBytes(); + final byte[] A_PAYLOAD = "XAAY".getBytes(); + + final ZipArchiveEntry zab = new ZipArchiveEntry("b.txt"); + zab.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload = new ByteArrayInputStream(B_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zab, createPayloadSupplier(payload))); + + final ZipArchiveEntry zae = new ZipArchiveEntry("a.txt"); + zae.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload1 = new ByteArrayInputStream(A_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zae, createPayloadSupplier(payload1))); + + Path target = Files.createTempFile(dir, "scattertest", ".zip"); + final ZipArchiveOutputStream outputStream = new ZipArchiveOutputStream(target); + scatterZipOutputStream.writeTo(outputStream); + outputStream.close(); + scatterZipOutputStream.close(); + + final ZipFile zf = new ZipFile(target.toFile()); + final ZipArchiveEntry 
b_entry = zf.getEntries("b.txt").iterator().next(); + assertEquals(8, b_entry.getSize()); + assertArrayEquals(B_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(b_entry))); + + final ZipArchiveEntry a_entry = zf.getEntries("a.txt").iterator().next(); + assertEquals(4, a_entry.getSize()); + assertArrayEquals(A_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(a_entry))); + zf.close(); + } + + } + + @Test + public void scatterFileWithCompressionAndTargetInMemory() throws IOException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + Path scatterFile = fileSystem.getPath("scattertest.notzip"); + final ScatterZipOutputStream scatterZipOutputStream = ScatterZipOutputStream.pathBased(scatterFile, + Deflater.BEST_COMPRESSION); + final byte[] B_PAYLOAD = "RBBBBBBS".getBytes(); + final byte[] A_PAYLOAD = "XAAY".getBytes(); + + final ZipArchiveEntry zab = new ZipArchiveEntry("b.txt"); + zab.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload = new ByteArrayInputStream(B_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zab, createPayloadSupplier(payload))); + + final ZipArchiveEntry zae = new ZipArchiveEntry("a.txt"); + zae.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload1 = new ByteArrayInputStream(A_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zae, createPayloadSupplier(payload1))); + + Path target = fileSystem.getPath("scattertest.zip"); + final ZipArchiveOutputStream outputStream = new ZipArchiveOutputStream(target); + scatterZipOutputStream.writeTo(outputStream); + outputStream.close(); + scatterZipOutputStream.close(); + + try (final ZipFile zf = new ZipFile(Files.newByteChannel(target, StandardOpenOption.READ), + target.getFileName().toString(), ZipEncodingHelper.UTF8, true)) { + final ZipArchiveEntry b_entry = zf.getEntries("b.txt").iterator().next(); + assertEquals(8, b_entry.getSize()); + assertArrayEquals(B_PAYLOAD, 
IOUtils.toByteArray(zf.getInputStream(b_entry))); + + final ZipArchiveEntry a_entry = zf.getEntries("a.txt").iterator().next(); + assertEquals(4, a_entry.getSize()); + assertArrayEquals(A_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(a_entry))); + } + } + } + + @Test + public void zipFileInMemory() throws IOException { + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + Path scatterFile = fileSystem.getPath("scattertest.notzip"); + final ScatterZipOutputStream scatterZipOutputStream = ScatterZipOutputStream.pathBased(scatterFile, + Deflater.BEST_COMPRESSION); + final byte[] B_PAYLOAD = "RBBBBBBS".getBytes(); + final byte[] A_PAYLOAD = "XAAY".getBytes(); + + final ZipArchiveEntry zab = new ZipArchiveEntry("b.txt"); + zab.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload = new ByteArrayInputStream(B_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zab, createPayloadSupplier(payload))); + + final ZipArchiveEntry zae = new ZipArchiveEntry("a.txt"); + zae.setMethod(ZipEntry.DEFLATED); + final ByteArrayInputStream payload1 = new ByteArrayInputStream(A_PAYLOAD); + scatterZipOutputStream.addArchiveEntry(createZipArchiveEntryRequest(zae, createPayloadSupplier(payload1))); + + Path target = fileSystem.getPath("scattertest.zip"); + final ZipArchiveOutputStream outputStream = new ZipArchiveOutputStream(target); + scatterZipOutputStream.writeTo(outputStream); + outputStream.close(); + scatterZipOutputStream.close(); + + try (final ZipFile zf = new ZipFile(target)) { + final ZipArchiveEntry b_entry = zf.getEntries("b.txt").iterator().next(); + assertEquals(8, b_entry.getSize()); + assertArrayEquals(B_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(b_entry))); + + final ZipArchiveEntry a_entry = zf.getEntries("a.txt").iterator().next(); + assertEquals(4, a_entry.getSize()); + assertArrayEquals(A_PAYLOAD, IOUtils.toByteArray(zf.getInputStream(a_entry))); + } + } + } + + private InputStreamSupplier 
createPayloadSupplier(final ByteArrayInputStream payload) { + return () -> payload; + } + + @Test + public void forPathsReturnCorrectClassInMemory() throws IOException { + final Path firstFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.z01"); + final Path secondFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.z02"); + final Path lastFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.zip"); + byte[] firstBytes = Files.readAllBytes(firstFile); + byte[] secondBytes = Files.readAllBytes(secondFile); + byte[] lastBytes = Files.readAllBytes(lastFile); + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + Files.write(fileSystem.getPath("split_zip_created_by_zip.z01"), firstBytes); + Files.write(fileSystem.getPath("split_zip_created_by_zip.z02"), secondBytes); + Files.write(fileSystem.getPath("split_zip_created_by_zip.zip"), lastBytes); + final ArrayList<Path> list = new ArrayList<>(); + list.add(firstFile); + list.add(secondFile); + + SeekableByteChannel channel = ZipSplitReadOnlySeekableByteChannel.forPaths(lastFile, list); + Assert.assertTrue(channel instanceof ZipSplitReadOnlySeekableByteChannel); + + channel = ZipSplitReadOnlySeekableByteChannel.forPaths(firstFile, secondFile, lastFile); + Assert.assertTrue(channel instanceof ZipSplitReadOnlySeekableByteChannel); + } + } + + @Test + public void positionToSomeZipSplitSegmentInMemory() throws IOException { + final Path firstFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.z01"); + final Path secondFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.z02"); + final Path lastFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.zip"); + byte[] firstBytes = Files.readAllBytes(firstFile); + byte[] secondBytes = Files.readAllBytes(secondFile); + byte[] lastBytes = Files.readAllBytes(lastFile); + final int firstFileSize = 
firstBytes.length; + final int secondFileSize = secondBytes.length; + final int lastFileSize = lastBytes.length; + + try (FileSystem fileSystem = MemoryFileSystemBuilder.newLinux().build()) { + Path lastMemoryPath = fileSystem.getPath("split_zip_created_by_zip.zip"); + Files.write(fileSystem.getPath("split_zip_created_by_zip.z01"), firstBytes); + Files.write(fileSystem.getPath("split_zip_created_by_zip.z02"), secondBytes); + Files.write(lastMemoryPath, lastBytes); + final Random random = new Random(); + final int randomDiskNumber = random.nextInt(3); + final int randomOffset = randomDiskNumber < 2 ? random.nextInt(firstFileSize) : random.nextInt(lastFileSize); + + final ZipSplitReadOnlySeekableByteChannel channel = (ZipSplitReadOnlySeekableByteChannel) + ZipSplitReadOnlySeekableByteChannel.buildFromLastSplitSegment(lastMemoryPath); + channel.position(randomDiskNumber, randomOffset); + long expectedPosition = randomOffset; + + expectedPosition += randomDiskNumber > 0 ? firstFileSize : 0; + expectedPosition += randomDiskNumber > 1 ? 
secondFileSize : 0; + + Assert.assertEquals(expectedPosition, channel.position()); + } + + } +} diff --git a/src/test/java/org/apache/commons/compress/utils/FileNameUtilsTest.java b/src/test/java/org/apache/commons/compress/utils/FileNameUtilsTest.java index fba87c8..5aa0ff2 100644 --- a/src/test/java/org/apache/commons/compress/utils/FileNameUtilsTest.java +++ b/src/test/java/org/apache/commons/compress/utils/FileNameUtilsTest.java @@ -18,36 +18,65 @@ package org.apache.commons.compress.utils; -import org.junit.Test; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import java.nio.file.Path; +import java.nio.file.Paths; + +import org.junit.Test; + public class FileNameUtilsTest { @Test - public void getExtensionBaseCases() { + public void getBaseNameStringBaseCases() { + assertEquals("bar", FileNameUtils.getBaseName("a/b/c/bar.foo")); + assertEquals("foo", FileNameUtils.getBaseName("foo")); + } + + @Test + public void getBaseNamePathBaseCases() { + assertEquals("bar", FileNameUtils.getBaseName(Paths.get("a/b/c/bar.foo"))); + assertEquals("foo", FileNameUtils.getBaseName(Paths.get("foo"))); + } + + @Test + public void getBaseNameStringCornerCases() { + assertNull(FileNameUtils.getBaseName((String) null)); + assertEquals("foo", FileNameUtils.getBaseName("foo.")); + assertEquals("", FileNameUtils.getBaseName("bar/.foo")); + } + + @Test + public void getBaseNamePathCornerCases() { + assertNull(FileNameUtils.getBaseName((Path) null)); + assertEquals("foo", FileNameUtils.getBaseName(Paths.get("foo."))); + assertEquals("", FileNameUtils.getBaseName(Paths.get("bar/.foo"))); + } + + @Test + public void getExtensionStringBaseCases() { assertEquals("foo", FileNameUtils.getExtension("a/b/c/bar.foo")); assertEquals("", FileNameUtils.getExtension("foo")); } @Test - public void getExtensionCornerCases() { - assertNull(FileNameUtils.getExtension(null)); - assertEquals("", FileNameUtils.getExtension("foo.")); - assertEquals("foo", 
FileNameUtils.getExtension("bar/.foo")); + public void getExtensionPathBaseCases() { + assertEquals("foo", FileNameUtils.getExtension(Paths.get("a/b/c/bar.foo"))); + assertEquals("", FileNameUtils.getExtension(Paths.get("foo"))); } @Test - public void getBaseNameBaseCases() { - assertEquals("bar", FileNameUtils.getBaseName("a/b/c/bar.foo")); - assertEquals("foo", FileNameUtils.getBaseName("foo")); + public void getExtensionStringCornerCases() { + assertNull(FileNameUtils.getExtension((String) null)); + assertEquals("", FileNameUtils.getExtension("foo.")); + assertEquals("foo", FileNameUtils.getExtension("bar/.foo")); } @Test - public void getBaseNameCornerCases() { - assertNull(FileNameUtils.getBaseName(null)); - assertEquals("foo", FileNameUtils.getBaseName("foo.")); - assertEquals("", FileNameUtils.getBaseName("bar/.foo")); + public void getExtensionPathCornerCases() { + assertNull(FileNameUtils.getExtension((String) null)); + assertEquals("", FileNameUtils.getExtension(Paths.get("foo."))); + assertEquals("foo", FileNameUtils.getExtension(Paths.get("bar/.foo"))); } } diff --git a/src/test/java/org/apache/commons/compress/utils/ZipSplitReadOnlySeekableByteChannelTest.java b/src/test/java/org/apache/commons/compress/utils/ZipSplitReadOnlySeekableByteChannelTest.java index 4259068..444318e 100644 --- a/src/test/java/org/apache/commons/compress/utils/ZipSplitReadOnlySeekableByteChannelTest.java +++ b/src/test/java/org/apache/commons/compress/utils/ZipSplitReadOnlySeekableByteChannelTest.java @@ -28,12 +28,14 @@ import java.io.File; import java.io.IOException; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.List; import java.util.Random; import static org.apache.commons.compress.AbstractTestCase.getFile; +import static org.apache.commons.compress.AbstractTestCase.getPath; public class ZipSplitReadOnlySeekableByteChannelTest { 
@Rule @@ -178,4 +180,31 @@ public class ZipSplitReadOnlySeekableByteChannelTest { return channels; } + + @Test(expected = NullPointerException.class) + public void forPathsThrowsOnNullArg() throws IOException { + ZipSplitReadOnlySeekableByteChannel.forPaths(null); + } + + @Test(expected = NullPointerException.class) + public void forPathsOfTwoParametersThrowsOnNullArg() throws IOException { + ZipSplitReadOnlySeekableByteChannel.forPaths(null, null); + } + + @Test + public void forPathsReturnCorrectClass() throws IOException { + final Path firstFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.z01"); + final Path secondFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.z02"); + final Path lastFile = getPath("COMPRESS-477/split_zip_created_by_zip/split_zip_created_by_zip.zip"); + + final ArrayList<Path> list = new ArrayList<>(); + list.add(firstFile); + list.add(secondFile); + + SeekableByteChannel channel = ZipSplitReadOnlySeekableByteChannel.forPaths(lastFile, list); + Assert.assertTrue(channel instanceof ZipSplitReadOnlySeekableByteChannel); + + channel = ZipSplitReadOnlySeekableByteChannel.forPaths(firstFile, secondFile, lastFile); + Assert.assertTrue(channel instanceof ZipSplitReadOnlySeekableByteChannel); + } }