Author: bodewig
Date: Thu Feb 26 13:15:14 2009
New Revision: 748133
URL: http://svn.apache.org/viewvc?rev=748133&view=rev
Log:
optionally use UnicodeExtraFields to set names and comments of entries when
reading. SANDBOX-176
Modified:
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
Modified:
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java?rev=748133&r1=748132&r2=748133&view=diff
==============================================================================
---
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
(original)
+++
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
Thu Feb 26 13:15:14 2009
@@ -184,8 +184,12 @@
* <code>"UTF-8"</code> is supported in ZIP file
* version <code>6.3</code> or later.
*/
- static final String decodeName(byte[] name, String encoding) {
+ static final String decodeName(byte[] name, String encoding)
+ throws java.nio.charset.CharacterCodingException {
Charset cs = Charset.forName(encoding);
- return cs.decode(ByteBuffer.wrap(name)).toString();
+ return cs.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT)
+ .decode(ByteBuffer.wrap(name)).toString();
}
}
Modified:
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java?rev=748133&r1=748132&r2=748133&view=diff
==============================================================================
---
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
(original)
+++
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
Thu Feb 26 13:15:14 2009
@@ -22,12 +22,14 @@
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
+import java.util.zip.CRC32;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;
@@ -101,6 +103,11 @@
private RandomAccessFile archive;
/**
+ * Whether to look for and use Unicode extra fields.
+ */
+ private final boolean useUnicodeExtraFields;
+
+ /**
* Opens the given file for reading, assuming "UTF8" for file names.
*
* @param f the archive.
@@ -124,7 +131,7 @@
/**
* Opens the given file for reading, assuming the specified
- * encoding for file names.
+ * encoding for file names and ignoring unicode extra fields.
*
* @param name name of the archive.
* @param encoding the encoding to use for file names, use null
@@ -133,12 +140,12 @@
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(String name, String encoding) throws IOException {
- this(new File(name), encoding);
+ this(new File(name), encoding, false);
}
/**
* Opens the given file for reading, assuming the specified
- * encoding for file names.
+ * encoding for file names and ignoring unicode extra fields.
*
* @param f the archive.
* @param encoding the encoding to use for file names, use null
@@ -147,12 +154,30 @@
* @throws IOException if an error occurs while reading the file.
*/
public ZipFile(File f, String encoding) throws IOException {
+ this(f, encoding, false);
+ }
+
+ /**
+ * Opens the given file for reading, assuming the specified
+ * encoding for file names.
+ *
+ * @param f the archive.
+ * @param encoding the encoding to use for file names, use null
+ * for the platform's default encoding
+ * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra
+ * Fields (if present) to set the file names.
+ *
+ * @throws IOException if an error occurs while reading the file.
+ */
+ public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
+ throws IOException {
this.encoding = encoding;
+ this.useUnicodeExtraFields = useUnicodeExtraFields;
archive = new RandomAccessFile(f, "r");
boolean success = false;
try {
- populateFromCentralDirectory();
- resolveLocalFileHeaderData();
+ Map entriesWithoutEFS = populateFromCentralDirectory();
+ resolveLocalFileHeaderData(entriesWithoutEFS);
success = true;
} finally {
if (!success) {
@@ -269,9 +294,15 @@
* <p>The ZipArchiveEntrys will know all data that can be obtained from
* the central directory alone, but not the data that requires the
* local file header or additional data to be read.</p>
+ *
+ * @return a Map<ZipArchiveEntry, NameAndComment> of
+ * zipentries that didn't have the language encoding flag set when
+ * read.
*/
- private void populateFromCentralDirectory()
+ private Map populateFromCentralDirectory()
throws IOException {
+ HashMap noEFS = new HashMap();
+
positionAtCentralDirectory();
byte[] cfh = new byte[CFH_LEN];
@@ -296,10 +327,10 @@
off += SHORT; // skip version info
final int generalPurposeFlag = ZipShort.getValue(cfh, off);
- final String entryEncoding =
- (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0
- ? ZipArchiveOutputStream.UTF8
- : encoding;
+ final boolean hasEFS =
+ (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
+ final String entryEncoding =
+ hasEFS ? ZipArchiveOutputStream.UTF8 : encoding;
off += SHORT;
@@ -367,7 +398,12 @@
archive.readFully(signatureBytes);
sig = ZipLong.getValue(signatureBytes);
+
+ if (!hasEFS && useUnicodeExtraFields) {
+ noEFS.put(ze, new NameAndComment(fileName, comment));
+ }
}
+ return noEFS;
}
private static final int MIN_EOCD_SIZE =
@@ -462,7 +498,7 @@
* <p>Also records the offsets for the data to read from the
* entries.</p>
*/
- private void resolveLocalFileHeaderData()
+ private void resolveLocalFileHeaderData(Map entriesWithoutEFS)
throws IOException {
Enumeration e = getEntries();
while (e.hasMoreElements()) {
@@ -493,6 +529,12 @@
*/
offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
+ SHORT + SHORT + fileNameLen +
extraFieldLen;
+
+ if (entriesWithoutEFS.containsKey(ze)) {
+ setNameAndCommentFromExtraFields(ze,
+ (NameAndComment)
+ entriesWithoutEFS.get(ze));
+ }
}
}
@@ -538,7 +580,11 @@
return new String(bytes);
} else {
try {
- return ZipEncodingHelper.decodeName(bytes, enc);
+ try {
+ return ZipEncodingHelper.decodeName(bytes, enc);
+ } catch (CharacterCodingException ex) {
+ throw new ZipException(ex.getMessage());
+ }
} catch (java.nio.charset.UnsupportedCharsetException ex) {
// Java 1.4's NIO doesn't recognize a few names that
// String.getBytes does
@@ -568,6 +614,65 @@
}
/**
+ * If the entry has Unicode*ExtraFields and the CRCs of the
+ * names/comments match those of the extra fields, transfer the
+ * known Unicode values from the extra field.
+ */
+ private void setNameAndCommentFromExtraFields(ZipArchiveEntry ze,
+ NameAndComment nc) {
+ UnicodePathExtraField name = (UnicodePathExtraField)
+ ze.getExtraField(UnicodePathExtraField.UPATH_ID);
+ String originalName = ze.getName();
+ String newName = getUnicodeStringIfOriginalMatches(name, nc.name);
+ if (newName != null && !originalName.equals(newName)) {
+ ze.setName(newName);
+ nameMap.remove(originalName);
+ nameMap.put(newName, ze);
+ }
+
+ if (nc.comment != null && nc.comment.length > 0) {
+ UnicodeCommentExtraField cmt = (UnicodeCommentExtraField)
+ ze.getExtraField(UnicodeCommentExtraField.UCOM_ID);
+ String newComment =
+ getUnicodeStringIfOriginalMatches(cmt, nc.comment);
+ if (newComment != null) {
+ ze.setComment(newComment);
+ }
+ }
+ }
+
+ /**
+ * If the stored CRC matches the one of the given name, return the
+ * Unicode name of the given field.
+ *
+ * <p>If the field is null or the CRCs don't match, return null
+ * instead.</p>
+ */
+ private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f,
+ byte[] orig) {
+ if (f != null) {
+ CRC32 crc32 = new CRC32();
+ crc32.update(orig);
+ long origCRC32 = crc32.getValue();
+
+ if (origCRC32 == f.getNameCRC32()) {
+ try {
+ return ZipEncodingHelper
+ .decodeName(f.getUnicodeName(),
+ ZipArchiveOutputStream.UTF8);
+ } catch (CharacterCodingException ex) {
+ // UTF-8 unsupported? That should be impossible, so the
+ // Unicode*ExtraField must contain some bad bytes
+
+ // TODO log this anywhere?
+ return null;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
* InputStream that delegates requests to the underlying
* RandomAccessFile, making sure that only bytes from a certain
* range can be read.
@@ -634,4 +739,12 @@
}
}
+ private static final class NameAndComment {
+ private final byte[] name;
+ private final byte[] comment;
+ private NameAndComment(byte[] name, byte[] comment) {
+ this.name = name;
+ this.comment = comment;
+ }
+ }
}
Modified:
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
URL:
http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java?rev=748133&r1=748132&r2=748133&view=diff
==============================================================================
---
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
(original)
+++
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
Thu Feb 26 13:15:14 2009
@@ -99,6 +99,23 @@
}
}
+ public void testZipFileReadsUnicodeFields() throws IOException {
+ File file = File.createTempFile("unicode-test", ".zip");
+ ZipFile zf = null;
+ try {
+ createTestFile(file, US_ASCII, false, true);
+ zf = new ZipFile(file, US_ASCII, true);
+ assertNotNull(zf.getEntry(ASCII_TXT));
+ assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
+ assertNotNull(zf.getEntry(OIL_BARREL_TXT));
+ } finally {
+ ZipFile.closeQuietly(zf);
+ if (file.exists()) {
+ file.delete();
+ }
+ }
+ }
+
private static void testFileRoundtrip(String encoding, boolean withEFS,
boolean withExplicitUnicodeExtra)
throws IOException {