Author: krosenvold
Date: Sun Jun 21 16:04:25 2015
New Revision: 1686739
URL: http://svn.apache.org/r1686739
Log:
IO-471 Support for additional encodings in ReversedLinesFileReader
Patch by Leandro Reis, applied with patch adjustments to trunk
Added:
commons/proper/io/trunk/src/test/resources/test-file-gbk.bin
commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin
commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin
commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin
Modified:
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java
Modified:
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java?rev=1686739&r1=1686738&r2=1686739&view=diff
==============================================================================
---
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java
(original)
+++
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/ReversedLinesFileReader.java
Sun Jun 21 16:04:25 2015
@@ -121,9 +121,11 @@ public class ReversedLinesFileReader imp
// UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
// http://en.wikipedia.org/wiki/UTF-8
byteDecrement = 1;
- } else if (charset == Charset.forName("Shift_JIS")) {
- // Same as for UTF-8
- // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
+ } else if(charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
+ charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
+ charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
+ charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
+ charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
byteDecrement = 1;
} else if (charset == Charsets.UTF_16BE || charset ==
Charsets.UTF_16LE) {
// UTF-16 new line sequences are not allowed as second tuple of
four byte sequences,
@@ -356,4 +358,4 @@ public class ReversedLinesFileReader imp
}
}
-}
\ No newline at end of file
+}
Modified:
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java?rev=1686739&r1=1686738&r2=1686739&view=diff
==============================================================================
---
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java
(original)
+++
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamBlockSize.java
Sun Jun 21 16:04:25 2015
@@ -59,6 +59,18 @@ public class ReversedLinesFileReaderTest
private static final String TEST_LINE_SHIFT_JIS1 = "Hiragana letters:
\u3041\u3042\u3043\u3044\u3045";
// Kanji letters: 明輸子京
private static final String TEST_LINE_SHIFT_JIS2 = "Kanji letters:
\u660E\u8F38\u5B50\u4EAC";
+ // windows-31j characters
+ private static final String TEST_LINE_WINDOWS_31J_1 =
"\u3041\u3042\u3043\u3044\u3045";
+ private static final String TEST_LINE_WINDOWS_31J_2 =
"\u660E\u8F38\u5B50\u4EAC";
+ // gbk characters (Simplified Chinese)
+ private static final String TEST_LINE_GBK_1 = "\u660E\u8F38\u5B50\u4EAC";
+ private static final String TEST_LINE_GBK_2 = "\u7B80\u4F53\u4E2D\u6587";
+ // x-windows-949 characters (Korean)
+ private static final String TEST_LINE_X_WINDOWS_949_1 =
"\uD55C\uAD6D\uC5B4";
+ private static final String TEST_LINE_X_WINDOWS_949_2 =
"\uB300\uD55C\uBBFC\uAD6D";
+ // x-windows-950 characters (Traditional Chinese)
+ private static final String TEST_LINE_X_WINDOWS_950_1 =
"\u660E\u8F38\u5B50\u4EAC";
+ private static final String TEST_LINE_X_WINDOWS_950_2 =
"\u7E41\u9AD4\u4E2D\u6587";
@After
@@ -127,6 +139,38 @@ public class ReversedLinesFileReaderTest
assertEqualsAndNoLineBreaks(TEST_LINE_SHIFT_JIS1,
reversedLinesFileReader.readLine());
}
+ @Test
+ public void testWindows31jFile() throws URISyntaxException, IOException {
+ final File testFileWindows31J = new
File(this.getClass().getResource("/test-file-windows-31j.bin").toURI());
+ reversedLinesFileReader = new
ReversedLinesFileReader(testFileWindows31J, testParamBlockSize, "windows-31j");
+ assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_2,
reversedLinesFileReader.readLine());
+ assertEqualsAndNoLineBreaks(TEST_LINE_WINDOWS_31J_1,
reversedLinesFileReader.readLine());
+ }
+
+ @Test
+ public void testGBK() throws URISyntaxException, IOException {
+ final File testFileGBK = new
File(this.getClass().getResource("/test-file-gbk.bin").toURI());
+ reversedLinesFileReader = new ReversedLinesFileReader(testFileGBK,
testParamBlockSize, "GBK");
+ assertEqualsAndNoLineBreaks(TEST_LINE_GBK_2,
reversedLinesFileReader.readLine());
+ assertEqualsAndNoLineBreaks(TEST_LINE_GBK_1,
reversedLinesFileReader.readLine());
+ }
+
+ @Test
+ public void testxWindows949File() throws URISyntaxException, IOException {
+ final File testFilexWindows949 = new
File(this.getClass().getResource("/test-file-x-windows-949.bin").toURI());
+ reversedLinesFileReader = new
ReversedLinesFileReader(testFilexWindows949, testParamBlockSize,
"x-windows-949");
+ assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_2,
reversedLinesFileReader.readLine());
+ assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_949_1,
reversedLinesFileReader.readLine());
+ }
+
+ @Test
+ public void testxWindows950File() throws URISyntaxException, IOException {
+ final File testFilexWindows950 = new
File(this.getClass().getResource("/test-file-x-windows-950.bin").toURI());
+ reversedLinesFileReader = new
ReversedLinesFileReader(testFilexWindows950, testParamBlockSize,
"x-windows-950");
+ assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_2,
reversedLinesFileReader.readLine());
+ assertEqualsAndNoLineBreaks(TEST_LINE_X_WINDOWS_950_1,
reversedLinesFileReader.readLine());
+ }
+
@Test // this test is run 3x for same block size as we want to test with 10
public void testFileSizeIsExactMultipleOfBlockSize() throws
URISyntaxException, IOException {
final int blockSize = 10;
Modified:
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java?rev=1686739&r1=1686738&r2=1686739&view=diff
==============================================================================
---
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java
(original)
+++
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/ReversedLinesFileReaderTestParamFile.java
Sun Jun 21 16:04:25 2015
@@ -57,6 +57,10 @@ public class ReversedLinesFileReaderTest
{"test-file-utf8-win-linebr.bin", "UTF-8", 3},
{"test-file-utf8-win-linebr.bin", "UTF-8", 4},
{"test-file-utf8.bin", "UTF-8", null},
+ {"test-file-windows-31j.bin", "windows-31j", null},
+ {"test-file-gbk.bin", "gbk", null},
+ {"test-file-x-windows-949.bin", "x-windows-949", null},
+ {"test-file-x-windows-950.bin", "x-windows-950", null},
});
}
Added: commons/proper/io/trunk/src/test/resources/test-file-gbk.bin
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-gbk.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-gbk.bin (added)
+++ commons/proper/io/trunk/src/test/resources/test-file-gbk.bin Sun Jun 21
16:04:25 2015
@@ -0,0 +1,2 @@
+Ã÷Ý×Ó¾©
+¼òÌåÖÐÎÄ
Added: commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin (added)
+++ commons/proper/io/trunk/src/test/resources/test-file-windows-31j.bin Sun
Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+ ¡¢£
+¾Aq
Added: commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin
(added)
+++ commons/proper/io/trunk/src/test/resources/test-file-x-windows-949.bin Sun
Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+Çѱ¹¾î
+´ëÇѹα¹
Added: commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin
URL:
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin?rev=1686739&view=auto
==============================================================================
--- commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin
(added)
+++ commons/proper/io/trunk/src/test/resources/test-file-x-windows-950.bin Sun
Jun 21 16:04:25 2015
@@ -0,0 +1,2 @@
+©ú¿é¤l¨Ê
+ÁcÅ餤¤å