Repository: commons-io Updated Branches: refs/heads/master bc10af423 -> 7791a851c
[IO-577] Add readers to filter out given characters: CharacterSetFilterReader and CharacterFilterReader. Project: http://git-wip-us.apache.org/repos/asf/commons-io/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-io/commit/7791a851 Tree: http://git-wip-us.apache.org/repos/asf/commons-io/tree/7791a851 Diff: http://git-wip-us.apache.org/repos/asf/commons-io/diff/7791a851 Branch: refs/heads/master Commit: 7791a851c4a303f47743b156007fb19c6ed8ed81 Parents: bc10af4 Author: Gary Gregory <garydgreg...@gmail.com> Authored: Tue May 22 10:05:59 2018 -0600 Committer: Gary Gregory <garydgreg...@gmail.com> Committed: Tue May 22 10:05:59 2018 -0600 ---------------------------------------------------------------------- src/changes/changes.xml | 3 + .../io/input/AbstractCharacterFilterReader.java | 74 +++++++++++ .../commons/io/input/CharacterFilterReader.java | 48 +++++++ .../io/input/CharacterSetFilterReader.java | 55 ++++++++ .../io/input/CharacterFilterReaderTest.java | 72 ++++++++++ .../io/input/CharacterSetFilterReaderTest.java | 131 +++++++++++++++++++ 6 files changed, 383 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 9f6fd85..f296eb0 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -65,6 +65,9 @@ The <action> type attribute can be add,update,fix,remove. <action issue="IO-572" dev="ggregory" type="update" due-to="Pranet Verma"> Refactor duplicate code in org.apache.commons.io.FileUtils. </action> + <action issue="IO-577" dev="ggregory" type="add" due-to="Gary Gregory"> + Add readers to filter out given characters: CharacterSetFilterReader and CharacterFilterReader. + </action> </release> <release version="2.6" date="2017-10-15" description="Java 7 required, Java 9 supported."> http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java b/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java new file mode 100644 index 0000000..8deeebe --- /dev/null +++ b/src/main/java/org/apache/commons/io/input/AbstractCharacterFilterReader.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input; + +import java.io.FilterReader; +import java.io.IOException; +import java.io.Reader; + +/** + * A filter reader that filters out characters where subclasses decide which characters to filter out. + */ +public abstract class AbstractCharacterFilterReader extends FilterReader { + + /** + * Constructs a new reader. + * + * @param reader + * the reader to filter + */ + protected AbstractCharacterFilterReader(final Reader reader) { + super(reader); + } + + @Override + public int read() throws IOException { + int ch; + do { + ch = in.read(); + } while (filter(ch)); + return ch; + } + + /** + * Returns true if the given character should be filtered out, false to keep the character. + * + * @param ch + * the character to test. + * @return true if the given character should be filtered out, false to keep the character. + */ + protected abstract boolean filter(int ch); + + @Override + public int read(final char[] cbuf, final int off, final int len) throws IOException { + final int read = super.read(cbuf, off, len); + if (read == -1) { + return -1; + } + int pos = off - 1; + for (int readPos = off; readPos < off + read; readPos++) { + if (filter(read)) { + continue; + } + pos++; + if (pos < readPos) { + cbuf[pos] = cbuf[readPos]; + } + } + return pos - off + 1; + } +} http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java b/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java new file mode 100644 index 0000000..092e0f5 --- /dev/null +++ b/src/main/java/org/apache/commons/io/input/CharacterFilterReader.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input; + +import java.io.Reader; + +/** + * A filter reader that filters out a given character represented as an <code>int</code> code point, handy to remove + * known junk characters from CSV files for example. This class is the most efficient way to filter out a single + * character, as opposed to using a {@link CharacterSetFilterReader}. You can also nest {@link CharacterFilterReader}s. + */ +public class CharacterFilterReader extends AbstractCharacterFilterReader { + + private final int skip; + + /** + * Constructs a new reader. + * + * @param reader + * the reader to filter. + * @param skip + * the character to filter out. + */ + public CharacterFilterReader(final Reader reader, final int skip) { + super(reader); + this.skip = skip; + } + + @Override + protected boolean filter(final int ch) { + return ch == skip; + } + +} http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java b/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java new file mode 100644 index 0000000..2810a40 --- /dev/null +++ b/src/main/java/org/apache/commons/io/input/CharacterSetFilterReader.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input; + +import java.io.Reader; +import java.util.Collections; +import java.util.Set; + +/** + * A filter reader that removes a given set of characters represented as <code>int</code> code points, handy to remove + * known junk characters from CSV files for example. + * <p> + * This class must convert each <code>int</code> read to an <code>Integer</code>. You can increase the Integer cache + * with a system property, see {@link Integer}. + * </p> + */ +public class CharacterSetFilterReader extends AbstractCharacterFilterReader { + + private static final Set<Integer> EMPTY_SET = Collections.emptySet(); + private final Set<Integer> skipSet; + + /** + * Constructs a new reader. + * + * @param reader + * the reader to filter. + * @param skip + * the set of characters to filter out. + */ + public CharacterSetFilterReader(final Reader reader, final Set<Integer> skip) { + super(reader); + this.skipSet = skip == null ? EMPTY_SET : Collections.unmodifiableSet(skip); + } + + @Override + protected boolean filter(final int ch) { + // Note WRT Integer.valueOf(): You can increase the Integer cache with a system property, see {@link Integer}. + return skipSet.contains(Integer.valueOf(ch)); + } + +} http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java b/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java new file mode 100644 index 0000000..fda062d --- /dev/null +++ b/src/test/java/org/apache/commons/io/input/CharacterFilterReaderTest.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input; + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashSet; + +import org.junit.Assert; +import org.junit.Test; + +public class CharacterFilterReaderTest { + + @Test + public void testInputSize0FilterSize1() throws IOException { + final StringReader input = new StringReader(""); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('a')); + try (CharacterFilterReader reader = new CharacterFilterReader(input, 'A')) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize1FilterSize1() throws IOException { + try (StringReader input = new StringReader("a"); + CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize1FilterAll() throws IOException { + final StringReader input = new StringReader("aa"); + try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize1FilterFirst() throws IOException { + final StringReader input = new StringReader("ab"); + try (CharacterFilterReader reader = new CharacterFilterReader(input, 'a')) { + Assert.assertEquals('b', reader.read()); + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize1FilterLast() throws IOException { + final StringReader input = new StringReader("ab"); + try (CharacterFilterReader reader = new CharacterFilterReader(input, 'b')) { + Assert.assertEquals('a', reader.read()); + Assert.assertEquals(-1, reader.read()); + } + } + +} http://git-wip-us.apache.org/repos/asf/commons-io/blob/7791a851/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java b/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java new file mode 100644 index 0000000..f1a2dc0 --- /dev/null +++ b/src/test/java/org/apache/commons/io/input/CharacterSetFilterReaderTest.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.io.input; + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashSet; + +import org.junit.Assert; +import org.junit.Test; + +public class CharacterSetFilterReaderTest { + + @Test + public void testInputSize0FilterSize0() throws IOException { + final StringReader input = new StringReader(""); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, new HashSet<Integer>(0))) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize0FilterSize1() throws IOException { + final StringReader input = new StringReader(""); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('a')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize0NullFilter() throws IOException { + final StringReader input = new StringReader(""); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, null)) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize1FilterSize1() throws IOException { + try (StringReader input = new StringReader("a")) { + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('a')); + final CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints); + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize1FilterAll() throws IOException { + final StringReader input = new StringReader("aa"); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('a')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize1FilterFirst() throws IOException { + final StringReader input = new StringReader("ab"); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('a')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals('b', reader.read()); + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize1FilterLast() throws IOException { + final StringReader input = new StringReader("ab"); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('b')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals('a', reader.read()); + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize2FilterFirst() throws IOException { + final StringReader input = new StringReader("ab"); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('a')); + codePoints.add(Integer.valueOf('y')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals('b', reader.read()); + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize2FilterLast() throws IOException { + final StringReader input = new StringReader("ab"); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('x')); + codePoints.add(Integer.valueOf('b')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals('a', reader.read()); + Assert.assertEquals(-1, reader.read()); + } + } + + @Test + public void testInputSize2FilterSize2FilterNone() throws IOException { + final StringReader input = new StringReader("ab"); + final HashSet<Integer> codePoints = new HashSet<>(); + codePoints.add(Integer.valueOf('x')); + codePoints.add(Integer.valueOf('y')); + try (CharacterSetFilterReader reader = new CharacterSetFilterReader(input, codePoints)) { + Assert.assertEquals('a', reader.read()); + Assert.assertEquals('b', reader.read()); + } + } +}