This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-text.git


The following commit(s) were added to refs/heads/master by this push:
     new 09cc871  Add org.apache.commons.text.io.StringSubstitutorReader.
09cc871 is described below

commit 09cc871d5b79e12235b819907cd967f76fddc76e
Author: Gary Gregory <garydgreg...@gmail.com>
AuthorDate: Sat Jul 18 08:46:38 2020 -0400

    Add org.apache.commons.text.io.StringSubstitutorReader.
---
 src/changes/changes.xml                            |   1 +
 .../commons/text/io/StringSubstitutorReader.java   | 310 +++++++++++++++++++++
 .../org/apache/commons/text/io/package-info.java   |  31 +++
 src/site/xdoc/userguide.xml                        |   4 +
 .../text/io/StringSubstitutorFilterReaderTest.java | 159 +++++++++++
 5 files changed, 505 insertions(+)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index d8f8c32..231e544 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -71,6 +71,7 @@ The <action> type attribute can be add,update,fix,remove.
     <action                  type="add" dev="ggregory" due-to="Gary 
Gregory">Add org.apache.commons.text.TextStringBuilder.set(String).</action>
     <action                  type="add" dev="ggregory" due-to="Gary 
Gregory">Add org.apache.commons.text.TextStringBuilder.getCharsDelete(int, int, 
char[]. int).</action>
     <action                  type="add" dev="ggregory" due-to="Gary 
Gregory">Add 
org.apache.commons.text.StringSubstitutor.StringSubstitutor(StringSubstitutor).</action>
+    <action                  type="add" dev="ggregory" due-to="Gary 
Gregory">Add org.apache.commons.text.io.StringSubstitutorReader.</action>
     <action                  type="update" dev="ggregory" due-to="Gary 
Gregory">[build] Skip clirr since we use JApiCmp.</action>
     <action                  type="update" dev="ggregory" due-to="Gary 
Gregory">[test] junit-jupiter 5.5.1 -> 5.5.2.</action>
     <action                  type="update" dev="ggregory" due-to="Gary 
Gregory">[test] org.assertj:assertj-core 3.13.2 -> 3.16.1.</action>
diff --git 
a/src/main/java/org/apache/commons/text/io/StringSubstitutorReader.java 
b/src/main/java/org/apache/commons/text/io/StringSubstitutorReader.java
new file mode 100644
index 0000000..cec4f9e
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/io/StringSubstitutorReader.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.text.io;
+
+import java.io.FilterReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Objects;
+
+import org.apache.commons.text.StringSubstitutor;
+import org.apache.commons.text.TextStringBuilder;
+import org.apache.commons.text.matcher.StringMatcher;
+import org.apache.commons.text.matcher.StringMatcherFactory;
+
+/**
+ * A {@link Reader} that performs string substitution on a source {@code 
Reader} using a {@link StringSubstitutor}.
+ * 
+ * <p>
+ * Using this Reader avoids reading a whole file into memory as a {@code 
String} to perform string substitution, for
+ * example, when a Servlet filters a file to a client.
+ * </p>
+ * <p>
+ * This class is not thread-safe.
+ * </p>
+ *
+ * @since 1.9
+ */
+public class StringSubstitutorReader extends FilterReader {
+
+    /** The end-of-stream character marker. */
+    private static final int EOS = -1;
+
+    /** Our internal buffer. */
+    private final TextStringBuilder buffer = new TextStringBuilder();
+
+    /** End-of-Stream flag. */
+    private boolean eos;
+
+    /** Matches escaped variable starts. */
+    private final StringMatcher prefixEscapeMatcher;
+
+    /** Internal buffer for {@link #read()} method. */
+    private final char[] read1CharBuffer = {0};
+
+    /** We don't always want to drain the whole buffer. */
+    private int toDrain;
+
+    /** The underlying StringSubstitutor. */
+    private final StringSubstitutor stringSubstitutor;
+
+    /**
+     * Constructs a new instance.
+     *
+     * @param reader the underlying reader containing the template text known 
to the given {@code StringSubstitutor}.
+     * @param stringSubstitutor How to replace as we read.
+     * @throws NullPointerException if {@code reader} is {@code null}.
+     * @throws NullPointerException if {@code stringSubstitutor} is {@code 
null}.
+     */
+    public StringSubstitutorReader(final Reader reader, final 
StringSubstitutor stringSubstitutor) {
+        super(reader);
+        this.stringSubstitutor = Objects.requireNonNull(stringSubstitutor);
+        this.prefixEscapeMatcher = 
StringMatcherFactory.INSTANCE.charMatcher(stringSubstitutor.getEscapeChar())
+            .andThen(stringSubstitutor.getVariablePrefixMatcher());
+    }
+
+    /**
+     * Buffers the requested number of characters if available.
+     */
+    private int buffer(final int requestReadCount) throws IOException {
+        final int actualReadCount = buffer.readFrom(super.in, 
requestReadCount);
+        eos = actualReadCount == EOS;
+        return actualReadCount;
+    }
+
+    /**
+     * Reads a requested number of chars from the underlying reader into the 
buffer. On EOS, set the state is DRANING,
+     * drain, and return a drain count, otherwise, returns the actual read 
count.
+     */
+    private int bufferOrDrainOnEos(final int requestReadCount, final char[] 
target, final int targetIndex,
+        final int targetLength) throws IOException {
+        final int actualReadCount = buffer(requestReadCount);
+        return drainOnEos(actualReadCount, target, targetIndex, targetLength);
+    }
+
+    /**
+     * Drains characters from our buffer to the given {@code target}.
+     */
+    private int drain(final char[] target, final int targetIndex, final int 
targetLength) {
+        final int actualLen = Math.min(buffer.length(), targetLength);
+        final int drainCount = buffer.drainChars(0, actualLen, target, 
targetIndex);
+        toDrain -= drainCount;
+        if (buffer.isEmpty() || toDrain == 0) {
+            // nothing or everything drained.
+            toDrain = 0;
+        }
+        return drainCount;
+    }
+
+    /**
+     * Drains from the buffer to the target only if we are at EOS per the 
input count. If input count is EOS, drain and
+     * returns the drain count, otherwise return the input count. If draining, 
the state is set to DRAINING.
+     */
+    private int drainOnEos(final int readCountOrEos, final char[] target, 
final int targetIndex,
+        final int targetLength) {
+        if (readCountOrEos == EOS) {
+            // At EOS, drain.
+            if (buffer.isNotEmpty()) {
+                toDrain = buffer.size();
+                return drain(target, targetIndex, targetLength);
+            }
+            return EOS;
+        }
+        return readCountOrEos;
+    }
+
+    /**
+     * Tests if our buffer matches the given string matcher at the given 
position in the buffer.
+     */
+    private boolean isBufferMatchAt(final StringMatcher stringMatcher, final 
int pos) {
+        return stringMatcher.isMatch(buffer, pos) == stringMatcher.size();
+    }
+
+    /**
+     * Tests if we are draining.
+     */
+    private boolean isDraining() {
+        return toDrain > 0;
+    }
+
+    /**
+     * Reads a single character.
+     *
+     * @return a character as an {@code int} or {@code -1} for end-of-stream.
+     * @throws IOException If an I/O error occurs
+     */
+    @Override
+    public int read() throws IOException {
+        int count = 0;
+        // ask until we get a char or EOS
+        do {
+            count = read(read1CharBuffer, 0, 1);
+            if (count == EOS) {
+                return EOS;
+            }
+            // keep on buffering
+        } while (count < 1);
+        return read1CharBuffer[0];
+    }
+
+    /**
+     * Reads characters into a portion of an array.
+     *
+     * @param target Target buffer.
+     * @param targetIndexIn Index in the target at which to start storing 
characters.
+     * @param targetLengthIn Maximum number of characters to read.
+     *
+     * @return The number of characters read, or -1 on end of stream.
+     * @throws IOException If an I/O error occurs
+     */
+    @Override
+    public int read(final char[] target, final int targetIndexIn, final int 
targetLengthIn) throws IOException {
+        // The whole thing is inefficient because we must look for a balanced 
suffix to match the starting prefix
+        // Trying to substitute an incomplete expression can perform 
replacements when it should not.
+        // At a high level:
+        // - if draining, drain until empty or target length hit
+        // - copy to target until we find a variable start
+        // - buffer until a balanced suffix is read, then substitute.
+        if (eos && buffer.isEmpty()) {
+            return EOS;
+        }
+        if (targetLengthIn <= 0) {
+            // short-circuit: ask nothing, give nothing
+            return 0;
+        }
+        // drain check
+        int targetIndex = targetIndexIn;
+        int targetLength = targetLengthIn;
+        if (isDraining()) {
+            // drain as much as possible
+            final int drainCount = drain(target, targetIndex, 
Math.min(toDrain, targetLength));
+            if (drainCount == targetLength) {
+                // drained length requested, target is full, can only do more 
in the next invocation
+                return targetLength;
+            }
+            // drained less than requested, target not full.
+            targetIndex += drainCount;
+            targetLength -= drainCount;
+        }
+        // BUFFER from the underlying reader
+        final int minReadLenPrefix = prefixEscapeMatcher.size();
+        // READ enough to test for an [optionally escaped] variable start
+        int readCount = buffer(readCount(minReadLenPrefix, 0));
+        if (buffer.length() < minReadLenPrefix && targetLength < 
minReadLenPrefix) {
+            // read less than minReadLenPrefix, no variable possible
+            return drain(target, targetIndex, targetLength);
+        }
+        if (eos) {
+            // EOS
+            stringSubstitutor.replaceIn(buffer);
+            toDrain = buffer.size();
+            return drain(target, targetIndex, targetLength);
+        }
+        // PREFIX
+        // buffer and drain until we find a variable start, escaped or plain.
+        int balance = 0;
+        final StringMatcher prefixMatcher = 
stringSubstitutor.getVariablePrefixMatcher();
+        int pos = 0;
+        while (targetLength > 0) {
+            if (isBufferMatchAt(prefixMatcher, 0)) {
+                balance = 1;
+                pos = prefixMatcher.size();
+                break;
+            } else if (isBufferMatchAt(prefixEscapeMatcher, 0)) {
+                balance = 1;
+                pos = prefixEscapeMatcher.size();
+                break;
+            }
+            // drain first char
+            final int drainCount = drain(target, targetIndex, 1);
+            targetIndex += drainCount;
+            targetLength -= drainCount;
+            if (buffer.size() < minReadLenPrefix) {
+                readCount = bufferOrDrainOnEos(minReadLenPrefix, target, 
targetIndex, targetLength);
+                if (eos || isDraining()) {
+                    // if draining, readCount is a drain count
+                    if (readCount != EOS) {
+                        targetIndex += readCount;
+                        targetLength -= readCount;
+                    }
+                    final int actual = targetIndex - targetIndexIn;
+                    return actual > 0 ? actual : EOS;
+                }
+            }
+        }
+        // we found a variable start
+        if (targetLength <= 0) {
+            // no more room in target
+            return targetLengthIn;
+        }
+        // SUFFIX
+        // buffer more to find a balanced suffix
+        final StringMatcher suffixMatcher = 
stringSubstitutor.getVariableSuffixMatcher();
+        final int minReadLenSuffix = Math.max(minReadLenPrefix, 
suffixMatcher.size());
+        readCount = buffer(readCount(minReadLenSuffix, pos));
+        if (eos) {
+            // EOS
+            stringSubstitutor.replaceIn(buffer);
+            toDrain = buffer.size();
+            final int drainCount = drain(target, targetIndex, targetLength);
+            return targetIndex + drainCount - targetIndexIn;
+        }
+        // buffer and break out when we find the end or a balanced suffix
+        while (true) {
+            if (isBufferMatchAt(suffixMatcher, pos)) {
+                balance--;
+                pos++;
+                if (balance == 0) {
+                    break;
+                }
+            } else if (isBufferMatchAt(prefixMatcher, pos)) {
+                balance++;
+                pos += prefixMatcher.size();
+            } else if (isBufferMatchAt(prefixEscapeMatcher, pos)) {
+                balance++;
+                pos += prefixEscapeMatcher.size();
+            } else {
+                pos++;
+            }
+            readCount = buffer(readCount(minReadLenSuffix, pos));
+            if (readCount == EOS && pos >= buffer.size()) {
+                break;
+            }
+        }
+        // substitute
+        final int endPos = pos + 1;
+        final int leftover = Math.max(0, buffer.size() - pos);
+        stringSubstitutor.replaceIn(buffer, 0, Math.min(buffer.size(), 
endPos));
+        pos = buffer.size() - leftover;
+        final int drainLen = Math.min(targetLength, pos);
+        // only drain up to what we've substituted
+        toDrain = pos;
+        drain(target, targetIndex, drainLen);
+        return targetIndex + drainLen;
+    }
+
+    /**
+     * Returns how many chars to attempt reading to have room in the buffer 
for {@code count} chars starting at position
+     * {@code pos}.
+     */
+    private int readCount(final int count, final int pos) {
+        final int avail = buffer.size() - pos;
+        return avail >= count ? 0 : count - avail;
+    }
+
+}
diff --git a/src/main/java/org/apache/commons/text/io/package-info.java 
b/src/main/java/org/apache/commons/text/io/package-info.java
new file mode 100644
index 0000000..4234783
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/io/package-info.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <p>
+ * {@link org.apache.commons.text.io.StringSubstitutorReader} is a {@link 
java.io.Reader} that performs string
+ * substitution on a source {@code Reader} using a {@link 
org.apache.commons.text.StringSubstitutor}.
+ * </p>
+ * 
+ * <p>
+ * Using this Reader avoids reading a whole file into memory as a {@code 
String} to perform string substitution, for
+ * example, when a Servlet filters a file to a client.
+ * </p>
+ * 
+ * @since 1.9
+ */
+package org.apache.commons.text.io;
diff --git a/src/site/xdoc/userguide.xml b/src/site/xdoc/userguide.xml
index f897b56..3d06e4c 100644
--- a/src/site/xdoc/userguide.xml
+++ b/src/site/xdoc/userguide.xml
@@ -91,6 +91,10 @@ limitations under the License.
         For details see <a 
href="http://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/StringSubstitutor.html";>StringSubstitutor</a>.
       </p>
       <p>
+        Use a <a 
href="http://commons.apache.org/proper/commons-text/apidocs/org/apache/commons/text/io/StringSubstitutorReader.html";>StringSubstitutorReader</a>
+        to avoid reading a whole file into memory as a <code>String</code> to 
perform string substitution, for example, when a Servlet filters a file to a 
client.
+      </p>
+      <p>
         To build a default full-featured substitutor, use:
       </p>
       <ul>
diff --git 
a/src/test/java/org/apache/commons/text/io/StringSubstitutorFilterReaderTest.java
 
b/src/test/java/org/apache/commons/text/io/StringSubstitutorFilterReaderTest.java
new file mode 100644
index 0000000..622d5fe
--- /dev/null
+++ 
b/src/test/java/org/apache/commons/text/io/StringSubstitutorFilterReaderTest.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.text.io;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.input.NullReader;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.StringSubstitutor;
+import org.apache.commons.text.StringSubstitutorTest;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests {@link StringSubstitutorReader}.
+ */
+public class StringSubstitutorFilterReaderTest extends StringSubstitutorTest {
+
+    private StringSubstitutorReader createReader(final StringSubstitutor 
substitutor, final String template) {
+        return new StringSubstitutorReader(new StringReader(template), 
substitutor);
+    }
+
+    @Override
+    protected void doTestNoReplace(final StringSubstitutor substitutor, final 
String replaceTemplate)
+        throws IOException {
+        super.doTestNoReplace(substitutor, replaceTemplate);
+        doTestNoReplaceInSteps(replaceTemplate, substitutor);
+    }
+
+    private void doTestNoReplaceInSteps(final String replaceTemplate, final 
StringSubstitutor substitutor)
+        throws IOException {
+        doTestReplaceInSteps(substitutor, replaceTemplate, replaceTemplate, 
false);
+    }
+
+    @Override
+    protected void doTestReplace(final StringSubstitutor sub, final String 
expectedResult, final String replaceTemplate,
+        final boolean substring) throws IOException {
+        doTestReplaceInSteps(sub, expectedResult, replaceTemplate, substring);
+        super.doTestReplace(sub, expectedResult, replaceTemplate, substring);
+    }
+
+    private void doTestReplaceInSteps(final StringSubstitutor substitutor, 
final String expectedResult,
+        final String replaceTemplate, final boolean substring) throws 
IOException {
+        final StringWriter actualResultWriter = new StringWriter();
+        final AtomicInteger index = new AtomicInteger();
+        final int expectedResultLen = StringUtils.length(expectedResult);
+        try (Reader expectedResultReader = toReader(expectedResult);
+            Reader actualReader = new 
StringSubstitutorReader(toReader(replaceTemplate), substitutor)) {
+            int actualCh;
+            while ((actualCh = actualReader.read()) != -1) {
+                final int expectedCh = expectedResultReader.read();
+                final int actualCh2 = actualCh;
+                assertEquals(expectedCh, actualCh, () -> String.format("[%,d] 
'%s' != '%s', result so far: \"%s\"",
+                    index.get(), toStringChar(expectedCh), 
toStringChar(actualCh2), actualResultWriter.toString()));
+                if (actualCh != -1) {
+                    actualResultWriter.write(actualCh);
+                }
+                index.incrementAndGet();
+                assertFalse(index.get() > expectedResultLen, () -> "Index: " + 
index.get());
+            }
+        }
+        if (replaceTemplate == null) {
+            assertEquals(StringUtils.EMPTY, actualResultWriter.toString());
+        } else {
+            assertEquals(expectedResult, actualResultWriter.toString());
+        }
+    }
+
+    @Override
+    protected String replace(final StringSubstitutor substitutor, final String 
source) throws IOException {
+        if (source == null) {
+            return null;
+        }
+        try (Reader reader = createReader(substitutor, source)) {
+            return IOUtils.toString(reader);
+        }
+    }
+
+    @Test
+    public void testReadMixedBufferLengths1ToVarLenPlusNoReplace() throws 
IOException {
+        final StringSubstitutor substitutor = new StringSubstitutor(values);
+        final String template = "123456";
+        assertTrue(template.length() > substitutor.getMinExpressionLength() + 
1);
+        try (Reader reader = createReader(substitutor, template)) {
+            assertEquals('1', reader.read());
+            final char[] cbuf = new char[template.length() - 1];
+            reader.read(cbuf);
+            final String result = String.valueOf(cbuf);
+            assertEquals(template.substring(1), result);
+        }
+    }
+
+    @Test
+    public void testReadMixedBufferLengthsReplace() throws IOException {
+        final String template = "${aa}${bb}";
+        final StringSubstitutor substitutor = new StringSubstitutor(values);
+        try (Reader reader = createReader(substitutor, template)) {
+            assertEquals('1', reader.read());
+            final char[] cbuf = new char[3];
+            assertEquals(0, reader.read(cbuf, 0, 0));
+            reader.read(cbuf);
+            final String result = String.valueOf(cbuf);
+            assertEquals("122", result, () -> String.format("length %,d", 
result.length()));
+        }
+    }
+
+    @Test
+    public void testReadMixedBufferLengthsVarLenPlusToNoReplace() throws 
IOException {
+        final StringSubstitutor substitutor = new StringSubstitutor(values);
+        final String template = "123456";
+        assertTrue(template.length() > substitutor.getMinExpressionLength() + 
1);
+        try (Reader reader = createReader(substitutor, template)) {
+            final int endIndex = template.length() - 1;
+            final char[] cbuf = new char[endIndex];
+            reader.read(cbuf);
+            final String result = String.valueOf(cbuf);
+            assertEquals(template.substring(0, endIndex), result);
+            assertEquals('6', reader.read());
+        }
+    }
+
+    private Reader toReader(final String expectedResult) {
+        return expectedResult != null ? new StringReader(expectedResult) : new 
NullReader();
+    }
+
+    private String toStringChar(final int ch) {
+        switch (ch) {
+        case -1:
+            return "EOS";
+        case 0:
+            return "NUL";
+        default:
+            return String.valueOf((char) ch);
+        }
+    }
+}

Reply via email to