This is an automated email from the ASF dual-hosted git repository. kwin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/maven-doxia.git
The following commit(s) were added to refs/heads/master by this push: new ef41ffaa [DOXIA-699] Correctly generate sections for headings (#162) ef41ffaa is described below commit ef41ffaa282ba5ff19548f09979bcc3febf0feaa Author: Konrad Windszus <k...@apache.org> AuthorDate: Tue Dec 26 11:17:34 2023 +0100 [DOXIA-699] Correctly generate sections for headings (#162) Make sure to nest Sink events correctly in case of manual sections in XHTML together with headings --- .../maven/doxia/parser/Xhtml5BaseParser.java | 86 ++++++++++++++++------ .../doxia/sink/impl/EventCapturingSinkProxy.java | 65 ++++++++++++++++ .../maven/doxia/module/AbstractIdentityTest.java | 11 ++- .../maven/doxia/parser/Xhtml5BaseParserTest.java | 71 ++++++++++++++++++ 4 files changed, 208 insertions(+), 25 deletions(-) diff --git a/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java b/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java index 2e9108e8..24d6c7f8 100644 --- a/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java +++ b/doxia-core/src/main/java/org/apache/maven/doxia/parser/Xhtml5BaseParser.java @@ -22,6 +22,7 @@ import javax.swing.text.html.HTML.Attribute; import java.io.Reader; import java.util.HashSet; +import java.util.LinkedList; import java.util.Set; import java.util.Stack; import java.util.regex.Pattern; @@ -30,6 +31,7 @@ import org.apache.maven.doxia.macro.MacroExecutionException; import org.apache.maven.doxia.markup.HtmlMarkup; import org.apache.maven.doxia.sink.Sink; import org.apache.maven.doxia.sink.SinkEventAttributes; +import org.apache.maven.doxia.sink.impl.EventCapturingSinkProxy; import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet; import org.apache.maven.doxia.util.DoxiaUtils; import org.codehaus.plexus.util.xml.pull.XmlPullParser; @@ -113,10 +115,12 @@ public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup { /** Used for nested lists. 
*/ private int orderedListDepth = 0; - /** Counts section level. */ + /** Counts section nesting level of the sections manually set in the HTML document */ private int sectionLevel; - /** Counts heading level. */ + /** Counts current heading level. This is either the {@link #sectionLevel} if no artificial sections are currently open + * for headings or a number higher or lower than {@link #sectionLevel} (for all sections currently opened/closed for a preceding heading). + * The heading level only changes when a new heading starts, or a section starts or ends. */ private int headingLevel; /** Verbatim flag, true whenever we are inside a &lt;pre&gt; tag. */ @@ -128,13 +132,17 @@ public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup { /** Used to wrap the definedTerm with its definition, even when one is omitted */ boolean hasDefinitionListItem = false; + private LinkedList<String> capturedSinkEventNames; + /** {@inheritDoc} */ @Override public void parse(Reader source, Sink sink, String reference) throws ParseException { init(); try { - super.parse(source, sink, reference); + capturedSinkEventNames = new LinkedList<>(); + Sink capturingSink = EventCapturingSinkProxy.newInstance(sink, capturedSinkEventNames); + super.parse(source, capturingSink, reference); } finally { setSecondParsing(false); init(); @@ -594,12 +602,27 @@ public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup { } /** - * Make sure sections are nested consecutively. + * Shortcut for {@link #emitHeadingSections(int, Sink, boolean)} with last argument being {@code true} + * @param newLevel + * @param sink + * @param attribs + * @deprecated Use {@link #emitHeadingSections(int, Sink, boolean)} instead. 
+ */ + @Deprecated + protected void consecutiveSections(int newLevel, Sink sink, SinkEventAttributeSet attribs) { + emitHeadingSections(newLevel, sink, true); + } + + /** + * Make sure sections are nested consecutively and correctly inserted for the given heading level * * <p> - * HTML5 heading tags H1 to H5 imply sections where they are not - * present, that means we have to open close any sections that - * are missing in between. + * HTML5 heading tags H1 to H5 imply same level sections in Sink API (compare with {@link Sink#sectionTitle(int, SinkEventAttributes)}). + * However (X)HTML5 allows headings without explicit surrounding section elements and is also + * less strict with non-consecutive heading levels. + * This method both closes open sections which have been added for previous headings and/or opens + * sections necessary for the new heading level. + * At least one section needs to be opened directly prior to the heading due to Sink API restrictions. * </p> * * <p> @@ -621,27 +644,44 @@ public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup { * we have to close two sections before we open the <code>&lt;h2&gt;</code>. * </p> * - * <p>The current level is set to newLevel afterwards.</p> + * <p>The current heading level is set to newLevel afterwards.</p> * * @param newLevel the new section level, all upper levels have to be closed. * @param sink the sink to receive the events. - * @param attribs a {@link org.apache.maven.doxia.sink.impl.SinkEventAttributeSet} object. 
*/ - protected void consecutiveSections(int newLevel, Sink sink, SinkEventAttributeSet attribs) { - closeOpenSections(newLevel, sink); - openMissingSections(newLevel, sink); + protected void emitHeadingSections(int newLevel, Sink sink, boolean enforceNewSection) { + int lowerBoundSectionLevel = newLevel; + if (enforceNewSection) { + // close one more if either last event was not section start or the new level is lower than the current one + // (in this case the last event may be a section start event but for another level) + if (!isLastEventSectionStart() || newLevel < this.headingLevel) { + lowerBoundSectionLevel--; + } + } + closeOpenHeadingSections(lowerBoundSectionLevel, sink); + openMissingHeadingSections(newLevel, sink); this.headingLevel = newLevel; } + private boolean isLastEventSectionStart() { + String lastEventName = capturedSinkEventNames.pollLast(); + if (lastEventName == null) { + return false; + } + return lastEventName.startsWith("section") + && !lastEventName.endsWith("_") + && !lastEventName.startsWith("sectionTitle"); + } + /** - * Close open sections. + * Close open heading sections. * * @param newLevel the new section level, all upper levels have to be closed. * @param sink the sink to receive the events. */ - private void closeOpenSections(int newLevel, Sink sink) { - while (this.headingLevel >= newLevel && this.sectionLevel < headingLevel) { + private void closeOpenHeadingSections(int newLevel, Sink sink) { + while (this.headingLevel > newLevel) { if (headingLevel == Sink.SECTION_LEVEL_5) { sink.section5_(); } else if (headingLevel == Sink.SECTION_LEVEL_4) { @@ -656,16 +696,17 @@ public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup { this.headingLevel--; } + // enforce the previous element is a section } /** - * Open missing sections. + * Open missing heading sections. * * @param newLevel the new section level, all lower levels have to be opened. * @param sink the sink to receive the events. 
*/ - private void openMissingSections(int newLevel, Sink sink) { - while (this.headingLevel < newLevel && this.sectionLevel < newLevel) { + private void openMissingHeadingSections(int newLevel, Sink sink) { + while (this.headingLevel < newLevel) { this.headingLevel++; if (headingLevel == Sink.SECTION_LEVEL_5) { @@ -909,19 +950,20 @@ public class Xhtml5BaseParser extends AbstractXmlParser implements HtmlMarkup { } private void handleSectionStart(Sink sink, SinkEventAttributeSet attribs) { + emitHeadingSections(sectionLevel, sink, false); sink.section(++sectionLevel, attribs); + this.headingLevel = sectionLevel; } private void handleHeadingStart(Sink sink, int level, SinkEventAttributeSet attribs) { - consecutiveSections(level, sink, attribs); + emitHeadingSections(level, sink, true); sink.sectionTitle(level, attribs); } private void handleSectionEnd(Sink sink) { - closeOpenSections(sectionLevel, sink); - this.headingLevel = 0; - + emitHeadingSections(sectionLevel, sink, false); sink.section_(sectionLevel--); + this.headingLevel = sectionLevel; } private void handleTableStart(Sink sink, SinkEventAttributeSet attribs, XmlPullParser parser) { diff --git a/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/EventCapturingSinkProxy.java b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/EventCapturingSinkProxy.java new file mode 100644 index 00000000..2a99a334 --- /dev/null +++ b/doxia-core/src/main/java/org/apache/maven/doxia/sink/impl/EventCapturingSinkProxy.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.maven.doxia.sink.impl; + +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.List; + +import org.apache.maven.doxia.sink.Sink; + +/** + * A proxy for a Sink which captures all event/method names called on it. + */ +public class EventCapturingSinkProxy implements InvocationHandler { + + private final Sink sink; + private final List<String> capturedEventNames; + + /** + * + * @param sink + * @param capturedEventNames the list to receive the captured event/method names + * @return + */ + public static Sink newInstance(Sink sink, List<String> capturedEventNames) { + return (Sink) java.lang.reflect.Proxy.newProxyInstance( + sink.getClass().getClassLoader(), + new Class<?>[] {Sink.class}, + new EventCapturingSinkProxy(sink, capturedEventNames)); + } + + private EventCapturingSinkProxy(Sink sink, List<String> capturedEventNames) { + this.sink = sink; + this.capturedEventNames = capturedEventNames; + } + + @Override + public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { + Object result; + try { + capturedEventNames.add(method.getName()); + result = method.invoke(sink, args); + } catch (InvocationTargetException e) { + throw e.getTargetException(); + } + return result; + } +} diff --git a/doxia-core/src/test/java/org/apache/maven/doxia/module/AbstractIdentityTest.java b/doxia-core/src/test/java/org/apache/maven/doxia/module/AbstractIdentityTest.java index 4026107c..b81dfb4b 100644 --- 
a/doxia-core/src/test/java/org/apache/maven/doxia/module/AbstractIdentityTest.java +++ b/doxia-core/src/test/java/org/apache/maven/doxia/module/AbstractIdentityTest.java @@ -86,7 +86,7 @@ public abstract class AbstractIdentityTest extends AbstractModuleTest { expected = writer.toString(); // write to file for comparison - try (Writer fileWriter = getTestWriter("expected")) { + try (Writer fileWriter = getTestWriter("expectedTextSink")) { fileWriter.write(expected); } // generate the actual model @@ -94,8 +94,13 @@ public abstract class AbstractIdentityTest extends AbstractModuleTest { sink = createSink(writer); SinkTestDocument.generate(sink); sink.close(); - StringReader reader = new StringReader(writer.toString()); + String expectedViaTargetSink = writer.toString(); + // write to file for comparison + try (Writer fileWriter = getTestWriter("expectedTargetSink")) { + fileWriter.write(expectedViaTargetSink); + } + StringReader reader = new StringReader(expectedViaTargetSink); writer = new StringWriter(); sink = new TextSink(writer); Parser parser = createParser(); @@ -103,7 +108,7 @@ public abstract class AbstractIdentityTest extends AbstractModuleTest { String actual = writer.toString(); // write to file for comparison - try (Writer fileWriter = getTestWriter("actual")) { + try (Writer fileWriter = getTestWriter("actualTextSink")) { fileWriter.write(actual); } diff --git a/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java b/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java index 5598e6b8..dadc9122 100644 --- a/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java +++ b/doxia-core/src/test/java/org/apache/maven/doxia/parser/Xhtml5BaseParserTest.java @@ -129,6 +129,77 @@ public class Xhtml5BaseParserTest extends AbstractParserTest { assertFalse(it.hasNext()); } + @Test + public void testSectionsAndHeadingsOnDifferentLevels() throws ParseException { + // section on higher level 
than heading + String text = "<body><section><section><h1>Headline1</h1></section></section></body>"; + parser.parse(text, sink); + + Iterator<SinkEventElement> it = sink.getEventList().iterator(); + assertSinkEquals( + it, + "section1", + "section2", + "section2_", + "section1_", + "section1", + "sectionTitle1", + "text", + "sectionTitle1_", + "section2", + "section2_", + "section1_"); + } + + @Test + public void testSectionsAndHeadingsOnDifferentLevels2() throws ParseException { + // section on lower level than heading + String text = "<body><section><h3>Headline1</h3></section></body>"; + parser.parse(text, sink); + + Iterator<SinkEventElement> it = sink.getEventList().iterator(); + assertSinkEquals( + it, + "section1", + "section2", + "section3", + "sectionTitle3", + "text", + "sectionTitle3_", + "section3_", + "section2_", + "section1_"); + } + + @Test + public void testSectionsAndHeadingsOnSameLevel() throws ParseException { + // heading directly following same level section doesn't need additional sections, while headings following some + // other element (still same level) + // needs an explicit new (same level) section + String text = + "<body><section><h1>Headline1</h1><section><h2>Headline2</h2></section><h1>Headline3</h1></section></body>"; + parser.parse(text, sink); + + Iterator<SinkEventElement> it = sink.getEventList().iterator(); + assertSinkEquals( + it, + "section1", + "sectionTitle1", + "text", + "sectionTitle1_", + "section2", + "sectionTitle2", + "text", + "sectionTitle2_", + "section2_", + "section1_", + "section1", + "sectionTitle1", + "text", + "sectionTitle1_", + "section1_"); + } + @Test public void testFigureEventsList() throws Exception { String text = "<img src=\"source\" title=\"caption\" />";