[ https://issues.apache.org/jira/browse/DOXIA-690?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17682031#comment-17682031 ]
ASF GitHub Bot commented on DOXIA-690: -------------------------------------- kwin commented on code in PR #141: URL: https://github.com/apache/maven-doxia/pull/141#discussion_r1090522065 ########## doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java: ########## @@ -156,6 +178,86 @@ public void parse(Reader source, Sink sink, String reference) throws ParseExcept } } + private boolean processMetadataForHtml(StringBuilder html, StringBuilder source) { + final Map<String, List<String>> metaData; + final int endOffset; // end of metadata within source + // support two types of metadata: + if (source.toString().startsWith("---")) { + // 1. YAML front matter (https://github.com/vsch/flexmark-java/wiki/Extensions#yaml-front-matter) + Node documentRoot = FLEXMARK_METADATA_PARSER.parse(source.toString()); + YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor(); + visitor.visit(documentRoot); + metaData = visitor.getData(); + endOffset = visitor.getEndOffset(); + } else { + // 2. 
Multimarkdown metadata (https://fletcher.github.io/MultiMarkdown-5/metadata.html), not yet supported + // by Flexmark (https://github.com/vsch/flexmark-java/issues/550) + metaData = new LinkedHashMap<>(); + Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(source); + if (metadataMatcher.find()) { + String entry = metadataMatcher.group(0) + '\n'; + Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(entry); + while (entryMatcher.find()) { + String key = entryMatcher.group(1); + String value = normalizeMultilineValue(entryMatcher.group(2)); + metaData.put(key, Collections.singletonList(value)); + } + endOffset = metadataMatcher.end(0); + } else { + endOffset = 0; + } + } + if (endOffset > 0) { + // Trim the metadata from the source + source.delete(0, endOffset); + } + return writeHtmlMetadata(html, metaData); + } + + static String normalizeMultilineValue(String value) { + return value.trim().replaceAll("[ \\t]*[\\r\\n]+[ \\t]*", " "); + } + + private boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) { + boolean containsTitle = false; + for (Entry<String, List<String>> entry : data.entrySet()) { + if (writeHtmlMetadata(html, entry.getKey(), entry.getValue())) { + containsTitle = true; + } + } + return containsTitle; + } + + private boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) { + if ("title".equalsIgnoreCase(key)) { + html.append("<title>"); + html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(", ")), false)); + html.append("</title>"); + return true; + } else { + if (key.equalsIgnoreCase("author") && values.size() > 1) { + // for multiple authors emit multiple meta tags + for (String value : values) { + writeHtmlMetadata(html, key, Collections.singletonList(value)); + } + } else { + // every other multivalue should just be concatenated and emitted in a single meta tag + final String separator; + if (key.equalsIgnoreCase("keywords")) { + separator = ","; + } else { + 
separator = "\n"; Review Comment: Fixed in https://github.com/apache/maven-doxia/pull/141/commits/168071b3d8e0434ce73d668746e6c78f5f2051f2. > Markdown Parser: Multiline metadata incorrectly rendered > -------------------------------------------------------- > > Key: DOXIA-690 > URL: https://issues.apache.org/jira/browse/DOXIA-690 > Project: Maven Doxia > Issue Type: Bug > Components: Module - Markdown > Reporter: Konrad Windszus > Assignee: Konrad Windszus > Priority: Major > > Markdown uses the following metadata format in its sink: > [http://fletcher.github.io/MultiMarkdown-5/metadata.html]. > In case a metadata key has multiple values on multiple lines, it is emitted as > follows by the {{MarkdownSink}}: > {code:java} > title: Guide to creating a site > author: Brett Porter > Jason van Zyl > date: 2015-07-18 {code} > That leads to incorrect XHTML output as the second line for {{author}} is not > detected correctly and instead of being emitted as metadata is emitted as > a regular paragraph in the HTML body. This is due to > https://github.com/apache/maven-doxia/blob/7509feb03af4d4fb7d48b4f9ef38ff5c1a17a149/doxia-modules/doxia-module-markdown/src/main/java/org/apache/maven/doxia/module/markdown/MarkdownParser.java#L110 > only detecting single-line metadata values. > Although this might be a glitch of the underlying Markdown Parser > implementation, the metadata format explicitly recommends: > {quote} > To keep multiline metadata values from being confused with additional > metadata, I recommend indenting each new line of metadata. If your metadata > value includes a colon, it must be indented to keep it from being treated as > a new key-value pair > {quote} -- This message was sent by Atlassian Jira (v8.20.10#820010)