This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit b0b0ae6e7f6aa8293f7a6cd681721c797b7dd835 Author: Martin Desruisseaux <[email protected]> AuthorDate: Fri Aug 15 15:30:30 2025 +0200 Add missing accented characters on a few names in the EPSG scripts. - Ancienne Triangulation Française - Nouvelle Triangulation Française - Nivellement Général de la Corse - Nivellement Général de la France - Nivellement Général de Nouvelle Calédonie - Nivellement Général de Polynésie Française - Nivellement Général Guyanais - Réseau Géodésique de Nouvelle Calédonie - Réseau National Belge - Posiciones Geodésicas Argentinas --- .../factory/sql/epsg/DataScriptFormatter.java | 203 ++++++++++++++++----- .../factory/sql/epsg/DataScriptUpdater.java | 20 ++ 2 files changed, 178 insertions(+), 45 deletions(-) diff --git a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java index ccedddaf5d..6fc18199c5 100644 --- a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java +++ b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java @@ -21,9 +21,13 @@ import java.util.Set; import java.util.List; import java.util.ArrayList; import java.util.Arrays; +import java.util.Objects; +import java.util.regex.Pattern; +import java.util.regex.Matcher; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; +import java.io.PrintStream; import java.nio.file.Path; import java.nio.file.Files; import java.sql.Connection; @@ -62,6 +66,20 @@ final class DataScriptFormatter extends ScriptRunner { */ private final Map<String,String> toOriginalTableNames; + /** + * Texts to replace for spelling reasons. The main change that we are applying is the addition of accents. 
+ * The <abbr>EPSG</abbr> geodetic dataset tends to restrict itself to the <abbr>ASCII</abbr> character set, + * but this is not a restriction mandated by <abbr>OGC</abbr> standards except in <abbr>WKT</abbr> strings + * (in the latter case, <abbr>SIS</abbr> removes the accents on-the-fly). + * + * <p>Since the content of this map is arbitrary and has no effect on the validity of the <abbr>SQL</abbr> + * script generated by {@code DataScriptFormatter}, its content should be provided by the caller.</p> + * + * @see #addSpellingChange(String, String, String, String) + * @see #printSpellingChangeCount(PrintStream) + */ + private final List<TextChange> spellingChanges; + /** * Names of the columns to search for computing {@link TableValues#booleanColumnIndices}. */ @@ -128,7 +146,7 @@ final class DataScriptFormatter extends ScriptRunner { * @param c a dummy connection. Will be used for fetching metadata. * @throws SQLException if an error occurred while fetching metadata. */ - DataScriptFormatter(final Connection c) throws SQLException { + public DataScriptFormatter(final Connection c) throws SQLException { super(c, null, Integer.MAX_VALUE); booleanColumns = Set.of("deprecated", "show_crs", "show_operation", "reverse_op", "param_sign_reversal", "ellipsoid_shape"); doubleColumns = Set.of("parameter_value"); @@ -171,6 +189,31 @@ final class DataScriptFormatter extends ScriptRunner { toOriginalTableNames.forEach((oldTable, newTable) -> addReplacement(oldTable, '"' + newTable + '"')); valuesPerTable = new TableValues[toOriginalTableNames.size()]; otherStatements = new ArrayList<>(); + spellingChanges = new ArrayList<>(); + } + + /** + * Adds a pattern to replace by the given text. This method should be used mostly for minor spelling changes, + * such as adding the missing accents on letters of texts in French. Replacements are tried in the order that + * this method is invoked and stop at the first match. 
+ * + * @param table name of the table where to replace a value, or {@code null} for any. + * @param before string that must exist in the <abbr>SQL</abbr> before the text, or null if none. + * @param regex regular expression to search. Will be interpreted with an implicit word boundary. + * @param replacement the replacement for the given pattern. + */ + public void addSpellingChange(final String table, final String before, final String regex, final String replacement) { + spellingChanges.add(new TextChange(table, before, regex, replacement)); + } + + /** + * Replaces an <abbr>ASCII</abbr> text by the same text with accents added on some characters. + * The <abbr>ASCII</abbr> text is inferred from the given text with accented characters. + * + * @param replacement the replacement with accented characters. + */ + public void addAccentedCharacters(final String replacement) { + addSpellingChange(null, null, Pattern.quote(CharSequences.toASCII(replacement).toString()), replacement); } /** @@ -181,7 +224,7 @@ final class DataScriptFormatter extends ScriptRunner { * @throws IOException if an I/O operation failed. * @throws SQLException should never happen. */ - final void run(final Path inputFile, final Path outputFile) throws SQLException, IOException { + public final void run(final Path inputFile, final Path outputFile) throws SQLException, IOException { if (Files.isSameFile(inputFile, outputFile)) { throw new IllegalArgumentException("Input and output files are the same."); } @@ -210,15 +253,106 @@ final class DataScriptFormatter extends ScriptRunner { } } + /** + * Description of a change to apply in the text. + * + * @see #spellingChanges + * @see #addSpellingChange(String, String, String, String) + */ + private static final class TextChange { + /** Name of the table where to replace a value, or {@code null} for any. */ + private final String table; + + /** String that must exist in the <abbr>SQL</abbr> before the text, or {@code null} if none. 
*/ + private final String before; + + /** Text to search. Will be interpreted with an implicit word boundary. */ + private final Matcher matcher; + + /** The replacement for the matched text. */ + private final String replacement; + + /** Number of times that a match has been found. */ + private int matchCount; + + /** + * Creates a new description of a change to apply in the text. + * + * @param table name of the table where to replace a value, or {@code null} for any. + * @param before string that must exist in the <abbr>SQL</abbr> before the text, or null if none. + * @param regex regular expression to search. Will be interpreted with an implicit word boundary. + * @param replacement the replacement for the given pattern. + */ + TextChange(final String table, final String before, final String regex, final String replacement) { + this.table = table; + this.before = before; + this.matcher = Pattern.compile(regex).matcher(""); + this.replacement = Objects.requireNonNull(replacement); + } + + /** + * Returns whether the given text matches the pattern. + * This method does not verify the table in which the text occurs. + */ + final boolean matches(final String text) { + if (matcher.reset(text).lookingAt()) { + for (int c, i = matcher.start(); i > 0; i -= Character.charCount(c)) { + c = text.codePointBefore(i); + if (Character.isWhitespace(c)) continue; + if (Character.isLetter(c)) return false; + break; + } + final int length = text.length(); + for (int c, i = matcher.end(); i < length; i += Character.charCount(c)) { + c = text.codePointAt(i); + if (Character.isWhitespace(c)) continue; + if (Character.isLetter(c)) return false; + break; + } + return true; + } + return false; + } + + /** + * Replaces the matched text in the given buffer. This method shall be invoked only if + * {@link #matches(String)} returned {@code true}. + * + * @param sql the buffer where to do the replacement. 
+ * @param lower index of the first character of the region given to {@link #matches(String)}. + * @return whether the text has been replaced. + */ + final boolean replace(final StringBuilder sql, final int lower) { + if (before != null) { + final int i = sql.indexOf(before); + if (i < 0 || i >= lower) { + return false; + } + } + if (table != null) { + final int s = CharSequences.skipLeadingWhitespaces(sql, 0, lower); + if (!CharSequences.regionMatches(sql, s, SQLBuilder.INSERT + '"' + table + '"')) { + return false; + } + } + sql.replace(lower + matcher.start(), lower + matcher.end(), replacement); + matchCount++; + return true; + } + + /** + * Formats the number of replacements done. + */ + @Override + public String toString() { + return String.format("%3d replacements by \"%s\"", matchCount, replacement); + } + } + /** * Replaces the content of a text such as {@code 'some text'}. * If the text content is a table name, the old table name is replaced by the new name. * - * <h4>Corrections</h4> - * EPSG scripts version 8.9 seems to have 2 errors where the {@code OBJECT_TABLE_NAME} column contains - * {@code "AxisName"} instead of {@code "Coordinate Axis Name"}. Furthermore, the version number noted - * in the history table is a copy-and-paste error. - * * @param sql the whole SQL statement. * @param lower index of the opening quote character ({@code '}) of the text in {@code sql}. * @param upper index after the closing quote character ({@code '}) of the text in {@code sql}. 
@@ -227,46 +361,14 @@ final class DataScriptFormatter extends ScriptRunner { @Workaround(library="EPSG", version="8.9") protected void editText(final StringBuilder sql, int lower, int upper) { final String text = sql.substring(++lower, --upper); - final String tableName = toOriginalTableNames.get(text); - if (tableName != null) { - sql.replace(lower, upper, tableName); + String replacement = toOriginalTableNames.get(text); + if (replacement != null) { + sql.replace(lower, upper, replacement); return; } - final String table; // Name of the table where to replace a value. - final String before; // String that must exist before the value to replace, or null if none. - final String oldValue; // The old value to replace. - final String newValue; // The new value. - switch (upper - lower) { // Optimization for reducing the number of comparisons. - default: { - StringBuilders.trimWhitespaces(sql, lower, upper); - return; - } - case 8: { - table = "Deprecation"; - before = null; - oldValue = "AxisName"; - newValue = "Coordinate Axis Name"; - break; - } - case 36: { - table = "Version History"; - before = "'8.9'"; - oldValue = "Version 8.8 full release of Dataset."; - newValue = "Version 8.9 full release of Dataset."; - break; - } - } - if (oldValue.equalsIgnoreCase(text)) { - final int s = CharSequences.skipLeadingWhitespaces(sql, 0, lower); - if (CharSequences.regionMatches(sql, s, SQLBuilder.INSERT + '"' + table + '"')) { - if (upper - lower != oldValue.length()) { - throw new AssertionError("Unexpected length"); - } - if (before != null) { - final int i = sql.indexOf(before); - if (i < 0 || i >= lower) return; - } - sql.replace(lower, upper, newValue); + for (final TextChange entry : spellingChanges) { + if (entry.matches(text) && entry.replace(sql, lower)) { + return; // Value of `upper` may be no longer valid. 
} } StringBuilders.trimWhitespaces(sql, lower, upper); @@ -385,4 +487,15 @@ final class DataScriptFormatter extends ScriptRunner { } } } + + /** + * Prints a summary of the number of replacements done for each case declared by {@code addSpellingChange(…)}. + * + * @param out where to print. + * + * @see #addSpellingChange(String, String, String, String) + */ + public void printSpellingChangeCount(final PrintStream out) { + spellingChanges.forEach(out::println); + } } diff --git a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java index 2d0adc2e72..82fa5b4dc9 100644 --- a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java +++ b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java @@ -67,7 +67,27 @@ public final class DataScriptUpdater { Connection c = db.source.getConnection()) { final var formatter = new DataScriptFormatter(c); + /* + * The version number noted in the history table is a copy-and-paste error. + */ + formatter.addSpellingChange("Version History", "'8.9'", + "Version 8.8 full release of Dataset.", + "Version 8.9 full release of Dataset."); + /* + * Add missing accents on some letters of texts in non-English languages. 
+ */ + formatter.addAccentedCharacters("Ancienne Triangulation Française"); + formatter.addAccentedCharacters("Nouvelle Triangulation Française"); + formatter.addAccentedCharacters("Nivellement Général de la Corse"); + formatter.addAccentedCharacters("Nivellement Général de la France"); + formatter.addAccentedCharacters("Nivellement Général de Nouvelle Calédonie"); + formatter.addAccentedCharacters("Nivellement Général de Polynésie Française"); + formatter.addAccentedCharacters("Nivellement Général Guyanais"); + formatter.addAccentedCharacters("Réseau Géodésique de Nouvelle Calédonie"); + formatter.addAccentedCharacters("Réseau National Belge"); + formatter.addAccentedCharacters("Posiciones Geodésicas Argentinas"); formatter.run(Path.of(arguments[0]), Path.of(arguments[1])); + formatter.printSpellingChangeCount(System.out); } } }
