(sis) 02/02: Add missing accented characters on a few names in the EPSG scripts.

desruisseaux Fri, 15 Aug 2025 06:33:25 -0700

This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


commit b0b0ae6e7f6aa8293f7a6cd681721c797b7dd835
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Fri Aug 15 15:30:30 2025 +0200

    Add missing accented characters on a few names in the EPSG scripts.
    
    - Ancienne Triangulation Française
    - Nouvelle Triangulation Française
    - Nivellement Général de la Corse
    - Nivellement Général de la France
    - Nivellement Général de Nouvelle Calédonie
    - Nivellement Général de Polynésie Française
    - Nivellement Général Guyanais
    - Réseau Géodésique de Nouvelle Calédonie
    - Réseau National Belge
    - Posiciones Geodésicas Argentinas
---
 .../factory/sql/epsg/DataScriptFormatter.java      | 203 ++++++++++++++++-----
 .../factory/sql/epsg/DataScriptUpdater.java        |  20 ++
 2 files changed, 178 insertions(+), 45 deletions(-)

diff --git 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
index ccedddaf5d..6fc18199c5 100644
--- 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
+++ 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptFormatter.java
@@ -21,9 +21,13 @@ import java.util.Set;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Objects;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.nio.file.Path;
 import java.nio.file.Files;
 import java.sql.Connection;
@@ -62,6 +66,20 @@ final class DataScriptFormatter extends ScriptRunner {
      */
     private final Map<String,String> toOriginalTableNames;
 
+    /**
+     * Texts to replace for spelling reasons. The main change that we are applying is the addition of accents.
+     * The <abbr>EPSG</abbr> geodetic dataset tends to restrict itself to the <abbr>ASCII</abbr> character set,
+     * but this is not a restriction mandated by <abbr>OGC</abbr> standards except in <abbr>WKT</abbr> strings
+     * (in the latter case, <abbr>SIS</abbr> removes the accents on-the-fly).
+     *
+     * <p>Since the content of this list is arbitrary and has no effect on the validity of the <abbr>SQL</abbr>
+     * script generated by {@code DataScriptFormatter}, its content should be provided by the caller.
+     * Elements are stored in the order in which they were added.</p>
+     *
+     * @see #addSpellingChange(String, String, String, String)
+     * @see #printSpellingChangeCount(PrintStream)
+     */
+    private final List<TextChange> spellingChanges;
+
     /**
      * Names of the columns to search for computing {@link 
TableValues#booleanColumnIndices}.
      */
@@ -128,7 +146,7 @@ final class DataScriptFormatter extends ScriptRunner {
      * @param  c  a dummy connection. Will be used for fetching metadata.
      * @throws SQLException if an error occurred while fetching metadata.
      */
-    DataScriptFormatter(final Connection c) throws SQLException {
+    public DataScriptFormatter(final Connection c) throws SQLException {
         super(c, null, Integer.MAX_VALUE);
         booleanColumns  = Set.of("deprecated", "show_crs", "show_operation", 
"reverse_op", "param_sign_reversal", "ellipsoid_shape");
         doubleColumns   = Set.of("parameter_value");
@@ -171,6 +189,31 @@ final class DataScriptFormatter extends ScriptRunner {
         toOriginalTableNames.forEach((oldTable, newTable) -> 
addReplacement(oldTable, '"' + newTable + '"'));
         valuesPerTable  = new TableValues[toOriginalTableNames.size()];
         otherStatements = new ArrayList<>();
+        spellingChanges = new ArrayList<>();
+    }
+
+    /**
+     * Adds a pattern to replace by the given text. This method should be used mostly for minor spelling changes,
+     * such as adding the missing accents on letters of texts in French. Replacements are tried in the order that
+     * this method is invoked, and the search stops at the first replacement which is applied.
+     *
+     * <p>The {@code regex} argument is compiled as given by {@link Pattern#compile(String)}.
+     * Callers who want a literal text to be matched should escape the regular expression
+     * meta-characters, for example with {@link Pattern#quote(String)}.</p>
+     *
+     * @param table        name of the table where to replace a value, or {@code null} for any.
+     * @param before       string that must exist in the <abbr>SQL</abbr> before the text, or {@code null} if none.
+     * @param regex        regular expression to search. Will be interpreted with an implicit word boundary.
+     * @param replacement  the replacement for the given pattern. Shall not be {@code null}.
+     * @throws NullPointerException if {@code replacement} is {@code null}.
+     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid regular expression.
+     */
+    public void addSpellingChange(final String table, final String before, 
final String regex, final String replacement) {
+        spellingChanges.add(new TextChange(table, before, regex, replacement));
+    }
+
+    /**
+     * Replaces an <abbr>ASCII</abbr> text by the same text with accents added on some characters.
+     * The <abbr>ASCII</abbr> text is inferred from the given text with accented characters:
+     * it is the result of {@code CharSequences.toASCII(replacement)}, quoted with
+     * {@link Pattern#quote(String)} so that it is searched as a literal text rather than
+     * as a regular expression.
+     *
+     * <p>The change is registered for any table and without condition on the text
+     * that precedes it in the <abbr>SQL</abbr> statement (both arguments are {@code null}).</p>
+     *
+     * @param  replacement  the replacement with accented characters.
+     *
+     * @see #addSpellingChange(String, String, String, String)
+     */
+    public void addAccentedCharacters(final String replacement) {
+        addSpellingChange(null, null, 
Pattern.quote(CharSequences.toASCII(replacement).toString()), replacement);
     }
 
     /**
@@ -181,7 +224,7 @@ final class DataScriptFormatter extends ScriptRunner {
      * @throws IOException  if an I/O operation failed.
      * @throws SQLException should never happen.
      */
-    final void run(final Path inputFile, final Path outputFile) throws 
SQLException, IOException {
+    public final void run(final Path inputFile, final Path outputFile) throws 
SQLException, IOException {
         if (Files.isSameFile(inputFile, outputFile)) {
             throw new IllegalArgumentException("Input and output files are the 
same.");
         }
@@ -210,15 +253,106 @@ final class DataScriptFormatter extends ScriptRunner {
         }
     }
 
+    /**
+     * Description of a change to apply in the text.
+     * Each instance holds a reusable {@link Matcher} and a counter of the replacements done,
+     * so instances are mutable. The expected usage is to invoke {@link #matches(String)} and,
+     * if it returned {@code true}, to invoke {@link #replace(StringBuilder, int)} immediately
+     * after, because the latter reads the match position left in the matcher by the former.
+     *
+     * @see #spellingChanges
+     * @see #addSpellingChange(String, String, String, String)
+     */
+    private static final class TextChange {
+        /** Name of the table where to replace a value, or {@code null} for any. */
+        private final String table;
+
+        /** String that must exist in the <abbr>SQL</abbr> before the text, or {@code null} if none. */
+        private final String before;
+
+        /**
+         * Matcher for the text to search. Will be interpreted with an implicit word boundary.
+         * The same instance is reset on each call to {@link #matches(String)} and keeps
+         * the position of the last match for use by {@link #replace(StringBuilder, int)}.
+         */
+        private final Matcher matcher;
+
+        /** The replacement for the matched text. Never {@code null}. */
+        private final String replacement;
+
+        /** Number of replacements done, i.e. number of times that {@code replace(…)} returned {@code true}. */
+        private int matchCount;
+
+        /**
+         * Creates a new description of a change to apply in the text.
+         * The regular expression is compiled as given, without quoting.
+         *
+         * @param table        name of the table where to replace a value, or {@code null} for any.
+         * @param before       string that must exist in the <abbr>SQL</abbr> before the text, or {@code null} if none.
+         * @param regex        regular expression to search. Will be interpreted with an implicit word boundary.
+         * @param replacement  the replacement for the given pattern.
+         * @throws NullPointerException if {@code replacement} is {@code null}.
+         */
+        TextChange(final String table, final String before, final String 
regex, final String replacement)  {
+            this.table       = table;
+            this.before      = before;
+            this.matcher     = Pattern.compile(regex).matcher("");
+            this.replacement = Objects.requireNonNull(replacement);
+        }
+
+        /**
+         * Returns whether the given text matches the pattern.
+         * The pattern is tested with {@link Matcher#lookingAt()}, i.e. the match must start
+         * at the beginning of {@code text} but does not need to extend to its end.
+         * After the end of the match, whitespaces are skipped and the test fails if the
+         * next character is a letter. Any other character, or the end of the text, is accepted.
+         * This method does not verify the table in which the text occurs.
+         *
+         * <p>NOTE(review): since {@code lookingAt()} anchors the match at the beginning of the text,
+         * {@code matcher.start()} is 0 here and the first loop (the check of the characters before
+         * the match) never iterates. To be confirmed whether {@link Matcher#find()} was intended.</p>
+         *
+         * @param  text  the text to test. As invoked by {@code editText(…)}, this is the content
+         *               of a quoted text without the enclosing quote characters.
+         * @return whether the text starts with the pattern, with the boundary condition described above.
+         */
+        final boolean matches(final String text) {
+            if (matcher.reset(text).lookingAt()) {
+                for (int c, i = matcher.start(); i > 0; i -= 
Character.charCount(c)) {
+                    c = text.codePointBefore(i);
+                    if (Character.isWhitespace(c)) continue;
+                    if (Character.isLetter(c)) return false;
+                    break;
+                }
+                final int length = text.length();
+                for (int c, i = matcher.end(); i < length; i += 
Character.charCount(c)) {
+                    c = text.codePointAt(i);
+                    if (Character.isWhitespace(c)) continue;
+                    if (Character.isLetter(c)) return false;
+                    break;
+                }
+                return true;
+            }
+            return false;
+        }
+
+        /**
+         * Replaces the matched text in the given buffer. This method shall be invoked only if
+         * {@link #matches(String)} returned {@code true}, and before any other call to that method,
+         * because the position of the text to replace is read from the matcher.
+         *
+         * <p>The replacement is skipped (and {@code false} is returned) in either of the following cases:</p>
+         * <ul>
+         *   <li>{@link #before} is non-null and is not found in {@code sql} at an index less than {@code lower}.</li>
+         *   <li>{@link #table} is non-null and the statement, after leading whitespaces, does not begin
+         *       with {@code SQLBuilder.INSERT} followed by the table name between double quotes.</li>
+         * </ul>
+         *
+         * Otherwise the matched region is replaced and the replacement counter is incremented.
+         *
+         * @param  sql    the buffer where to do the replacement.
+         * @param  lower  index of the first character of the region given to {@link #matches(String)}.
+         * @return whether the text has been replaced.
+         */
+        final boolean replace(final StringBuilder sql, final int lower) {
+            if (before != null) {
+                final int i = sql.indexOf(before);
+                if (i < 0 || i >= lower) {
+                    return false;
+                }
+            }
+            if (table != null) {
+                final int s = CharSequences.skipLeadingWhitespaces(sql, 0, 
lower);
+                if (!CharSequences.regionMatches(sql, s, SQLBuilder.INSERT + 
'"' + table + '"')) {
+                    return false;
+                }
+            }
+            sql.replace(lower + matcher.start(), lower + matcher.end(), 
replacement);
+            matchCount++;
+            return true;
+        }
+
+        /**
+         * Formats the number of replacements done, followed by the replacement text between quotes.
+         * The count is right-aligned in a field of at least 3 characters.
+         *
+         * @return a line of text summarizing the replacements done by this instance.
+         */
+        @Override
+        public String toString() {
+            return String.format("%3d replacements by \"%s\"", matchCount, 
replacement);
+        }
+    }
+
     /**
      * Replaces the content of a text such as {@code 'some text'}.
      * If the text content is a table name, the old table name is replaced by 
the new name.
      *
-     * <h4>Corrections</h4>
-     * EPSG scripts version 8.9 seems to have 2 errors where the {@code 
OBJECT_TABLE_NAME} column contains
-     * {@code "AxisName"} instead of {@code "Coordinate Axis Name"}. 
Furthermore, the version number noted
-     * in the history table is a copy-and-paste error.
-     *
      * @param  sql    the whole SQL statement.
      * @param  lower  index of the opening quote character ({@code '}) of the 
text in {@code sql}.
      * @param  upper  index after the closing quote character ({@code '}) of 
the text in {@code sql}.
@@ -227,46 +361,14 @@ final class DataScriptFormatter extends ScriptRunner {
     @Workaround(library="EPSG", version="8.9")
     protected void editText(final StringBuilder sql, int lower, int upper) {
         final String text = sql.substring(++lower, --upper);
-        final String tableName = toOriginalTableNames.get(text);
-        if (tableName != null) {
-            sql.replace(lower, upper, tableName);
+        String replacement = toOriginalTableNames.get(text);
+        if (replacement != null) {
+            sql.replace(lower, upper, replacement);
             return;
         }
-        final String table;         // Name of the table where to replace a 
value.
-        final String before;        // String that must exist before the value 
to replace, or null if none.
-        final String oldValue;      // The old value to replace.
-        final String newValue;      // The new value.
-        switch (upper - lower) {    // Optimization for reducing the number of 
comparisons.
-            default: {
-                StringBuilders.trimWhitespaces(sql, lower, upper);
-                return;
-            }
-            case 8: {
-                table    = "Deprecation";
-                before   = null;
-                oldValue = "AxisName";
-                newValue = "Coordinate Axis Name";
-                break;
-            }
-            case 36: {
-                table    = "Version History";
-                before   = "'8.9'";
-                oldValue = "Version 8.8 full release of Dataset.";
-                newValue = "Version 8.9 full release of Dataset.";
-                break;
-            }
-        }
-        if (oldValue.equalsIgnoreCase(text)) {
-            final int s = CharSequences.skipLeadingWhitespaces(sql, 0, lower);
-            if (CharSequences.regionMatches(sql, s, SQLBuilder.INSERT + '"' + 
table + '"')) {
-                if (upper - lower != oldValue.length()) {
-                    throw new AssertionError("Unexpected length");
-                }
-                if (before != null) {
-                    final int i = sql.indexOf(before);
-                    if (i < 0 || i >= lower) return;
-                }
-                sql.replace(lower, upper, newValue);
+        for (final TextChange entry : spellingChanges) {
+            if (entry.matches(text) && entry.replace(sql, lower)) {
+                return;     // Value of `upper` may be no longer valid.
             }
         }
         StringBuilders.trimWhitespaces(sql, lower, upper);
@@ -385,4 +487,15 @@ final class DataScriptFormatter extends ScriptRunner {
             }
         }
     }
+
+    /**
+     * Prints a summary of the number of replacements done for each case declared by {@code addSpellingChange(…)}.
+     * One line is printed for each declared change, in declaration order, including the changes for which
+     * no replacement has been done. Each line is the string representation of the change, which contains
+     * the replacement count followed by the replacement text.
+     *
+     * @param  out  where to print.
+     *
+     * @see #addSpellingChange(String, String, String, String)
+     */
+    public void printSpellingChangeCount(final PrintStream out) {
+        spellingChanges.forEach(out::println);
+    }
 }
diff --git 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
index 2d0adc2e72..82fa5b4dc9 100644
--- 
a/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
+++ 
b/optional/src/org.apache.sis.referencing.epsg/test/org/apache/sis/referencing/factory/sql/epsg/DataScriptUpdater.java
@@ -67,7 +67,27 @@ public final class DataScriptUpdater {
              Connection c = db.source.getConnection())
         {
             final var formatter = new DataScriptFormatter(c);
+            /*
+             * The version number noted in the history table is a 
copy-and-paste error.
+             */
+            formatter.addSpellingChange("Version History", "'8.9'",
+                    "Version 8.8 full release of Dataset.",
+                    "Version 8.9 full release of Dataset.");
+            /*
+             * Add missing accents on some letters of texts in non-English 
languages.
+             */
+            formatter.addAccentedCharacters("Ancienne Triangulation 
Française");
+            formatter.addAccentedCharacters("Nouvelle Triangulation 
Française");
+            formatter.addAccentedCharacters("Nivellement Général de la Corse");
+            formatter.addAccentedCharacters("Nivellement Général de la 
France");
+            formatter.addAccentedCharacters("Nivellement Général de Nouvelle 
Calédonie");
+            formatter.addAccentedCharacters("Nivellement Général de Polynésie 
Française");
+            formatter.addAccentedCharacters("Nivellement Général Guyanais");
+            formatter.addAccentedCharacters("Réseau Géodésique de Nouvelle 
Calédonie");
+            formatter.addAccentedCharacters("Réseau National Belge");
+            formatter.addAccentedCharacters("Posiciones Geodésicas 
Argentinas");
             formatter.run(Path.of(arguments[0]), Path.of(arguments[1]));
+            formatter.printSpellingChangeCount(System.out);
         }
     }
 }

(sis) 02/02: Add missing accented characters on a few names in the EPSG scripts.

Reply via email to