branch: elpa/datetime
commit 79fb915c78cce3586c745eea9e86147c22d3e83d
Author: Paul Pogonyshev <pogonys...@gmail.com>
Commit: Paul Pogonyshev <pogonys...@gmail.com>
Upgrade to fetching data from Java 9; this results in many more known locales, in particular. --- .travis.yml | 2 +- datetime.el | 37 +++++--- dev/HarvestData.java | 249 +++++++++++++++++++++++++++------------------------ locale-data.extmap | Bin 51999 -> 245800 bytes test/format.el | 4 +- timezone-data.extmap | Bin 825677 -> 825067 bytes 6 files changed, 164 insertions(+), 128 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2e1901df32..166fddbcf2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ env: - EMACS=emacs-snapshot jdk: - - oraclejdk8 + - oraclejdk9 script: - echo "(add-to-list 'package-archives '(\"melpa\" . \"http://stable.melpa.org/packages/\") t)" >> ./local-environment.el diff --git a/datetime.el b/datetime.el index 916cc753ec..ba7a667871 100644 --- a/datetime.el +++ b/datetime.el @@ -281,10 +281,9 @@ when necessary." (setq scan (1+ scan) num-repetitions (1+ num-repetitions))) (pcase character - ((or ?G ?E ?a) + ((or ?G ?a) (push (cons (pcase character (?G 'era) - (?E 'weekday-context-name) (?a 'am-pm)) (if (>= num-repetitions 4) 'full 'abbreviated)) parts)) @@ -301,6 +300,14 @@ when necessary." (cons (if (= character ?M) 'month-context-name 'month-standalone-name) (if (>= num-repetitions 4) 'full 'abbreviated))) parts)) + ((or ?E ?c) + (push (cons (if (= character ?E) 'weekday-context-name 'weekday-standalone-name) + (if (>= num-repetitions 4) 'full 'abbreviated)) + parts)) + (?e (push (if (<= num-repetitions 2) + (cons 'weekday num-repetitions) + (cons 'weekday-context-name (if (>= num-repetitions 4) 'full 'abbreviated))) + parts)) (?w (push (cons 'week-in-year num-repetitions) parts)) (?W (push (cons 'week-in-month num-repetitions) parts)) (?D (push (cons 'day-in-year num-repetitions) parts)) @@ -356,11 +363,12 @@ when necessary." 
(`always-two-digits (cons base 2)) (_ (cons base details))))) (`month (cons ?M details)) - ((or `month-context-name `month-standalone-name `weekday-context-name) + ((or `month-context-name `month-standalone-name `weekday-context-name `weekday-standalone-name) (cons (pcase type - (`month-context-name ?M) - (`month-standalone-name ?L) - (`weekday-context-name ?E)) + (`month-context-name ?M) + (`month-standalone-name ?L) + (`weekday-context-name ?E) + (`weekday-standalone-name ?c)) (pcase details (`abbreviated 3) (`full 4) @@ -994,12 +1002,19 @@ Returned pattern is always of type \\\='java. This function exists not just for completeness: while in most cases the result is just corresponding date and time patterns separated by a space, for a few locales it is different." - (let ((date-time-pattern-rule (or (datetime-locale-field locale :date-time-pattern-rule) '(t . " "))) - (date-part (datetime-locale-date-pattern locale date-variant)) - (time-part (datetime-locale-time-pattern locale (or time-variant date-variant)))) + (unless date-variant + (setq date-variant :medium)) + (unless time-variant + (setq time-variant date-variant)) + (let* ((date-time-pattern-rule (or (datetime-locale-field locale :date-time-pattern-rule) '(t . 
" "))) + (separator (cdr date-time-pattern-rule)) + (date-part (datetime-locale-date-pattern locale date-variant)) + (time-part (datetime-locale-time-pattern locale time-variant))) + (unless (stringp separator) + (setq separator (cdr (assoc (list date-variant time-variant) separator)))) (if (car date-time-pattern-rule) - (concat date-part (cdr date-time-pattern-rule) time-part) - (concat time-part (cdr date-time-pattern-rule) date-part)))) + (concat date-part separator time-part) + (concat time-part separator date-part)))) (defconst datetime--english-eras ["BC" "AD"]) diff --git a/dev/HarvestData.java b/dev/HarvestData.java index 6de8617afb..67b4451951 100644 --- a/dev/HarvestData.java +++ b/dev/HarvestData.java @@ -1,5 +1,6 @@ -import java.text.*; import java.time.*; +import java.time.chrono.*; +import java.time.format.*; import java.time.temporal.*; import java.time.zone.*; import java.util.*; @@ -9,9 +10,11 @@ import java.util.stream.*; public class HarvestData { - private static long DAYS_IN_400_YEARS = IntStream.range (0, 400).map ((year) -> isLeapYear (year) ? 366 : 365).sum (); - private static long SECONDS_IN_400_YEARS = (DAYS_IN_400_YEARS * 24 * 60 * 60); - private static long AVERAGE_SECONDS_IN_YEAR = (SECONDS_IN_400_YEARS / 400); + private static final long DAYS_IN_400_YEARS = IntStream.range (0, 400).map ((year) -> isLeapYear (year) ? 
366 : 365).sum (); + private static final long SECONDS_IN_400_YEARS = (DAYS_IN_400_YEARS * 24 * 60 * 60); + private static final long AVERAGE_SECONDS_IN_YEAR = (SECONDS_IN_400_YEARS / 400); + private static final String ENGLISH_ERAS = toLispVector (getNames (Locale.ENGLISH, ChronoField.ERA, "G", 0, 1)); + private static final String ENGLISH_AM_PM = toLispVector (getNames (Locale.ENGLISH, ChronoField.AMPM_OF_DAY, "a", 0, 1)); public static void main (String[] args) throws Exception @@ -30,38 +33,35 @@ public class HarvestData Map <Locale, Map <String, String>> data = new LinkedHashMap <> (); - List <String> english_eras = Arrays.asList (DateFormatSymbols.getInstance (Locale.ENGLISH).getEras ()); - List <String> english_am_pm = Arrays.asList (DateFormatSymbols.getInstance (Locale.ENGLISH).getAmPmStrings ()); - for (Locale locale : locales) { - Map <String, String> map = new LinkedHashMap <> (); - data.put (locale, map); - - if (DecimalFormatSymbols.getInstance (locale).getDecimalSeparator () != '.') - map.put (":decimal-separator", String.format ("?%c", DecimalFormatSymbols.getInstance (locale).getDecimalSeparator ())); - - if (!Objects.equals (Arrays.asList (DateFormatSymbols.getInstance (locale).getEras ()), english_eras)) - map.put (":eras", toLispVector (Arrays.asList (DateFormatSymbols.getInstance (locale).getEras ()))); - - map.put (":month-context-abbr", toLispVector (getNames (locale, Calendar.MONTH, Calendar.SHORT_FORMAT, Calendar.JANUARY, Calendar.DECEMBER, -1))); - map.put (":month-context-names", toLispVector (getNames (locale, Calendar.MONTH, Calendar.LONG_FORMAT, Calendar.JANUARY, Calendar.DECEMBER, -1))); - map.put (":weekday-context-abbr", toLispVector (getNames (locale, Calendar.DAY_OF_WEEK, Calendar.SHORT_FORMAT, Calendar.MONDAY, Calendar.SATURDAY, Calendar.SUNDAY))); - map.put (":weekday-context-names", toLispVector (getNames (locale, Calendar.DAY_OF_WEEK, Calendar.LONG_FORMAT, Calendar.MONDAY, Calendar.SATURDAY, Calendar.SUNDAY))); - map.put 
(":month-standalone-abbr", toLispVector (getNames (locale, Calendar.MONTH, Calendar.SHORT_STANDALONE, Calendar.JANUARY, Calendar.DECEMBER, -1))); - map.put (":month-standalone-names", toLispVector (getNames (locale, Calendar.MONTH, Calendar.LONG_STANDALONE, Calendar.JANUARY, Calendar.DECEMBER, -1))); - map.put (":weekday-standalone-abbr", toLispVector (getNames (locale, Calendar.DAY_OF_WEEK, Calendar.SHORT_STANDALONE, Calendar.MONDAY, Calendar.SATURDAY, Calendar.SUNDAY))); - map.put (":weekday-standalone-names", toLispVector (getNames (locale, Calendar.DAY_OF_WEEK, Calendar.LONG_STANDALONE, Calendar.MONDAY, Calendar.SATURDAY, Calendar.SUNDAY))); + // This way we discard a few locales that can otherwise lead to duplicate keys + // because of use of toLanguageTag(). E.g. `no_NO_NY' is problematic. + if (locale.getVariant ().length () > 0) + continue; - if (!Objects.equals (Arrays.asList (DateFormatSymbols.getInstance (locale).getAmPmStrings ()), english_am_pm)) - map.put (":am-pm", toLispVector (Arrays.asList (DateFormatSymbols.getInstance (locale).getAmPmStrings ()))); + Chronology chronology = Chronology.ofLocale (locale); + if (!chronology.getId ().equals ("ISO")) { + // Ignore such locales for now. 
+ continue; + } - removeUnnecessaryStandaloneStrings (map, ":month-standalone-abbr", ":month-context-abbr"); - removeUnnecessaryStandaloneStrings (map, ":month-standalone-names", ":month-context-names"); - removeUnnecessaryStandaloneStrings (map, ":weekday-standalone-abbr", ":weekday-context-abbr"); - removeUnnecessaryStandaloneStrings (map, ":weekday-standalone-names", ":weekday-context-names"); + Map <String, String> map = new LinkedHashMap <> (); + data.put (locale, map); - Map <String, String> date_patterns = toPatternPlist ((style) -> (SimpleDateFormat) DateFormat.getDateInstance (style, locale)); - Map <String, String> time_patterns = toPatternPlist ((style) -> (SimpleDateFormat) DateFormat.getTimeInstance (style, locale)); + map.put (":decimal-separator", String.format ("?%c", DecimalStyle.of (locale).getDecimalSeparator ())); + map.put (":eras", toLispVector (getNames (locale, ChronoField.ERA, "G", 0, 1))); + map.put (":month-context-abbr", toLispVector (getNames (locale, ChronoField.MONTH_OF_YEAR, "MMM", 1, 12))); + map.put (":month-context-names", toLispVector (getNames (locale, ChronoField.MONTH_OF_YEAR, "MMMM", 1, 12))); + map.put (":weekday-context-abbr", toLispVector (getNames (locale, ChronoField.DAY_OF_WEEK, "EEE", 1, 7))); + map.put (":weekday-context-names", toLispVector (getNames (locale, ChronoField.DAY_OF_WEEK, "EEEE", 1, 7))); + map.put (":month-standalone-abbr", toLispVector (getNames (locale, ChronoField.MONTH_OF_YEAR, "LLL", 1, 12))); + map.put (":month-standalone-names", toLispVector (getNames (locale, ChronoField.MONTH_OF_YEAR, "LLLL", 1, 12))); + map.put (":weekday-standalone-abbr", toLispVector (getNames (locale, ChronoField.DAY_OF_WEEK, "ccc", 1, 7))); + map.put (":weekday-standalone-names", toLispVector (getNames (locale, ChronoField.DAY_OF_WEEK, "cccc", 1, 7))); + map.put (":am-pm", toLispVector (getNames (locale, ChronoField.AMPM_OF_DAY, "a", 0, 1))); + + Map <String, String> date_patterns = toPatternPlist ((style) -> 
DateTimeFormatterBuilder.getLocalizedDateTimePattern (style, null, chronology, locale)); + Map <String, String> time_patterns = toPatternPlist ((style) -> DateTimeFormatterBuilder.getLocalizedDateTimePattern (null, style, chronology, locale)); // Fallbacks: short <- medium; full <- long <- medium. for (Map <String, String> patterns : Arrays.asList (date_patterns, time_patterns)) { @@ -76,74 +76,46 @@ public class HarvestData map.put (":date-patterns", toLispPlist (date_patterns, true)); map.put (":time-patterns", toLispPlist (time_patterns, true)); - boolean date_part_first = true; - String separator = null; - - for (int date_style : new int[] { DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL }) { - for (int time_style : new int[] { DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL }) { - String date_pattern = ((SimpleDateFormat) DateFormat.getDateInstance (date_style, locale)).toPattern (); - String time_pattern = ((SimpleDateFormat) DateFormat.getTimeInstance (time_style, locale)).toPattern (); - String date_time_pattern = ((SimpleDateFormat) DateFormat.getDateTimeInstance (date_style, time_style, locale)).toPattern (); - - if (separator == null) { - boolean found = false; - - search_loop: - for (boolean date_part_first_ : new boolean[] { true, false }) { - for (String separator_ : new String[] { " ", ", " }) { - if (Objects.equals (date_time_pattern, String.format ("%s%s%s", - date_part_first_ ? date_pattern : time_pattern, - separator_, - date_part_first_ ? 
time_pattern : date_pattern))) { - found = true; - date_part_first = date_part_first_; - separator = separator_; - break search_loop; - } - } - } - - if (!found) { - throw new IllegalStateException (String.format ("cannot determine separator:\n locale: %s\n date-time: %s\n date: %s\n time: %s", - locale.toLanguageTag (), date_time_pattern, date_pattern, time_pattern)); - } - } + Boolean date_part_first = null; + Map <List <String>, String> separators = new LinkedHashMap <> (); + + for (FormatStyle date_style : FormatStyle.values ()) { + for (FormatStyle time_style : FormatStyle.values ()) { + String date_pattern = DateTimeFormatterBuilder.getLocalizedDateTimePattern (date_style, null, chronology, locale); + String time_pattern = DateTimeFormatterBuilder.getLocalizedDateTimePattern (null, time_style, chronology, locale); + String date_time_pattern = DateTimeFormatterBuilder.getLocalizedDateTimePattern (date_style, time_style, chronology, locale); + + if (date_part_first == null) + date_part_first = date_time_pattern.startsWith (date_pattern); + + String separator = null; - if (!Objects.equals (date_time_pattern, String.format ("%s%s%s", - date_part_first ? date_pattern : time_pattern, - separator, - date_part_first ? 
time_pattern : date_pattern))) { - throw new IllegalStateException (String.format ("unexpected date-time pattern:\n locale: %s\n date-time: %s\n date: %s\n time: %s", + if (date_part_first && date_time_pattern.startsWith (date_pattern) && date_time_pattern.endsWith (time_pattern)) + separator = date_time_pattern.substring (date_pattern.length (), date_time_pattern.length () - time_pattern.length ()); + else if (!date_part_first && date_time_pattern.startsWith (time_pattern) && date_time_pattern.endsWith (date_pattern)) + separator = date_time_pattern.substring (time_pattern.length (), date_time_pattern.length () - date_pattern.length ()); + else { + throw new IllegalStateException (String.format ("cannot determine separator:\n locale: %s\n date-time: %s\n date: %s\n time: %s", locale.toLanguageTag (), date_time_pattern, date_pattern, time_pattern)); } + + List <String> key = new ArrayList <> (); + for (FormatStyle style : new FormatStyle[] { date_style, time_style }) + key.add (style == FormatStyle.SHORT ? ":short" : style == FormatStyle.MEDIUM ? ":medium" : style == FormatStyle.LONG ? ":long" : ":full"); + + separators.put (key, separator); } } - if (!date_part_first || !" ".equals (separator)) - map.put (":date-time-pattern-rule", String.format ("(%s . %s)", date_part_first ? "t" : "nil", quoteString (separator))); + if (separators.values ().stream ().distinct ().collect (Collectors.counting ()) == 1) + map.put (":date-time-pattern-rule", String.format ("(%s . %s)", date_part_first ? "t" : "nil", quoteString (separators.values ().stream ().distinct ().findFirst ().get ()))); + else + map.put (":date-time-pattern-rule", String.format ("(%s . %s)", date_part_first ? "t" : "nil", toLispAlist (separators, (key) -> toLispList (key), (value) -> quoteString ((String) value)))); } - // Remove duplicates. 
for (Locale locale : locales) { - Locale parent = new Locale (locale.getLanguage ()); - if (Objects.equals (locale, parent)) - continue; - - if (Objects.equals (data.get (parent), data.get (locale))) { - // We used to delete such locales, but now keep them in the database. - // Otherwise, at runtime you can't for example use `ru-RU' and must use - // `ru' instead. - data.put (locale, new HashMap <> ()); - } - else { - for (Iterator <Map.Entry <String, String>> it = data.get (locale).entrySet ().iterator (); it.hasNext ();) { - Map.Entry <String, String> entry = it.next (); - if (Objects.equals (entry.getValue (), data.get (parent).get (entry.getKey ()))) - it.remove (); - } - } - - data.get (locale).put (":parent", parent.toLanguageTag ()); + if (data.containsKey (locale)) + removeUnnecessaryLocaleData (data, locale); } System.out.println ("("); @@ -152,37 +124,76 @@ public class HarvestData System.out.println (")"); } - protected static List <String> getNames (Locale locale, int field, int style, int from, int to, int extra) + protected static List <String> getNames (Locale locale, TemporalField field, String pattern, int from, int to) { - Calendar calendar = Calendar.getInstance (locale); - List <String> names = new ArrayList <> (); + // This day also was a Monday. 
+ LocalDateTime local_date_time = LocalDateTime.of (-4, 1, 1, 6, 0, 0); + DateTimeFormatter formatter = DateTimeFormatter.ofPattern (pattern, locale); + List <String> names = new ArrayList <> (); for (int k = from; k <= to; k++) { - calendar.set (field, k); - names.add (calendar.getDisplayName (field, style, locale)); + if (local_date_time.getLong (field) != k) + throw new IllegalStateException (String.format ("%s: expected %s = %s, but was %s", local_date_time, field, k, local_date_time.getLong (field))); + + names.add (formatter.format (local_date_time)); + + if (field == ChronoField.AMPM_OF_DAY) + local_date_time = local_date_time.plusHours (12); + else if (field == ChronoField.ERA) + local_date_time = local_date_time.plusYears (1000); + else if (field == ChronoField.MONTH_OF_YEAR) + local_date_time = local_date_time.plusMonths (1); + else if (field == ChronoField.DAY_OF_WEEK) + local_date_time = local_date_time.plusDays (1); + else + throw new IllegalArgumentException (field.toString ()); + } - // Java is not very consistent here, sometimes standalone strings are just null, - // sometimes they duplicate context strings. - if (names.get (names.size () - 1) == null) - names.set (names.size () - 1, calendar.getDisplayName (field, style == Calendar.SHORT_STANDALONE ? Calendar.SHORT : Calendar.LONG, locale)); + return names; + } + + protected static void removeUnnecessaryLocaleData (Map <Locale, Map <String, String>> data, Locale locale) + { + Map <String, String> locale_data = data.get (locale); + Locale parent = new Locale (locale.getLanguage ()); + Map <String, String> parent_data; + + if (Objects.equals (locale, parent)) + parent_data = new HashMap <> (); + else { + removeUnnecessaryLocaleData (data, parent); + parent_data = data.get (parent); + + locale_data.put (":parent", parent.toLanguageTag ()); } - // Needed to put Sunday last. 
- if (extra >= 0) { - calendar.set (field, extra); - names.add (calendar.getDisplayName (field, style, locale)); + removeForFallback1 (locale_data, parent_data, ":decimal-separator", "?."); + removeForFallback1 (locale_data, parent_data, ":eras", ENGLISH_ERAS); + removeForFallback1 (locale_data, parent_data, ":am-pm", ENGLISH_AM_PM); + removeForFallback1 (locale_data, parent_data, ":date-time-pattern-rule", "(t . \" \")"); + + removeForFallback2 (locale_data, parent_data, ":month-standalone-abbr", ":month-context-abbr"); + removeForFallback2 (locale_data, parent_data, ":month-standalone-names", ":month-context-names"); + removeForFallback2 (locale_data, parent_data, ":weekday-standalone-abbr", ":weekday-context-abbr"); + removeForFallback2 (locale_data, parent_data, ":weekday-standalone-names", ":weekday-context-names"); - if (names.get (names.size () - 1) == null) - names.set (names.size () - 1, calendar.getDisplayName (field, style == Calendar.SHORT_STANDALONE ? Calendar.SHORT : Calendar.LONG, locale)); + for (Iterator <Map.Entry <String, String>> it = locale_data.entrySet ().iterator (); it.hasNext ();) { + Map.Entry <String, String> entry = it.next (); + if (Objects.equals (entry.getValue (), parent_data.get (entry.getKey ()))) + it.remove (); } + } - return names; + protected static void removeForFallback1 (Map <String, String> locale_data, Map <String, String> parent_data, String key, String default_value) + { + if (Objects.equals (locale_data.get (key), parent_data.getOrDefault (key, default_value))) + locale_data.remove (key); } - protected static void removeUnnecessaryStandaloneStrings (Map <String, String> properties, String standalone_key, String context_key) + protected static void removeForFallback2 (Map <String, String> locale_data, Map <String, String> parent_data, String main_key, String fallback_key) { - if (Objects.equals (properties.get (standalone_key), properties.get (context_key))) - properties.remove (standalone_key); + if (Objects.equals 
(locale_data.get (main_key), locale_data.get (fallback_key))) + locale_data.remove (main_key); } protected static void printTimezoneData () throws Exception @@ -306,14 +317,22 @@ public class HarvestData return String.format ("[%s]", strings.stream ().map ((string) -> quote_value_strings ? quoteString (string) : string).collect (Collectors.joining (" "))); } - protected static Map <String, String> toPatternPlist (Function <Integer, SimpleDateFormat> format) + protected static <K, V> String toLispAlist (Map <K, V> map, Function <? super K, String> key_to_string, Function <? super V, String> value_to_string) + { + return String.format ("(%s)", + map.entrySet ().stream () + .map ((entry) -> String.format ("(%s . %s)", key_to_string.apply (entry.getKey ()), value_to_string.apply (entry.getValue ()))) + .collect (Collectors.joining (" "))); + } + + protected static Map <String, String> toPatternPlist (Function <FormatStyle, String> format) { Map <String, String> patterns = new LinkedHashMap <> (); - patterns.put (":short", format.apply (DateFormat.SHORT) .toPattern ()); - patterns.put (":medium", format.apply (DateFormat.MEDIUM).toPattern ()); - patterns.put (":long", format.apply (DateFormat.LONG) .toPattern ()); - patterns.put (":full", format.apply (DateFormat.FULL) .toPattern ()); + patterns.put (":short", format.apply (FormatStyle.SHORT)); + patterns.put (":medium", format.apply (FormatStyle.MEDIUM)); + patterns.put (":long", format.apply (FormatStyle.LONG)); + patterns.put (":full", format.apply (FormatStyle.FULL)); return patterns; } diff --git a/locale-data.extmap b/locale-data.extmap index dec9c4eff9..9f6e7c4d8b 100644 Binary files a/locale-data.extmap and b/locale-data.extmap differ diff --git a/test/format.el b/test/format.el index 60df54aebe..b2c7a07623 100644 --- a/test/format.el +++ b/test/format.el @@ -39,6 +39,7 @@ (datetime--test-formatter (datetime-float-formatter 'java datetime--test-pattern :timezone datetime--test-timezone :locale 
datetime--test-locale))) ,@body)) +;; We assume that the Java program is already compiled externally (see `run-tests.sh'). (defun datetime--test (times) (unless (listp times) (setq times (list times))) @@ -71,7 +72,8 @@ (while times (let ((time (pop times)) (expected (pop formatted))) - (eval `(should (progn ,time (string= ,(funcall datetime--test-formatter time) ,expected))))))))) + (eval `(should (progn ',datetime--test-timezone ',datetime--test-locale ,datetime--test-pattern ,time + (string= ,(funcall datetime--test-formatter time) ,expected))))))))) (defun datetime--test-transition (time) (datetime--test (list time diff --git a/timezone-data.extmap b/timezone-data.extmap index 42f51f781b..ddd0c873be 100644 Binary files a/timezone-data.extmap and b/timezone-data.extmap differ