Hi! DWARF now maintains DW_LANG_* code assignment online and 27 language codes have already been assigned since DWARF 5 was released, see https://dwarfstd.org/languages.html including one added yesterday (DW_LANG_C23). DWARF 6 plans to use something different, a DW_AT_language_{name,version} pair where new language versions will just be dealt with automatically rather than by adding new codes; say for C23 we'll be able to use DW_LNAME_C 202311, while for C2Y we can for now use DW_LNAME_C 202500 until the standard is finalized.
Now, the question is whether the toolchain should use those post DWARF 5 codes for -gdwarf-5 -gno-strict-dwarf, or if we'll just ignore those and only switch to DWARF 6 stuff when the standard is released and people use -gdwarf-6 (or when we switch over to that as default). The following patch starts using those new codes (just for C/C++ for now, Ada/Fortran not switched, Ada because I'm really not familiar with Ada and Fortran because it doesn't say 2018 in the language string). The problem with the patch is that it regresses quite a few tests, in particular gcc.dg/guality/pr78726.c g++.dg/guality/redeclaration1.C libstdc++-prettyprinters/*.cc libstdc++-xmethods/deque.cc because my gdb doesn't handle those (but git trunk gdb doesn't either), so for those the new codes are just unknown languages rather than newer revisions of C or C++. From what I can read in gdb, it doesn't seem to care about exact standard revision, all it cares about is if the TU is C, C++, Fortran, Ada etc. So, from this POV perhaps we shouldn't switch at all and ignore all the post-DWARF 5 codes. Or shall we wait until gdb, elfutils, whatever else actually looks at DW_AT_language values is changed to handle the new codes and apply this patch after that (still one would need a new version of gdb/elfutils/etc.)? Or wait say half a year or year after that support is added in the consumers? The DWARF 6 planned scheme was designed exactly to overcome this problem, consumers that only care if something is C or C++ etc. will be able to hardcode the code once and if they care for some behavior on something more specific, they can just compare the version, DW_AT_language_version >= 201703 for C++ (or < etc.), or for Fortran DW_AT_language_version >= 2008, ... 2024-11-21 Jakub Jelinek <ja...@redhat.com> gcc/ * dwarf2out.cc (is_c): Handle also DW_LANG_C{17,23}. (is_cxx): Handle also DW_LANG_C_plus_plus_{17,20,23}. (is_fortran): Handle also DW_LANG_Fortran18. (is_ada): Handle also DW_LANG_Ada20{05,12}. 
(lower_bound_default): Handle also DW_LANG_{C{17,23},C_plus_plus_{17,20,23},Fortran18,Ada20{05,12}}. (add_prototyped_attribute): Handle DW_LANG_C{17,23}. (gen_compile_unit_die): Use DW_LANG_C17 if not -gstrict-dwarf for C17. Use DW_LANG_C23 if not -gstrict-dwarf for C23/C2Y. Use DW_LANG_C_plus_plus_{17,20,23} if not -gstrict-dwarf for C++{17,20,23} and the last one also for C++26. Handle DW_LANG_Fortran18. gcc/testsuite/ * g++.dg/debug/dwarf2/lang-cpp17.C: Add -gno-strict-dwarf to dg-options and expect different DW_AT_language value. * g++.dg/debug/dwarf2/lang-cpp20.C: Likewise. * g++.dg/debug/dwarf2/lang-cpp23.C: New test. --- gcc/dwarf2out.cc.jj 2024-10-25 10:00:29.445768186 +0200 +++ gcc/dwarf2out.cc 2024-11-20 21:49:48.237062064 +0100 @@ -5540,7 +5540,8 @@ is_c (void) unsigned int lang = get_AT_unsigned (comp_unit_die (), DW_AT_language); return (lang == DW_LANG_C || lang == DW_LANG_C89 || lang == DW_LANG_C99 - || lang == DW_LANG_C11 || lang == DW_LANG_ObjC); + || lang == DW_LANG_C11 || lang == DW_LANG_C17 || lang == DW_LANG_C23 + || lang == DW_LANG_ObjC); } @@ -5553,7 +5554,9 @@ is_cxx (void) unsigned int lang = get_AT_unsigned (comp_unit_die (), DW_AT_language); return (lang == DW_LANG_C_plus_plus || lang == DW_LANG_ObjC_plus_plus - || lang == DW_LANG_C_plus_plus_11 || lang == DW_LANG_C_plus_plus_14); + || lang == DW_LANG_C_plus_plus_11 || lang == DW_LANG_C_plus_plus_14 + || lang == DW_LANG_C_plus_plus_17 || lang == DW_LANG_C_plus_plus_20 + || lang == DW_LANG_C_plus_plus_23); } /* Return TRUE if DECL was created by the C++ frontend. 
*/ @@ -5581,7 +5584,8 @@ is_fortran (void) || lang == DW_LANG_Fortran90 || lang == DW_LANG_Fortran95 || lang == DW_LANG_Fortran03 - || lang == DW_LANG_Fortran08); + || lang == DW_LANG_Fortran08 + || lang == DW_LANG_Fortran18); } static inline bool @@ -5617,7 +5621,8 @@ is_ada (void) { unsigned int lang = get_AT_unsigned (comp_unit_die (), DW_AT_language); - return lang == DW_LANG_Ada95 || lang == DW_LANG_Ada83; + return (lang == DW_LANG_Ada95 || lang == DW_LANG_Ada83 + || lang == DW_LANG_Ada2005 || lang == DW_LANG_Ada2012); } /* Return TRUE if the language is D. */ @@ -21645,9 +21650,14 @@ lower_bound_default (void) case DW_LANG_C89: case DW_LANG_C99: case DW_LANG_C11: + case DW_LANG_C17: + case DW_LANG_C23: case DW_LANG_C_plus_plus: case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14: + case DW_LANG_C_plus_plus_17: + case DW_LANG_C_plus_plus_20: + case DW_LANG_C_plus_plus_23: case DW_LANG_ObjC: case DW_LANG_ObjC_plus_plus: return 0; @@ -21656,13 +21666,16 @@ lower_bound_default (void) case DW_LANG_Fortran95: case DW_LANG_Fortran03: case DW_LANG_Fortran08: + case DW_LANG_Fortran18: return 1; case DW_LANG_UPC: case DW_LANG_D: case DW_LANG_Python: return dwarf_version >= 4 ? 
0 : -1; - case DW_LANG_Ada95: case DW_LANG_Ada83: + case DW_LANG_Ada95: + case DW_LANG_Ada2005: + case DW_LANG_Ada2012: case DW_LANG_Cobol74: case DW_LANG_Cobol85: case DW_LANG_Modula2: @@ -22025,6 +22038,8 @@ add_prototyped_attribute (dw_die_ref die case DW_LANG_C89: case DW_LANG_C99: case DW_LANG_C11: + case DW_LANG_C17: + case DW_LANG_C23: case DW_LANG_ObjC: if (prototype_p (func_type)) add_AT_flag (die, DW_AT_prototyped, 1); @@ -25372,11 +25387,17 @@ gen_compile_unit_die (const char *filena language = DW_LANG_C99; if (dwarf_version >= 5 /* || !dwarf_strict */) - if (strcmp (language_string, "GNU C11") == 0 - || strcmp (language_string, "GNU C17") == 0 - || strcmp (language_string, "GNU C23") == 0 - || strcmp (language_string, "GNU C2Y") == 0) - language = DW_LANG_C11; + { + if (strcmp (language_string, "GNU C11") == 0) + language = DW_LANG_C11; + else if (strcmp (language_string, "GNU C17") == 0) + language = DW_LANG_C17; + else if (strcmp (language_string, "GNU C23") == 0 + || strcmp (language_string, "GNU C2Y") == 0) + language = DW_LANG_C23; + if (dwarf_strict && language > DW_LANG_C11) + language = DW_LANG_C11; + } } } else if (startswith (language_string, "GNU C++")) @@ -25388,11 +25409,14 @@ gen_compile_unit_die (const char *filena language = DW_LANG_C_plus_plus_11; else if (strcmp (language_string, "GNU C++14") == 0) language = DW_LANG_C_plus_plus_14; - else if (strcmp (language_string, "GNU C++17") == 0 - || strcmp (language_string, "GNU C++20") == 0 - || strcmp (language_string, "GNU C++23") == 0 + else if (strcmp (language_string, "GNU C++17") == 0) + language = DW_LANG_C_plus_plus_17; + else if (strcmp (language_string, "GNU C++20") == 0) + language = DW_LANG_C_plus_plus_20; + else if (strcmp (language_string, "GNU C++23") == 0 || strcmp (language_string, "GNU C++26") == 0) - /* For now. 
*/ + language = DW_LANG_C_plus_plus_23; + if (dwarf_strict && language > DW_LANG_C_plus_plus_14) language = DW_LANG_C_plus_plus_14; } } @@ -25445,6 +25469,7 @@ gen_compile_unit_die (const char *filena case DW_LANG_Fortran95: case DW_LANG_Fortran03: case DW_LANG_Fortran08: + case DW_LANG_Fortran18: /* Fortran has case insensitive identifiers and the front-end lowercases everything. */ add_AT_unsigned (die, DW_AT_identifier_case, DW_ID_down_case); --- gcc/testsuite/g++.dg/debug/dwarf2/lang-cpp17.C.jj 2021-01-18 14:52:42.946040137 +0100 +++ gcc/testsuite/g++.dg/debug/dwarf2/lang-cpp17.C 2024-11-21 09:08:09.868183593 +0100 @@ -1,8 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c++17 -gdwarf-5 -dA -gno-strict-dwarf" } // { dg-skip-if "AIX DWARF5" { powerpc-ibm-aix* } } -// For -gdwarf-6 hopefully DW_LANG_C_plus_plus_17 -// DW_LANG_C_plus_plus_14 = 0x0021 -// { dg-final { scan-assembler "0x21\[^\n\r]* DW_AT_language" } } */ +// DW_LANG_C_plus_plus_17 = 0x002a +// { dg-final { scan-assembler "0x2a\[^\n\r]* DW_AT_language" } } */ int version; --- gcc/testsuite/g++.dg/debug/dwarf2/lang-cpp20.C.jj 2021-01-18 14:52:42.946040137 +0100 +++ gcc/testsuite/g++.dg/debug/dwarf2/lang-cpp20.C 2024-11-21 09:08:31.391877872 +0100 @@ -1,8 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c++20 -gdwarf-5 -dA -gno-strict-dwarf" } // { dg-skip-if "AIX DWARF5" { powerpc-ibm-aix* } } -// For -gdwarf-6 hopefully DW_LANG_C_plus_plus_20 -// DW_LANG_C_plus_plus_14 = 0x0021 -// { dg-final { scan-assembler "0x21\[^\n\r]* DW_AT_language" } } */ +// DW_LANG_C_plus_plus_20 = 0x002b +// { dg-final { scan-assembler "0x2b\[^\n\r]* DW_AT_language" } } */ int version; --- gcc/testsuite/g++.dg/debug/dwarf2/lang-cpp23.C.jj 2024-11-21 09:08:41.801729997 +0100 +++ gcc/testsuite/g++.dg/debug/dwarf2/lang-cpp23.C 2024-11-21 09:08:57.226510904 +0100 @@ -0,0 +1,7 @@ +// { dg-do compile } +// { dg-options "-O -std=c++23 -gdwarf-5 -dA -gno-strict-dwarf" } +// { dg-skip-if "AIX DWARF5" { powerpc-ibm-aix* } 
} +// DW_LANG_C_plus_plus_23 = 0x003a +// { dg-final { scan-assembler "0x3a\[^\n\r]* DW_AT_language" } } */ + +int version; Jakub