On Mon, Oct 17, 2022 at 3:01 PM Florian Weimer via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > "zR" and "zPLR" are the most common augmentations. Use a simple > SIMD-within-a-register technique to check for both augmentations, > and that the following variable-length integers have length 1, to > get more quickly at the encoding field. > > libgcc/ > > * unwind-dw2-fde.c (get_cie_encoding_slow): Rename from > get_cie_encoding. Mark as noinline. > (get_cie_encoding): Add fast path for "zR" and "zPLR" > augmentations. Call get_cie_encoding_slow as a fall-back. > > --- > libgcc/unwind-dw2-fde.c | 61 > +++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 59 insertions(+), 2 deletions(-) > > diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c > index 3c0cc654ec0..4e3a54c5a1a 100644 > --- a/libgcc/unwind-dw2-fde.c > +++ b/libgcc/unwind-dw2-fde.c > @@ -333,8 +333,10 @@ base_from_object (unsigned char encoding, const struct > object *ob) > /* Return the FDE pointer encoding from the CIE. */ > /* ??? This is a subset of extract_cie_info from unwind-dw2.c. */ > > -static int > -get_cie_encoding (const struct dwarf_cie *cie) > +/* Disable inlining because the function is only used as a slow path in > + get_cie_encoding below. */ > +static int __attribute__ ((noinline)) > +get_cie_encoding_slow (const struct dwarf_cie *cie) > { > const unsigned char *aug, *p; > _Unwind_Ptr dummy; > @@ -389,6 +391,61 @@ get_cie_encoding (const struct dwarf_cie *cie) > } > } > > +static inline int > +get_cie_encoding (const struct dwarf_cie *cie) > +{ > + /* Fast path for some augmentations and single-byte variable-length > + integers. Do this only for targets that align struct dwarf_cie to 8 > + bytes, which ensures that at least 8 bytes are available starting at > + cie->version. 
*/ > +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ > + || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ > + if (__alignof (*cie) == 8 && sizeof (unsigned long long) == 8) > + { > + unsigned long long value = *(const unsigned long long *) &cie->version;
TBAA? This load reads the CIE bytes through an unsigned long long lvalue obtained by a pointer cast, which looks like a strict-aliasing (type-based alias analysis) violation. Maybe use unsigned long long value; memcpy (&value, &cie->version, 8); instead? > + > +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ > +#define C(x) __builtin_bswap64 (x) > +#else > +#define C(x) x > +#endif > + > + /* Fast path for "zR". Check for version 1, the "zR" string and that > + the sleb128/uleb128 values are single bytes. In the comments > + below, '1', 'c', 'd', 'r', 'l' are version, code alignment, data > + alignment, return address column, augmentation length. Note that > + with CIE version 1, the return address column is byte-encoded. */ > + unsigned long long expected = > + /* 1 z R 0 c d r l. */ > + C (0x017a520000000000ULL); > + unsigned long long mask = > + /* 1 z R 0 c d r l. */ > + C (0xffffffff80800080ULL); > + > + if ((value & mask) == expected) > + return cie->augmentation[7]; > + > + /* Fast path for "zPLR". */ > + expected = > + /* 1 z P L R 0 c d. */ > + C (0x017a504c52000000ULL); > + mask = > + /* 1 z P L R 0 c d. */ > + C (0xffffffffffff8080ULL); > +#undef C > + > + /* Validate the augmentation length, and return the encoding after > + it. No check for the return address column because it is > + byte-encoded with CIE version 1. */ > + if (__builtin_expect ((value & mask) == expected > + && (cie->augmentation[8] & 0x80) == 0, 1)) > + return cie->augmentation[9]; > + } > +#endif > + > + return get_cie_encoding_slow (cie); > +} > + > static inline int > get_fde_encoding (const struct dwarf_fde *f) > { > > base-commit: de84a1e4b107b803ec3b064c3771a6ed8c0e201e >