Peter Hutterer <[email protected]> writes: > On Mon, Mar 27, 2017 at 03:11:03PM -0700, Eric Anholt wrote: >> The clever pointer tricks were actually not working, and we were doing >> the byte-by-byte moves in general. By just doing the memcpy and >> obvious byte swap code, we end up generating actual byte swap >> instructions, thanks to optimizing compilers. >> >> text data bss dec hex filename >> before: 2241932 51584 132016 2425532 2502bc hw/xfree86/Xorg >> after: 2216276 51584 132016 2399876 249e84 hw/xfree86/Xorg >> --- >> doc/Xserver-spec.xml | 2 +- >> glx/glxbyteorder.h | 21 ------------ >> include/misc.h | 97 >> ++++++++++++++++++---------------------------------- >> os/io.c | 4 +-- >> 4 files changed, 37 insertions(+), 87 deletions(-) >> >> diff --git a/doc/Xserver-spec.xml b/doc/Xserver-spec.xml >> index 7867544e48a9..3dde65178b7f 100644 >> --- a/doc/Xserver-spec.xml >> +++ b/doc/Xserver-spec.xml >> @@ -600,7 +600,7 @@ are: REQUEST, REQUEST_SIZE_MATCH, REQUEST_AT_LEAST_SIZE, >> REQUEST_FIXED_SIZE, LEGAL_NEW_RESOURCE, and >> VALIDATE_DRAWABLE_AND_GC. Useful byte swapping macros can be found >> in Xserver/include/dix.h: WriteReplyToClient and WriteSwappedDataToClient; >> and >> -in Xserver/include/misc.h: lswapl, lswaps, LengthRestB, LengthRestS, >> +in Xserver/include/misc.h: bswap_64, bswap_32, bswap_16, LengthRestB, >> LengthRestS, >> LengthRestL, SwapRestS, SwapRestL, swapl, swaps, cpswapl, and >> cpswaps.</para> >> </section> >> </section> >> diff --git a/glx/glxbyteorder.h b/glx/glxbyteorder.h >> index 5e94e8626e66..8f0cd8a4b0d7 100644 >> --- a/glx/glxbyteorder.h >> +++ b/glx/glxbyteorder.h >> @@ -37,25 +37,4 @@ >> >> #include "misc.h" >> >> -static inline uint16_t >> -bswap_16(uint16_t val) >> -{ >> - swap_uint16(&val); >> - return val; >> -} >> - >> -static inline uint32_t >> -bswap_32(uint32_t val) >> -{ >> - swap_uint32(&val); >> - return val; >> -} >> - >> -static inline uint64_t >> -bswap_64(uint64_t val) >> -{ >> - swap_uint64(&val); >> - return val; >> -} >> - >> #endif /* !defined(__GLXBYTEORDER_H__) */ >> diff --git a/include/misc.h b/include/misc.h >> index 01747fd38339..a75eb617c642 100644 >> --- a/include/misc.h >> +++ b/include/misc.h >> @@ -128,21 +128,6 @@ typedef struct _xReq *xReqPtr; >> #define USE_BACKGROUND_PIXEL 3 >> #define USE_BORDER_PIXEL 3 >> >> -/* byte swap a 32-bit literal */ >> -static inline uint32_t >> -lswapl(uint32_t x) >> -{ >> - return ((x & 0xff) << 24) | >> - ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | ((x >> 24) & 0xff); >> -} >> - >> -/* byte swap a 16-bit literal */ >> -static inline uint16_t >> -lswaps(uint16_t x) >> -{ >> - return (uint16_t)((x & 0xff) << 8) | ((x >> 8) & 0xff); >> -} >> - >> #undef min >> #undef max >> >> @@ -311,88 +296,74 @@ __builtin_constant_p(int x) >> } >> #endif >> >> -/* byte swap a 64-bit value */ >> -static inline void >> -swap_uint64(uint64_t *x) >> +static inline uint64_t >> +bswap_64(uint64_t x) >> { >> - char n; >> - >> - n = ((char *) x)[0]; >> - ((char *) x)[0] = ((char *) x)[7]; >> - ((char *) x)[7] = n; >> - >> - n = ((char *) x)[1]; >> - ((char *) x)[1] = ((char *) x)[6]; >> - ((char *) x)[6] = n; >> - >> - n = ((char *) x)[2]; >> - ((char *) x)[2] = ((char *) x)[5]; >> - ((char *) x)[5] = n; >> - >> - n = ((char *) x)[3]; >> - ((char *) x)[3] = ((char *) x)[4]; >> - ((char *) x)[4] = n; >> + return (((x & 0xFF00000000000000ull) >> 56) | >> + ((x & 0x00FF000000000000ull) >> 40) | >> + ((x & 0x0000FF0000000000ull) >> 24) | >> + ((x & 0x000000FF00000000ull) >> 8) | >> + ((x & 0x00000000FF000000ull) << 8) | >> + ((x & 0x0000000000FF0000ull) << 24) | >> + ((x & 0x000000000000FF00ull) << 40) | >> + ((x & 0x00000000000000FFull) << 56)); >> } >> >> #define swapll(x) do { \ >> + uint64_t temp; \ >> if (sizeof(*(x)) != 8) \ >> wrong_size(); \ >> - swap_uint64((uint64_t *)(x)); \ >> + memcpy(&temp, x, 8); \ >> + temp = bswap_64(temp); \ >> + memcpy(x, &temp, 8); \ > > is the memcpy really needed here? am I missing something? We're passing by > value into bswap_64() now, so we get the copy for free anyway. Same in the > swapl below. > > fwiww, test/misc.c would be trivially amended to test all of these macros > for correctness :)
Unaligned accesses trap on some platforms, and I don't think we're guaranteed that the caller has the pointer aligned (at least, the previous code seemed pretty clearly to be trying to work around that, as well). I do like the unit test plan, though.
signature.asc
Description: PGP signature
_______________________________________________ [email protected]: X.Org development Archives: http://lists.x.org/archives/xorg-devel Info: https://lists.x.org/mailman/listinfo/xorg-devel
