https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114252

--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
diff --git a/gcc/gimple-ssa-store-merging.cc b/gcc/gimple-ssa-store-merging.cc
index 42b68abf61b..c9d4662656f 100644
--- a/gcc/gimple-ssa-store-merging.cc
+++ b/gcc/gimple-ssa-store-merging.cc
@@ -170,6 +170,7 @@
 #include "optabs-tree.h"
 #include "dbgcnt.h"
 #include "selftest.h"
+#include "regs.h"

 /* The maximum size (in bits) of the stores this pass should generate.  */
 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
@@ -1484,7 +1485,8 @@ maybe_optimize_vector_constructor (gimple *cur_stmt)
       break;
     case 32:
       if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
-         && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+         && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
+         && have_regs_of_mode[SImode])
        {
          load_type = uint32_type_node;
          fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
@@ -1545,7 +1547,8 @@ pass_optimize_bswap::execute (function *fun)
   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
-              && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+              && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
+              && have_regs_of_mode[SImode]);
   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));


The patch above doesn't work: AVR has regs of SImode.  There doesn't seem to
be a way to query the (maximum?) number of hardregs used for a mode.  Using

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
               && have_regs_of_mode[SImode]
               && hard_regno_nregs (0, SImode) == 1);

"works" but is surely wrong (whatever hardreg zero corresponds to).
Looking only at word_mode, requiring word_mode size >= SImode size like with

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
               && known_ge (GET_MODE_SIZE (word_mode),
                            GET_MODE_SIZE (SImode)));

"works" but would affect many more targets.  Maybe && word_mode != QImode
is better.

Note that this will cut off _all_ bswap detection.  Hence my question about the
profitability of detecting cases like those in libgcc2.c, which then produces

__bswapsi2:
        push r12
        push r13
        push r14
        push r15
        push r16
        push r17
/* prologue: function */
/* frame size = 0 */
/* stack size = 6 */
.L__stack_usage = 6
        mov r16,r22
        mov r17,r23
        mov r18,r24
        mov r19,r25
        mov r22,r19
        clr r23
        clr r24
        clr r25
        mov r15,r16
        clr r14
        clr r13
        clr r12
        or r22,r12
        or r23,r13
        or r24,r14
        or r25,r15
        mov r12,r17
        mov r13,r18
        mov r14,r19
        clr r15
        clr r12
        clr r14
        clr r15
        or r22,r12
        or r23,r13
        or r24,r14
        or r25,r15
        mov r19,r18
        mov r18,r17
        mov r17,r16
        clr r16
        clr r16
        clr r17
        clr r19
        or r22,r16
        or r23,r17
        or r24,r18
        or r25,r19
/* epilogue start */
        pop r17
        pop r16
        pop r15
        pop r14
        pop r13
        pop r12
        ret

then.

bswap detection does not try to do any sophisticated evaluation of costs.

Reply via email to