https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114252
--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
diff --git a/gcc/gimple-ssa-store-merging.cc b/gcc/gimple-ssa-store-merging.cc
index 42b68abf61b..c9d4662656f 100644
--- a/gcc/gimple-ssa-store-merging.cc
+++ b/gcc/gimple-ssa-store-merging.cc
@@ -170,6 +170,7 @@
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"
+#include "regs.h"
/* The maximum size (in bits) of the stores this pass should generate. */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
@@ -1484,7 +1485,8 @@ maybe_optimize_vector_constructor (gimple *cur_stmt)
break;
case 32:
if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
- && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
+ && have_regs_of_mode[SImode])
{
load_type = uint32_type_node;
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
@@ -1545,7 +1547,8 @@ pass_optimize_bswap::execute (function *fun)
tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
- && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
+ && have_regs_of_mode[SImode]);
bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
&& (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
|| (bswap32_p && word_mode == SImode)));
The patch above doesn't work: AVR does have regs of SImode, so the
have_regs_of_mode[SImode] check is still true there. There doesn't seem to be
a way to query the (maximum?) number of hardregs used for a mode. Using
bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
&& optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
&& have_regs_of_mode[SImode]
&& hard_regno_nregs (0, SImode) == 1);
"works" but is surely wrong (whatever hardreg zero corresponds to).
Looking only at word_mode, requiring word_mode size >= SImode size like with
bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
&& optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
&& known_ge (GET_MODE_SIZE (word_mode), GET_MODE_SIZE
(SImode)));
"works" but would affect many more targets. Maybe && word_mode != QImode
is better.
Note that this will cut off _all_ bswap detection. Thus my question on
profitability of detecting cases like those in libgcc2.c which then produces
__bswapsi2:
push r12
push r13
push r14
push r15
push r16
push r17
/* prologue: function */
/* frame size = 0 */
/* stack size = 6 */
.L__stack_usage = 6
mov r16,r22
mov r17,r23
mov r18,r24
mov r19,r25
mov r22,r19
clr r23
clr r24
clr r25
mov r15,r16
clr r14
clr r13
clr r12
or r22,r12
or r23,r13
or r24,r14
or r25,r15
mov r12,r17
mov r13,r18
mov r14,r19
clr r15
clr r12
clr r14
clr r15
or r22,r12
or r23,r13
or r24,r14
or r25,r15
mov r19,r18
mov r18,r17
mov r17,r16
clr r16
clr r16
clr r17
clr r19
or r22,r16
or r23,r17
or r24,r18
or r25,r19
/* epilogue start */
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
ret
then.
bswap detection does not try to do any sophisticated evaluation of costs.