On Fri, Jun 02, 2006 at 09:24:17AM +0200, Rask Ingemann Lambertsen wrote:
> The rest of the ARM backend presently assumes that the pattern has the form
>
> (set (operand:QI 0) (operand:QI 1))
>
> but now we've changed it to
>
> (parallel [(set (operand:QI 0) (operand:QI 1))
> (clobber (operand:QI 2))
> ])
>
> so that's why you get "unrecognizable insn" errors now. Any place which
> intended to generate an *arm_movqi_insn has to add a clobber also. For a
> start, this means the "movqi" pattern.
I've now implemented it. This brings a small improvement to the code
generated for bytewritetest:
bytewritetest:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
ldrb r3, [r0, #5] @ zero_extendqisi2
ldrb ip, [r0, #4] @ zero_extendqisi2
ldr r2, [r0, #0]
add r1, r3, ip
str r2, [r0, #8]
str r1, [r0], #5 <--
eor r3, r3, ip
swpb r2, r3, [r0]
@ lr needed for prologue
bx lr
This is exactly the same number of instructions as without -mswp-byte-writes,
because of the postincrement. Basically, it pays off to get the insn expanded
correctly to begin with, rather than leaving it to reload to fix it up later. This should
work fine with volatile variables because there is no need to read back from
memory. The peephole optimizations are gone for the same reason. I do wonder
if the ability to reuse the input register as a scratch register has been
preserved, though.
Compiling unwind-dw2-fde.c, I noticed that the code produced for
__register_frame_info_table_bases() differs more than expected:
__register_frame_info_table_bases:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
1 stmfd sp!, {r4, lr}
2 mov lr, #0
3 str lr, [r1, #16]
4 ldrb ip, [r1, #16] @ zero_extendqisi2
5 orr ip, ip, #2
6 strb ip, [r1, #16]
7 ldr r4, .L28
8 ldrh ip, [r1, #16]
9 ldr lr, [r4, #0]
10 orr ip, ip, #2032
11 str r0, [r1, #12]
12 orr ip, ip, #8
13 mvn r0, #0
14 strh ip, [r1, #16] @ movhi
15 str lr, [r1, #20]
16 str r0, [r1, #0]
17 str r1, [r4, #0]
18 stmib r1, {r2, r3} @ phole stm
19 ldmfd sp!, {r4, pc}
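vs. the following with -mswp-byte-writes (the leading numbers give the position
of the corresponding instruction in the listing above):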
__register_frame_info_table_bases:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
2 mov ip, #0
3 str ip, [r1, #16]
1 str lr, [sp, #-4]!
4 ldrb lr, [r1, #16] @ zero_extendqisi2
11 str r0, [r1, #12]
5 orr lr, lr, #2
13 mvn r0, #0
6a add ip, r1, #16
16+18? stmia r1, {r0, r2, r3} @ phole stm
6b swpb r3, lr, [ip]
7 ldr r0, .L28
8 ldrh r3, [r1, #16]
9 ldr r2, [r0, #0]
10 orr r3, r3, #2032
12 orr r3, r3, #8
14 strh r3, [r1, #16] @ movhi
15 str r2, [r1, #20]
17 str r1, [r0, #0]
19 ldr pc, [sp], #4
But the swp version seems to be equivalent, doesn't it?
I'm not sure that the reload_outqi expander will correctly handle
cases where reload spills a register to memory. If the memory address
doesn't have the right form, it becomes more complicated.
Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h (revision 114119)
+++ gcc/config/arm/arm.h (working copy)
@@ -1094,6 +1094,8 @@
? vfp_secondary_reload_class (MODE, X) \
: TARGET_ARM \
? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
+ || ((MODE) == QImode && TARGET_ARM && TARGET_SWP_BYTE_WRITES \
+ && true_regnum (X) == -1) \
? GENERAL_REGS : NO_REGS) \
: THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
Index: gcc/config/arm/arm.opt
===================================================================
--- gcc/config/arm/arm.opt (revision 114119)
+++ gcc/config/arm/arm.opt (working copy)
@@ -153,3 +153,7 @@
mwords-little-endian
Target Report RejectNegative Mask(LITTLE_WORDS)
Assume big endian bytes, little endian words
+
+mswp-byte-writes
+Target Report Mask(SWP_BYTE_WRITES)
+Use the swp instruction for byte writes. The default is to use str
Index: gcc/config/arm/predicates.md
===================================================================
--- gcc/config/arm/predicates.md (revision 114119)
+++ gcc/config/arm/predicates.md (working copy)
@@ -125,6 +125,14 @@
|| (GET_CODE (op) == REG
&& REGNO (op) >= FIRST_PSEUDO_REGISTER)))")))
+;; Match register operands or memory operands of the form (mem (reg ...)),
+;; as permitted by the "Q" memory constraint.
+(define_predicate "reg_or_Qmem_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "mem")
+ (match_code "reg" "0")))
+)
+
;; True for valid operands for the rhs of an floating point insns.
;; Allows regs or certain consts on FPA, just regs for everything else.
(define_predicate "arm_float_rhs_operand"
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md (revision 114119)
+++ gcc/config/arm/arm.md (working copy)
@@ -5151,6 +5151,16 @@
emit_insn (gen_movsi (operands[0], operands[1]));
DONE;
}
+ if (TARGET_ARM && TARGET_SWP_BYTE_WRITES)
+ {
+ /* Ensure that operands[0] is (mem (reg ...)) if a memory operand. */
+ if (MEM_P (operands[0]) && !REG_P (XEXP (operands[0], 0)))
+ operands[0]
+ = replace_equiv_address (operands[0],
+ copy_to_reg (XEXP (operands[0], 0)));
+ emit_insn (gen__arm_movqi_insn_swp (operands[0], operands[1]));
+ DONE;
+ }
"
)
@@ -5158,7 +5168,7 @@
(define_insn "*arm_movqi_insn"
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
(match_operand:QI 1 "general_operand" "rI,K,m,r"))]
- "TARGET_ARM
+ "TARGET_ARM && !TARGET_SWP_BYTE_WRITES
&& ( register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode))"
"@
@@ -5170,6 +5180,31 @@
(set_attr "predicable" "yes")]
)
+;; This is primarily a hack for the Nintendo DS external RAM.
+(define_insn "_arm_movqi_insn_swp"
+ [(set (match_operand:QI 0 "reg_or_Qmem_operand" "=r,r,r,Q")
+ (match_operand:QI 1 "general_operand" "rI,K,m,r"))
+ (clobber (match_scratch:QI 2 "=X,X,X,r"))]
+ "TARGET_ARM && TARGET_SWP_BYTE_WRITES
+ && ( register_operand (operands[0], QImode)
+ || register_operand (operands[1], QImode))"
+ "@
+ mov%?\\t%0, %1
+ mvn%?\\t%0, #%B1
+ ldr%?b\\t%0, %1
+ swp%?b\\t%2, %1, [%|%m0]"
+ [(set_attr "type" "*,*,load1,store1")
+ (set_attr "predicable" "yes")]
+)
+
+;; The earlyclobber is required by default_secondary_reload() in targhooks.c.
+(define_expand "reload_outqi"
+ [(set (match_operand:QI 0 "memory_operand" "=Q")
+ (match_operand:QI 1 "register_operand" "r"))
+ (clobber (match_operand:QI 2 "register_operand" "=&r"))]
+ "TARGET_ARM && TARGET_SWP_BYTE_WRITES"
+)
+
(define_insn "*thumb_movqi_insn"
[(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l")
(match_operand:QI 1 "general_operand" "l, m,l,*h,*r,I"))]
--
Rask Ingemann Lambertsen