On Wednesday, November 24, 2010 8:29:35 pm Peter Maydell wrote:
> This wiki page came up during the toolchain call:
> https://wiki.linaro.org/Internal/People/KenWerner/AtomicMemoryOperations/
>
> It gives the code generated for __sync_val_compare_and_swap
> as including a push {r4} / pop {r4} pair because it uses too many
> temporaries to fit them all in callee-saves registers. I think you
> can tweak it a bit to get rid of that:
>
> # int __sync_val_compare_and_swap (int *mem, int old, int new);
> # if the current value of *mem is old, then write new into *mem
> # r0: mem, r1: old, r2: new
>         mov     r3, r0        # move r0 into r3
>         dmb     sy            # full memory barrier
> .LSYT7:
>         ldrex   r0, [r3]      # load (exclusive) from memory pointed to by r3 into r0
>         cmp     r0, r1        # compare contents of r0 (mem) with r1 (old) -> updates the condition flag
>         bne     .LSYB7        # branch to .LSYB7 if mem != old
> # This strex trashes the r0 we just loaded, but since we didn't take
> # the branch we know that r0 == r1
>         strex   r0, r2, [r3]  # store r2 (new) into memory pointed to by r3 (mem);
>                               # r0 contains 0 if the store was successful, otherwise 1
>         teq     r0, #0        # compare contents of r0 with zero -> updates the condition flag
>         bne     .LSYT7        # branch to .LSYT7 if r0 != 0 (the store wasn't successful)
> # Move the value that was in memory into the right register to return it
>         mov     r0, r1
>         dmb     sy            # full memory barrier
> .LSYB7:
>         bx      lr            # return
>
> I think you can do a similar trick with __sync_fetch_and_add
> (although you have to use a subtract to regenerate r0 from
> r1 and r2).
>
> On the other hand I just looked at the gcc code that does this
> and it's not simply dumping canned sequences out to the
> assembler, so maybe it's not worth the effort just to drop a
> stack push/pop.
Hi,

Attached is a small GCC patch that attempts to optimize the __sync_*
builtins as described above. Since "or" and "(n)and" are non-reversible,
the corresponding builtins still need the push/pop instructions.

Any suggestions or comments are welcome.

Regards
Ken
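To make the trick concrete: for __sync_fetch_and_add, the patch lets the
strex reuse the register that held the loaded value, then regenerates the
old value with a subtract, since old = (old + val) - val. Here is a sketch
of the resulting sequence (register assignments and label numbers are
illustrative, not GCC's exact output):

# int __sync_fetch_and_add (int *mem, int val);
# r0: mem, r1: val; returns the previous value of *mem
        mov     r3, r0          # free up r0 to hold the result
        dmb     sy              # full memory barrier
.LSYT8:
        ldrex   r0, [r3]        # r0 = old value of *mem (exclusive load)
        add     r2, r0, r1      # r2 = old + val (the new value)
        strex   r0, r2, [r3]    # try to store r2; this trashes the old
                                # value in r0 (0 = success, 1 = failure)
        teq     r0, #0          # did the store succeed?
        bne     .LSYT8          # no: retry the exclusive sequence
        sub     r0, r2, r1      # regenerate the old value: (old+val)-val
        dmb     sy              # full memory barrier
        bx      lr              # return old value in r0

Only the caller-saved registers r0-r3 are needed, so the push {r4} /
pop {r4} pair disappears.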
=== modified file 'gcc/config/arm/arm.c'
--- gcc/config/arm/arm.c        2010-11-11 11:50:33 +0000
+++ gcc/config/arm/arm.c        2010-11-29 12:59:50 +0000
@@ -23084,10 +23084,46 @@
       break;
     }
 
-  arm_output_strex (emit, mode, "", t2, t1, memory);
-  operands[0] = t2;
-  arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
-  arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX);
+  if (t2)
+    {
+      arm_output_strex (emit, mode, "", t2, t1, memory);
+      operands[0] = t2;
+      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+                           LOCAL_LABEL_PREFIX);
+    }
+  else
+    {
+      /* Use old_value for the return value because for some operations
+         the old_value can easily be restored.  This saves one register.  */
+      arm_output_strex (emit, mode, "", old_value, t1, memory);
+      operands[0] = old_value;
+      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
+      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
+                           LOCAL_LABEL_PREFIX);
+
+      switch (sync_op)
+        {
+        case SYNC_OP_ADD:
+          arm_output_op3 (emit, "sub", old_value, t1, new_value);
+          break;
+
+        case SYNC_OP_SUB:
+          arm_output_op3 (emit, "add", old_value, t1, new_value);
+          break;
+
+        case SYNC_OP_XOR:
+          arm_output_op3 (emit, "eor", old_value, t1, new_value);
+          break;
+
+        case SYNC_OP_NONE:
+          arm_output_op2 (emit, "mov", old_value, required_value);
+          break;
+
+        default:
+          gcc_unreachable ();
+        }
+    }
 
   arm_process_output_memory_barrier (emit, NULL);
   arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);

=== modified file 'gcc/config/arm/sync.md'
--- gcc/config/arm/sync.md      2010-09-13 15:39:11 +0000
+++ gcc/config/arm/sync.md      2010-11-29 13:54:17 +0000
@@ -103,6 +103,18 @@
                                 (plus "add")
                                 (minus "sub")])
 
+(define_code_attr sync_clobber [(ior "=&r")
+                                (and "=&r")
+                                (xor "X")
+                                (plus "X")
+                                (minus "X")])
+
+(define_code_attr sync_t2_reqd [(ior "4")
+                                (and "4")
+                                (xor "*")
+                                (plus "*")
+                                (minus "*")])
+
 (define_expand "sync_<sync_optab>si"
   [(match_operand:SI 0 "memory_operand")
    (match_operand:SI 1 "s_register_operand")
@@ -286,7 +298,6 @@
                             VUNSPEC_SYNC_COMPARE_AND_SWAP))
    (set (match_dup 1) (unspec_volatile:SI [(match_dup 2)]
                                           VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   (clobber:SI (match_scratch:SI 4 "=&r"))
    (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
                                                VUNSPEC_SYNC_COMPARE_AND_SWAP))
    ]
@@ -299,7 +310,6 @@
    (set_attr "sync_required_value"  "2")
    (set_attr "sync_new_value"       "3")
    (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "4")
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
@@ -313,7 +323,6 @@
                             VUNSPEC_SYNC_COMPARE_AND_SWAP)))
    (set (match_dup 1) (unspec_volatile:NARROW [(match_dup 2)]
                                               VUNSPEC_SYNC_COMPARE_AND_SWAP))
-   (clobber:SI (match_scratch:SI 4 "=&r"))
    (set (reg:CC CC_REGNUM) (unspec_volatile:CC [(match_dup 1)]
                                                VUNSPEC_SYNC_COMPARE_AND_SWAP))
    ]
@@ -326,7 +335,6 @@
    (set_attr "sync_required_value"  "2")
    (set_attr "sync_new_value"       "3")
    (set_attr "sync_t1"              "0")
-   (set_attr "sync_t2"              "4")
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
@@ -487,7 +495,7 @@
                             VUNSPEC_SYNC_OLD_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
+   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
   "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER"
   {
     return arm_output_sync_insn (insn, operands);
@@ -496,7 +504,7 @@
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
    (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
+   (set_attr "sync_t2"              "<sync_t2_reqd>")
    (set_attr "sync_op"              "<sync_optab>")
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])
 
@@ -540,7 +548,7 @@
                             VUNSPEC_SYNC_OLD_OP))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:SI 3 "=&r"))
-   (clobber (match_scratch:SI 4 "=&r"))]
+   (clobber (match_scratch:SI 4 "<sync_clobber>"))]
   "TARGET_HAVE_LDREXBHD && TARGET_HAVE_MEMORY_BARRIER"
   {
     return arm_output_sync_insn (insn, operands);
@@ -549,7 +557,7 @@
    (set_attr "sync_memory"          "1")
    (set_attr "sync_new_value"       "2")
    (set_attr "sync_t1"              "3")
-   (set_attr "sync_t2"              "4")
+   (set_attr "sync_t2"              "<sync_t2_reqd>")
    (set_attr "sync_op"              "<sync_optab>")
    (set_attr "conds" "clob")
    (set_attr "predicable" "no")])