On 1/8/20 3:18 PM, Stam Markianos-Wright wrote: > > > On 12/10/19 5:03 PM, Kyrill Tkachov wrote: >> Hi Stam, >> >> On 11/15/19 5:26 PM, Stam Markianos-Wright wrote: >>> Pinging with more correct maintainers this time :) >>> >>> Also would need to backport to gcc7,8,9, but need to get this approved >>> first! >>> >> >> Sorry for the delay. > > Same here now! Sorry totally forget about this in the lead up to Xmas! > > Done the changes marked below and also removed the unnecessary extra #defines > from the test.
Ping :) Cheers, Stam > >> >> >>> Thank you, >>> Stam >>> >>> >>> -------- Forwarded Message -------- >>> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional >>> branches in Thumb2 (PR91816) >>> Date: Mon, 21 Oct 2019 10:37:09 +0100 >>> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com> >>> To: Ramana Radhakrishnan <ramana....@googlemail.com> >>> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd <n...@arm.com>, >>> James Greenhalgh <james.greenha...@arm.com>, Richard Earnshaw >>> <richard.earns...@arm.com> >>> >>> >>> >>> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote: >>> >> >>> >> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf, >>> >> however, on my native Aarch32 setup the test times out when run as part >>> >> of a big "make check-gcc" regression, but not when run individually. >>> >> >>> >> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >>> >> >>> >> * config/arm/arm.md: Update b<cond> for Thumb2 range checks. >>> >> * config/arm/arm.c: New function arm_gen_far_branch. >>> >> * config/arm/arm-protos.h: New function arm_gen_far_branch >>> >> prototype. >>> >> >>> >> gcc/testsuite/ChangeLog: >>> >> >>> >> 2019-10-11 Stamatis Markianos-Wright <stam.markianos-wri...@arm.com> >>> >> >>> >> * testsuite/gcc.target/arm/pr91816.c: New test. >>> > >>> >> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h >>> >> index f995974f9bb..1dce333d1c3 100644 >>> >> --- a/gcc/config/arm/arm-protos.h >>> >> +++ b/gcc/config/arm/arm-protos.h >>> >> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const >>> cpu_arch_option *, >>> >> >>> >> void arm_initialize_isa (sbitmap, const enum isa_feature *); >>> >> >>> >> +const char * arm_gen_far_branch (rtx *, int,const char * , const char >>> >> *); >>> >> + >>> >> + >>> > >>> > Lets get the nits out of the way. >>> > >>> > Unnecessary extra new line, need a space between int and const above. >>> > >>> > >>> >>> .Fixed! >>> >>> >> #endif /* ! GCC_ARM_PROTOS_H */ >>> >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c >>> >> index 39e1a1ef9a2..1a693d2ddca 100644 >>> >> --- a/gcc/config/arm/arm.c >>> >> +++ b/gcc/config/arm/arm.c >>> >> @@ -32139,6 +32139,31 @@ arm_run_selftests (void) >>> >> } >>> >> } /* Namespace selftest. */ >>> >> >>> >> + >>> >> +/* Generate code to enable conditional branches in functions over 1 >>> >> MiB. */ >>> >> +const char * >>> >> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, >>> >> + const char * branch_format) >>> > >>> > Not sure if this is some munging from the attachment but check >>> > vertical alignment of parameters. >>> > >>> >>> .Fixed! >>> >>> >> +{ >>> >> + rtx_code_label * tmp_label = gen_label_rtx (); >>> >> + char label_buf[256]; >>> >> + char buffer[128]; >>> >> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ >>> >> + CODE_LABEL_NUMBER (tmp_label)); >>> >> + const char *label_ptr = arm_strip_name_encoding (label_buf); >>> >> + rtx dest_label = operands[pos_label]; >>> >> + operands[pos_label] = tmp_label; >>> >> + >>> >> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr); >>> >> + output_asm_insn (buffer, operands); >>> >> + >>> >> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, >>> label_ptr); >>> >> + operands[pos_label] = dest_label; >>> >> + output_asm_insn (buffer, operands); >>> >> + return ""; >>> >> +} >>> >> + >>> >> + >>> > >>> > Unnecessary extra newline. >>> > >>> >>> .Fixed! >>> >>> >> #undef TARGET_RUN_TARGET_SELFTESTS >>> >> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests >>> >> #endif /* CHECKING_P */ >>> >> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md >>> >> index f861c72ccfc..634fd0a59da 100644 >>> >> --- a/gcc/config/arm/arm.md >>> >> +++ b/gcc/config/arm/arm.md >>> >> @@ -6686,9 +6686,16 @@ >>> >> ;; And for backward branches we have >>> >> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + >>> >>4). >>> >> ;; >>> >> +;; In 16-bit Thumb these ranges are: >>> >> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving >>> (-2040->2048). >>> >> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 >>> >>->256). >>> >> >>> >> +;; In 32-bit Thumb these ranges are: >>> >> +;; For a 'b' +/- 16MB is not checked for. >>> >> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving >>> >> +;; (-1048568 -> 1048576). >>> >> + >>> >> + >>> > >>> > Unnecessary extra newline. >>> > >>> >>> .Fixed! >>> >>> >> (define_expand "cbranchsi4" >>> >> [(set (pc) (if_then_else >>> >> (match_operator 0 "expandable_comparison_operator" >>> >> @@ -6947,22 +6954,42 @@ >>> >> (pc)))] >>> >> "TARGET_32BIT" >>> >> "* >>> >> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >>> >> - { >>> >> - arm_ccfsm_state += 2; >>> >> - return \"\"; >>> >> - } >>> >> - return \"b%d1\\t%l0\"; >>> >> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >>> >> + { >>> >> + arm_ccfsm_state += 2; >>> >> + return \"\"; >>> >> + } >>> >> + switch (get_attr_length (insn)) >>> >> + { >>> >> + // Thumb2 16-bit b{cond} >>> >> + case 2: >>> >> + >>> >> + // Thumb2 32-bit b{cond} >>> >> + case 4: return \"b%d1\\t%l0\";break; >>> >> + >>> >> + // Thumb2 b{cond} out of range. Use unconditional branch. >>> >> + case 8: return arm_gen_far_branch \ >>> >> + (operands, 0, \"Lbcond\", \"b%D1\t\"); >>> >> + break; >>> >> + >>> >> + // A32 b{cond} >>> >> + default: return \"b%d1\\t%l0\"; >>> >> + } >>> > >>> > Please fix indentation here. >>> > >>> >>> .Fixed together with below changes. >>> >>> >> " >>> >> [(set_attr "conds" "use") >>> >> (set_attr "type" "branch") >>> >> (set (attr "length") >>> >> - (if_then_else >>> >> - (and (match_test "TARGET_THUMB2") >>> >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> >> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >>> >> - (const_int 2) >>> >> - (const_int 4)))] >>> >> + (if_then_else (match_test "TARGET_THUMB2") >>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> >> + (le (minus (match_dup 0) (pc)) (const_int 256))) >>> >> + (const_int 2) >>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>> >> + (const_int -1048568)) >>> >> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >>> >> + (const_int 4) >>> >> + (const_int 8))) >>> >> + (const_int 10))) >>> >> + ] >>> > >>> > This conditional is unreadable and is getting quite complex. >>> > >>> > Please fix the indentation and add some comments to indicate when >>> > this is 2, 4, 8, 10 above the pattern and ask for the comment to >>> > be in sync with this. >>> > >>> > How did we end up with length 10 ? That indicates 2 4 byte instructions >>> > and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in >>> > the switch - is length 10 going to be a single A32 b<cond> instruction ? >>> > >>> > What am I missing ? >>> > >>> > >>> >>> Ah sorry, I had not realised that the "length" related to the number of >>> bytes in the instruction, so I just used it as a variable to then check >>> in the switch(). >>> And yes, you are correct in assuming that length 10 would have been the >>> A32 b<cond> version. >>> So the mapping I had in mind was: >>> 2-> Thumb2 b<cond> - narrow 16bit version >>> 4-> Thumb2 b<cond> - wide 32bit version >>> 8-> Thumb2 b - "far branch". >>> 10-> A32 b<cond> >>> >>> The new version that maintains the "length=number of bytes" would be: >>> >>> 2-> Thumb2 b<cond> - narrow 16bit version >>> 4-> Thumb2 b<cond> - wide 32bit version OR A32 b<cond> >>> 6-> Thumb2 "far branch" made up from one b<cond> to a very close Lbcond >>> label (so 16 bits) and one b for 32 bits. (so 2+4 == 6) >>> >>> I've gone ahead and done this in the new proposed patch. Let me know if >>> it's ok! (also I changed the first check to !TARGET_THUMB2 - this makes >>> it slightly more readable). I'm still not sure about this, so any >>> suggestions are welcome! >>> >>> > >>> >> ) >>> >> >>> >> (define_insn "*arm_cond_branch_reversed" >>> >> @@ -6978,17 +7005,36 @@ >>> >> arm_ccfsm_state += 2; >>> >> return \"\"; >>> >> } >>> >> - return \"b%D1\\t%l0\"; >>> >> + switch (get_attr_length (insn)) >>> >> + { >>> >> + // Thumb2 16-bit b{cond} >>> >> + case 2: >>> >> + >>> >> + // Thumb2 32-bit b{cond} >>> >> + case 4: return \"b%D1\\t%l0\";break; >>> >> + >>> >> + // Thumb2 b{cond} out of range. Use unconditional branch. >>> >> + case 8: return arm_gen_far_branch \ >>> >> + (operands, 0, \"Lbcond\", \"b%d1\t\"); >>> >> + break; >>> >> + // A32 b{cond} >>> >> + default: return \"b%D1\\t%l0\"; >>> >> + } >>> >> " >>> >> [(set_attr "conds" "use") >>> >> (set_attr "type" "branch") >>> >> (set (attr "length") >>> >> - (if_then_else >>> >> - (and (match_test "TARGET_THUMB2") >>> >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> >> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >>> >> - (const_int 2) >>> >> - (const_int 4)))] >>> >> + (if_then_else (match_test "TARGET_THUMB2") >>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >>> >> + (le (minus (match_dup 0) (pc)) (const_int 256))) >>> >> + (const_int 2) >>> >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) >>> >> + (const_int -1048568)) >>> >> + (le (minus (match_dup 0) (pc)) (const_int 1048576))) >>> >> + (const_int 4) >>> >> + (const_int 8))) >>> >> + (const_int 10))) >>> >> + ] >>> > >>> > Same comments as above apply here too. >>> > >>> >>> Same as above. >>> >>> Thank you for the feedback and apologies for being a clueless :) >>> >>> And, of course, let me know of any problems or queries! >>> >>> Cheers, >>> Stam >>> >>> > Ramana >>> > >>> >> ) >>> >> >>> >> >>> >> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c >>> b/gcc/testsuite/gcc.target/arm/pr91816.c >>> >> new file mode 100644 >>> >> index 00000000000..176bf61780b >>> >> --- /dev/null >>> >> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c >>> >> @@ -0,0 +1,102 @@ >>> >> +/* { dg-do compile } */ >>> >> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ >>> >> +int printf(const char *, ...); >>> >> + >>> >> +__attribute__((noinline,noclone)) void f1(int a) >>> >> +{ >>> >> + if (a) { >>> >> +#define HW0 printf("Hello World!\n"); >>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> >> + HW0 >>> >> + } >>> >> +} >>> >> + >>> >> +__attribute__((noinline,noclone)) void f2(int a) >>> >> +{ >>> >> + if (a) { >>> >> +#define HW0 printf("Hello World!\n"); >>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> >> + HW3 >>> >> + } >>> >> +} >>> >> + >>> >> + >>> >> +__attribute__((noinline,noclone)) void f3(int a) >>> >> +{ >>> >> + if (a) { >>> >> +#define HW0 printf("Hello World!\n"); >>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> >> + HW5 >>> >> + } >>> >> +} >>> >> + >>> >> +__attribute__((noinline,noclone)) void f4(int a) >>> >> +{ >>> >> + if (a==1) { >>> >> +#define HW0 printf("Hello World!\n"); >>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> >> + HW0 >>> >> + } >>> >> +} >>> >> + >>> >> +__attribute__((noinline,noclone)) void f5(int a) >>> >> +{ >>> >> + if (a==1) { >>> >> +#define HW0 printf("Hello World!\n"); >>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> >> + HW3 >>> >> + } >>> >> +} >>> >> + >>> >> + >>> >> +__attribute__((noinline,noclone)) void f6(int a) >>> >> +{ >>> >> + if (a==1) { >>> >> +#define HW0 printf("Hello World!\n"); >>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >>> >> + HW5 >>> >> + } >>> >> +} >>> >> + >>> >> + >>> >> +int main(void) >>> >> +{ >>> >> + f1(0); >>> >> + f2(0); >>> >> + f3(0); >>> >> + f4(0); >>> >> + f5(0); >>> >> + f6(0); >>> >> + return 0; >>> >> +} >>> >> + >>> >> + >>> >> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ >>> >> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ >>> >> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ >>> >> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ >>> >> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */ >>> > >>> >> >> 1.patch >> >> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h >> index f995974f9bb..59ec219da3d 100644 >> --- a/gcc/config/arm/arm-protos.h >> +++ b/gcc/config/arm/arm-protos.h >> @@ -570,4 +570,6 @@ void arm_parse_option_features (sbitmap, const >> cpu_arch_option *, >> >> void arm_initialize_isa (sbitmap, const enum isa_feature *); >> >> +const char * arm_gen_far_branch (rtx *, int, const char *, const char *); >> + >> #endif /* ! GCC_ARM_PROTOS_H */ >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c >> index 39e1a1ef9a2..7a69ddb6b7b 100644 >> --- a/gcc/config/arm/arm.c >> +++ b/gcc/config/arm/arm.c >> @@ -32139,6 +32139,30 @@ arm_run_selftests (void) >> } >> } /* Namespace selftest. */ >> >> + >> +/* Generate code to enable conditional branches in functions over 1 MiB. */ >> >> >> Please document the function parameters in this comment as other functions >> in >> this file (try to) do. > > Done :) >> >> >> +const char * >> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest, >> + const char * branch_format) >> +{ >> + rtx_code_label * tmp_label = gen_label_rtx (); >> + char label_buf[256]; >> + char buffer[128]; >> + ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \ >> + CODE_LABEL_NUMBER (tmp_label)); >> + const char *label_ptr = arm_strip_name_encoding (label_buf); >> + rtx dest_label = operands[pos_label]; >> + operands[pos_label] = tmp_label; >> + >> + snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr); >> + output_asm_insn (buffer, operands); >> + >> + snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, >> label_ptr); >> + operands[pos_label] = dest_label; >> + output_asm_insn (buffer, operands); >> + return ""; >> +} >> + >> #undef TARGET_RUN_TARGET_SELFTESTS >> #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests >> #endif /* CHECKING_P */ >> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md >> index f861c72ccfc..7e5e1489214 100644 >> --- a/gcc/config/arm/arm.md >> +++ b/gcc/config/arm/arm.md >> @@ -6686,9 +6686,15 @@ >> ;; And for backward branches we have >> ;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4). >> ;; >> +;; In 16-bit Thumb these ranges are: >> ;; For a 'b' pos_range = 2046, neg_range = -2048 giving >> (-2040->2048). >> ;; For a 'b<cond>' pos_range = 254, neg_range = -256 giving (-250 ->256). >> >> +;; In 32-bit Thumb these ranges are: >> +;; For a 'b' ± 16MB is not checked for. >> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576 giving >> +;; (-1048568 -> 1048576). >> + >> (define_expand "cbranchsi4" >> [(set (pc) (if_then_else >> (match_operator 0 "expandable_comparison_operator" >> @@ -6946,23 +6952,56 @@ >> (label_ref (match_operand 0 "" "")) >> (pc)))] >> "TARGET_32BIT" >> - "* >> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >> - { >> - arm_ccfsm_state += 2; >> - return \"\"; >> - } >> - return \"b%d1\\t%l0\"; >> - " >> + { >> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >> + { >> + arm_ccfsm_state += 2; >> + return ""; >> + } >> + switch (get_attr_length (insn)) >> + { >> + /* Thumb2 16-bit b{cond}. */ >> + case 2: >> + >> + /* Thumb2 32-bit b{cond} or A32 b{cond}. */ >> + case 4: return "b%d1\t%l0"; >> + break; >> + >> + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and >> + unconditional branch b. */ >> + default: return arm_gen_far_branch \ >> + (operands, 0, "Lbcond", "b%D1\t"); >> + } >> >> >> The indentation here is wrong. Please look at how other switch statements >> are >> written in the backend for guidance: 2 space indentation, new line after the >> cases etc. > > Done >> >> + } >> [(set_attr "conds" "use") >> (set_attr "type" "branch") >> (set (attr "length") >> - (if_then_else >> - (and (match_test "TARGET_THUMB2") >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >> - (const_int 2) >> - (const_int 4)))] >> + (if_then_else (match_test "!TARGET_THUMB2") >> + >> + ;;Target is not Thumb2, therefore is A32. Generate b{cond}. >> + (const_int 4) >> + >> + ;; Check if target is within 16-bit Thumb2 b{cond} range. >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> + (le (minus (match_dup 0) (pc)) (const_int 256))) >> + >> + ;; Target is Thumb2, within narrow range. >> + ;; Generate b{cond}. >> + (const_int 2) >> + >> + ;; Check if target is within 32-bit Thumb2 b{cond} range. >> + (if_then_else (and (ge (minus (match_dup 0) >> + (pc))(const_int -1048568)) >> + (le (minus (match_dup 0) >> + (pc)) (const_int 1048576))) >> + >> + ;; Target is Thumb2, within wide range. >> + ;; Generate b{cond} >> + (const_int 4) >> + ;; Target is Thumb2, out of range. >> + ;; Generate narrow b{cond} and unconditional branch b. >> + (const_int 6))))) >> + ] >> >> >> Likewise on the indentation. > > Done, sorry about that! >> >> ) >> >> (define_insn "*arm_cond_branch_reversed" >> @@ -6972,23 +7011,56 @@ >> (pc) >> (label_ref (match_operand 0 "" ""))))] >> "TARGET_32BIT" >> - "* >> - if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >> - { >> - arm_ccfsm_state += 2; >> - return \"\"; >> - } >> - return \"b%D1\\t%l0\"; >> - " >> + { >> + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) >> + { >> + arm_ccfsm_state += 2; >> + return ""; >> + } >> + switch (get_attr_length (insn)) >> + { >> + /* Thumb2 16-bit b{cond}. */ >> + case 2: >> + >> + /* Thumb2 32-bit b{cond} or A32 b{cond}. */ >> + case 4: return "b%D1\t%l0"; >> + break; >> + >> + /* Thumb2 b{cond} out of range. Use 16-bit b{cond} and >> + unconditional branch b. */ >> + default: return arm_gen_far_branch \ >> + (operands, 0, "Lbcond", "b%d1\t"); >> + } >> >> >> >> >> + } >> [(set_attr "conds" "use") >> (set_attr "type" "branch") >> (set (attr "length") >> - (if_then_else >> - (and (match_test "TARGET_THUMB2") >> - (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> - (le (minus (match_dup 0) (pc)) (const_int 256)))) >> - (const_int 2) >> - (const_int 4)))] >> + (if_then_else (match_test "!TARGET_THUMB2") >> + >> + ;;Target is not Thumb2, therefore is A32. Generate b{cond}. >> + (const_int 4) >> + >> + ;; Check if target is within 16-bit Thumb2 b{cond} range. >> + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) >> + (le (minus (match_dup 0) (pc)) (const_int 256))) >> + >> + ;; Target is Thumb2, within narrow range. >> + ;; Generate b{cond}. >> + (const_int 2) >> + >> + ;; Check if target is within 32-bit Thumb2 b{cond} range. >> + (if_then_else (and (ge (minus (match_dup 0) >> + (pc))(const_int -1048568)) >> + (le (minus (match_dup 0) >> + (pc)) (const_int 1048576))) >> + >> + ;; Target is Thumb2, within wide range. >> + ;; Generate b{cond}. >> + (const_int 4) >> + ;; Target is Thumb2, out of range. >> + ;; Generate narrow b{cond} and unconditional branch b. >> + (const_int 6))))) >> + ] >> ) >> >> >> Otherwise this looks reasonable to me. Ramana, did you have any further >> comments on the patch? >> Thanks, >> Kyrill >> >> >> >> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c >> b/gcc/testsuite/gcc.target/arm/pr91816.c >> new file mode 100644 >> index 00000000000..176bf61780b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c >> @@ -0,0 +1,102 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" } */ >> +int printf(const char *, ...); >> + >> +__attribute__((noinline,noclone)) void f1(int a) >> +{ >> + if (a) { >> +#define HW0 printf("Hello World!\n"); >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> + HW0 >> + } >> +} >> + >> +__attribute__((noinline,noclone)) void f2(int a) >> +{ >> + if (a) { >> +#define HW0 printf("Hello World!\n"); >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> + HW3 >> + } >> +} >> + >> + >> +__attribute__((noinline,noclone)) void f3(int a) >> +{ >> + if (a) { >> +#define HW0 printf("Hello World!\n"); >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> + HW5 >> + } >> +} >> + >> +__attribute__((noinline,noclone)) void f4(int a) >> +{ >> + if (a==1) { >> +#define HW0 printf("Hello World!\n"); >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> + HW0 >> + } >> +} >> + >> +__attribute__((noinline,noclone)) void f5(int a) >> +{ >> + if (a==1) { >> +#define HW0 printf("Hello World!\n"); >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> + HW3 >> + } >> +} >> + >> + >> +__attribute__((noinline,noclone)) void f6(int a) >> +{ >> + if (a==1) { >> +#define HW0 printf("Hello World!\n"); >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 >> + HW5 >> + } >> +} >> + >> + >> +int main(void) >> +{ >> + f1(0); >> + f2(0); >> + f3(0); >> + f4(0); >> + f5(0); >> + f6(0); >> + return 0; >> +} >> + >> + >> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */ >> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */ >> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */ >> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */ >> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */ >> >>