On 1/8/20 3:18 PM, Stam Markianos-Wright wrote:
> 
> 
> On 12/10/19 5:03 PM, Kyrill Tkachov wrote:
>> Hi Stam,
>>
>> On 11/15/19 5:26 PM, Stam Markianos-Wright wrote:
>>> Pinging with more correct maintainers this time :)
>>>
>>> Also would need to backport to gcc7,8,9, but need to get this approved
>>> first!
>>>
>>
>> Sorry for the delay.
> 
> Same here now! Sorry totally forget about this in the lead up to Xmas!
> 
> Done the changes marked below and also removed the unnecessary extra #defines 
> from the test.

Ping :)

Cheers,
Stam

> 
>>
>>
>>> Thank you,
>>> Stam
>>>
>>>
>>> -------- Forwarded Message --------
>>> Subject: Re: [PATCH][GCC][ARM] Arm generates out of range conditional
>>> branches in Thumb2 (PR91816)
>>> Date: Mon, 21 Oct 2019 10:37:09 +0100
>>> From: Stam Markianos-Wright <stam.markianos-wri...@arm.com>
>>> To: Ramana Radhakrishnan <ramana....@googlemail.com>
>>> CC: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>, nd <n...@arm.com>,
>>> James Greenhalgh <james.greenha...@arm.com>, Richard Earnshaw
>>> <richard.earns...@arm.com>
>>>
>>>
>>>
>>> On 10/13/19 4:23 PM, Ramana Radhakrishnan wrote:
>>> >>
>>> >> Patch bootstrapped and regression tested on arm-none-linux-gnueabihf,
>>> >> however, on my native Aarch32 setup the test times out when run as part
>>> >> of a big "make check-gcc" regression, but not when run individually.
>>> >>
>>> >> 2019-10-11  Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>> >>
>>> >>       * config/arm/arm.md: Update b<cond> for Thumb2 range checks.
>>> >>       * config/arm/arm.c: New function arm_gen_far_branch.
>>> >>       * config/arm/arm-protos.h: New function arm_gen_far_branch
>>> >>       prototype.
>>> >>
>>> >> gcc/testsuite/ChangeLog:
>>> >>
>>> >> 2019-10-11  Stamatis Markianos-Wright <stam.markianos-wri...@arm.com>
>>> >>
>>> >>       * testsuite/gcc.target/arm/pr91816.c: New test.
>>> >
>>> >> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>>> >> index f995974f9bb..1dce333d1c3 100644
>>> >> --- a/gcc/config/arm/arm-protos.h
>>> >> +++ b/gcc/config/arm/arm-protos.h
>>> >> @@ -570,4 +570,7 @@ void arm_parse_option_features (sbitmap, const 
>>> cpu_arch_option *,
>>> >>
>>> >>   void arm_initialize_isa (sbitmap, const enum isa_feature *);
>>> >>
>>> >> +const char * arm_gen_far_branch (rtx *, int,const char * , const char 
>>> >> *);
>>> >> +
>>> >> +
>>> >
>>> > Lets get the nits out of the way.
>>> >
>>> > Unnecessary extra new line, need a space between int and const above.
>>> >
>>> >
>>>
>>> .Fixed!
>>>
>>> >>   #endif /* ! GCC_ARM_PROTOS_H */
>>> >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>>> >> index 39e1a1ef9a2..1a693d2ddca 100644
>>> >> --- a/gcc/config/arm/arm.c
>>> >> +++ b/gcc/config/arm/arm.c
>>> >> @@ -32139,6 +32139,31 @@ arm_run_selftests (void)
>>> >>   }
>>> >>   } /* Namespace selftest.  */
>>> >>
>>> >> +
>>> >> +/* Generate code to enable conditional branches in functions over 1 
>>> >> MiB.  */
>>> >> +const char *
>>> >> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
>>> >> +                    const char * branch_format)
>>> >
>>> > Not sure if this is some munging from the attachment but check
>>> > vertical alignment of parameters.
>>> >
>>>
>>> .Fixed!
>>>
>>> >> +{
>>> >> +  rtx_code_label * tmp_label = gen_label_rtx ();
>>> >> +  char label_buf[256];
>>> >> +  char buffer[128];
>>> >> +  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
>>> >> +                    CODE_LABEL_NUMBER (tmp_label));
>>> >> +  const char *label_ptr = arm_strip_name_encoding (label_buf);
>>> >> +  rtx dest_label = operands[pos_label];
>>> >> +  operands[pos_label] = tmp_label;
>>> >> +
>>> >> +  snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
>>> >> +  output_asm_insn (buffer, operands);
>>> >> +
>>> >> +  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, 
>>> label_ptr);
>>> >> +  operands[pos_label] = dest_label;
>>> >> +  output_asm_insn (buffer, operands);
>>> >> +  return "";
>>> >> +}
>>> >> +
>>> >> +
>>> >
>>> > Unnecessary extra newline.
>>> >
>>>
>>> .Fixed!
>>>
>>> >>   #undef TARGET_RUN_TARGET_SELFTESTS
>>> >>   #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
>>> >>   #endif /* CHECKING_P */
>>> >> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>>> >> index f861c72ccfc..634fd0a59da 100644
>>> >> --- a/gcc/config/arm/arm.md
>>> >> +++ b/gcc/config/arm/arm.md
>>> >> @@ -6686,9 +6686,16 @@
>>> >>   ;; And for backward branches we have
>>> >>   ;;   (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 
>>> >>4).
>>> >>   ;;
>>> >> +;; In 16-bit Thumb these ranges are:
>>> >>   ;; For a 'b'       pos_range = 2046, neg_range = -2048 giving 
>>> (-2040->2048).
>>> >>   ;; For a 'b<cond>' pos_range = 254, neg_range = -256  giving (-250 
>>> >>->256).
>>> >>
>>> >> +;; In 32-bit Thumb these ranges are:
>>> >> +;; For a 'b'       +/- 16MB is not checked for.
>>> >> +;; For a 'b<cond>' pos_range = 1048574, neg_range = -1048576  giving
>>> >> +;; (-1048568 -> 1048576).
>>> >> +
>>> >> +
>>> >
>>> > Unnecessary extra newline.
>>> >
>>>
>>> .Fixed!
>>>
>>> >>   (define_expand "cbranchsi4"
>>> >>     [(set (pc) (if_then_else
>>> >>             (match_operator 0 "expandable_comparison_operator"
>>> >> @@ -6947,22 +6954,42 @@
>>> >>                     (pc)))]
>>> >>     "TARGET_32BIT"
>>> >>     "*
>>> >> -  if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> >> -    {
>>> >> -      arm_ccfsm_state += 2;
>>> >> -      return \"\";
>>> >> -    }
>>> >> -  return \"b%d1\\t%l0\";
>>> >> +     if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>>> >> +      {
>>> >> +    arm_ccfsm_state += 2;
>>> >> +    return \"\";
>>> >> +      }
>>> >> +     switch (get_attr_length (insn))
>>> >> +      {
>>> >> +    // Thumb2 16-bit b{cond}
>>> >> +    case 2:
>>> >> +
>>> >> +    // Thumb2 32-bit b{cond}
>>> >> +    case 4: return \"b%d1\\t%l0\";break;
>>> >> +
>>> >> +    // Thumb2 b{cond} out of range.  Use unconditional branch.
>>> >> +    case 8: return arm_gen_far_branch \
>>> >> +            (operands, 0, \"Lbcond\", \"b%D1\t\");
>>> >> +    break;
>>> >> +
>>> >> +    // A32 b{cond}
>>> >> +    default: return \"b%d1\\t%l0\";
>>> >> +      }
>>> >
>>> > Please fix indentation here.
>>> >
>>>
>>> .Fixed together with below changes.
>>>
>>> >>     "
>>> >>     [(set_attr "conds" "use")
>>> >>      (set_attr "type" "branch")
>>> >>      (set (attr "length")
>>> >> -    (if_then_else
>>> >> -       (and (match_test "TARGET_THUMB2")
>>> >> -            (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> >> -                 (le (minus (match_dup 0) (pc)) (const_int 256))))
>>> >> -       (const_int 2)
>>> >> -       (const_int 4)))]
>>> >> +    (if_then_else (match_test "TARGET_THUMB2")
>>> >> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> >> +    (le (minus (match_dup 0) (pc)) (const_int 256)))
>>> >> +    (const_int 2)
>>> >> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>>> >> + (const_int -1048568))
>>> >> +                    (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>> >> +    (const_int 4)
>>> >> +    (const_int 8)))
>>> >> +    (const_int 10)))
>>> >> +   ]
>>> >
>>> > This conditional is unreadable and is getting quite complex.
>>> >
>>> > Please fix the indentation and add some comments to indicate when
>>> > this is 2, 4, 8, 10 above the pattern and ask for the comment to
>>> > be in sync with this.
>>> >
>>> > How did we end up with length 10 ? That indicates 2 4 byte instructions
>>> > and a 2 byte instruction ? You are handling lengths 2, 4, 8 above in
>>> > the switch - is length 10 going to be a single A32 b<cond> instruction ?
>>> >
>>> > What am I missing ?
>>> >
>>> >
>>>
>>> Ah sorry, I had not realised that the "length" related to the number of
>>> bytes in the instruction, so I just used it as a variable to then check
>>> in the switch().
>>> And yes, you are correct in assuming that length 10 would have been the
>>> A32 b<cond> version.
>>> So the mapping I had in mind was:
>>> 2->  Thumb2 b<cond> - narrow 16bit version
>>> 4->  Thumb2 b<cond> - wide 32bit version
>>> 8->  Thumb2 b       - "far branch".
>>> 10-> A32 b<cond>
>>>
>>> The new version that maintains the "length=number of bytes" would be:
>>>
>>> 2->  Thumb2 b<cond> - narrow 16bit version
>>> 4->  Thumb2 b<cond> - wide 32bit version OR A32 b<cond>
>>> 6->  Thumb2 "far branch" made up from one b<cond> to a very close Lbcond
>>> label (so 16 bits) and one b for 32 bits. (so 2+4 == 6)
>>>
>>> I've gone ahead and done this in the new proposed patch. Let me know if
>>> it's ok! (also I changed the first check to !TARGET_THUMB2 - this makes
>>> it slightly more readable). I'm still not sure about this, so any
>>> suggestions are welcome!
>>>
>>> >
>>> >>   )
>>> >>
>>> >>   (define_insn "*arm_cond_branch_reversed"
>>> >> @@ -6978,17 +7005,36 @@
>>> >>         arm_ccfsm_state += 2;
>>> >>         return \"\";
>>> >>       }
>>> >> -  return \"b%D1\\t%l0\";
>>> >> +     switch (get_attr_length (insn))
>>> >> +      {
>>> >> +    // Thumb2 16-bit b{cond}
>>> >> +    case 2:
>>> >> +
>>> >> +    // Thumb2 32-bit b{cond}
>>> >> +    case 4: return \"b%D1\\t%l0\";break;
>>> >> +
>>> >> +    // Thumb2 b{cond} out of range.  Use unconditional branch.
>>> >> +    case 8: return arm_gen_far_branch \
>>> >> +            (operands, 0, \"Lbcond\", \"b%d1\t\");
>>> >> +            break;
>>> >> +    // A32 b{cond}
>>> >> +    default: return \"b%D1\\t%l0\";
>>> >> +       }
>>> >>     "
>>> >>     [(set_attr "conds" "use")
>>> >>      (set_attr "type" "branch")
>>> >>      (set (attr "length")
>>> >> -    (if_then_else
>>> >> -       (and (match_test "TARGET_THUMB2")
>>> >> -            (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> >> -                 (le (minus (match_dup 0) (pc)) (const_int 256))))
>>> >> -       (const_int 2)
>>> >> -       (const_int 4)))]
>>> >> +    (if_then_else (match_test "TARGET_THUMB2")
>>> >> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>>> >> +            (le (minus (match_dup 0) (pc)) (const_int 256)))
>>> >> +    (const_int 2)
>>> >> +    (if_then_else (and (ge (minus (match_dup 0) (pc))
>>> >> + (const_int -1048568))
>>> >> +            (le (minus (match_dup 0) (pc)) (const_int 1048576)))
>>> >> +    (const_int 4)
>>> >> +    (const_int 8)))
>>> >> +    (const_int 10)))
>>> >> +   ]
>>> >
>>> > Same comments as above apply here too.
>>> >
>>>
>>> Same as above.
>>>
>>> Thank you for the feedback and apologies for being a clueless :)
>>>
>>> And, of course, let me know of any problems or queries!
>>>
>>> Cheers,
>>> Stam
>>>
>>> > Ramana
>>> >
>>> >>   )
>>> >>
>>> >>
>>> >> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c 
>>> b/gcc/testsuite/gcc.target/arm/pr91816.c
>>> >> new file mode 100644
>>> >> index 00000000000..176bf61780b
>>> >> --- /dev/null
>>> >> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c
>>> >> @@ -0,0 +1,102 @@
>>> >> +/* { dg-do compile } */
>>> >> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" }  */
>>> >> +int printf(const char *, ...);
>>> >> +
>>> >> +__attribute__((noinline,noclone)) void f1(int a)
>>> >> +{
>>> >> +    if (a) {
>>> >> +#define HW0 printf("Hello World!\n");
>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> >> +            HW0
>>> >> +    }
>>> >> +}
>>> >> +
>>> >> +__attribute__((noinline,noclone)) void f2(int a)
>>> >> +{
>>> >> +    if (a) {
>>> >> +#define HW0 printf("Hello World!\n");
>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> >> +            HW3
>>> >> +    }
>>> >> +}
>>> >> +
>>> >> +
>>> >> +__attribute__((noinline,noclone)) void f3(int a)
>>> >> +{
>>> >> +    if (a) {
>>> >> +#define HW0 printf("Hello World!\n");
>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> >> +            HW5
>>> >> +    }
>>> >> +}
>>> >> +
>>> >> +__attribute__((noinline,noclone)) void f4(int a)
>>> >> +{
>>> >> +    if (a==1) {
>>> >> +#define HW0 printf("Hello World!\n");
>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> >> +            HW0
>>> >> +    }
>>> >> +}
>>> >> +
>>> >> +__attribute__((noinline,noclone)) void f5(int a)
>>> >> +{
>>> >> +    if (a==1) {
>>> >> +#define HW0 printf("Hello World!\n");
>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> >> +            HW3
>>> >> +    }
>>> >> +}
>>> >> +
>>> >> +
>>> >> +__attribute__((noinline,noclone)) void f6(int a)
>>> >> +{
>>> >> +    if (a==1) {
>>> >> +#define HW0 printf("Hello World!\n");
>>> >> +#define HW1 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>>> >> +#define HW2 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>>> >> +#define HW3 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>>> >> +#define HW4 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>>> >> +#define HW5 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>>> >> +            HW5
>>> >> +    }
>>> >> +}
>>> >> +
>>> >> +
>>> >> +int main(void)
>>> >> +{
>>> >> +    f1(0);
>>> >> +    f2(0);
>>> >> +    f3(0);
>>> >> +    f4(0);
>>> >> +    f5(0);
>>> >> +    f6(0);
>>> >> +    return 0;
>>> >> +}
>>> >> +
>>> >> +
>>> >> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */
>>> >> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */
>>> >> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */
>>> >> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */
>>> >> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */
>>> >
>>>
>>
>> 1.patch
>>
>> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
>> index f995974f9bb..59ec219da3d 100644
>> --- a/gcc/config/arm/arm-protos.h
>> +++ b/gcc/config/arm/arm-protos.h
>> @@ -570,4 +570,6 @@ void arm_parse_option_features (sbitmap, const 
>> cpu_arch_option *,
>>
>>   void arm_initialize_isa (sbitmap, const enum isa_feature *);
>>
>> +const char * arm_gen_far_branch (rtx *, int, const char *, const char *);
>> +
>>   #endif /* ! GCC_ARM_PROTOS_H */
>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>> index 39e1a1ef9a2..7a69ddb6b7b 100644
>> --- a/gcc/config/arm/arm.c
>> +++ b/gcc/config/arm/arm.c
>> @@ -32139,6 +32139,30 @@ arm_run_selftests (void)
>>   }
>>   } /* Namespace selftest.  */
>>
>> +
>> +/* Generate code to enable conditional branches in functions over 1 MiB.  */
>>
>>
>> Please document the function parameters in this comment as other functions 
>> in 
>> this file (try to) do.
> 
> Done :)
>>
>>
>> +const char *
>> +arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
>> +            const char * branch_format)
>> +{
>> +  rtx_code_label * tmp_label = gen_label_rtx ();
>> +  char label_buf[256];
>> +  char buffer[128];
>> +  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
>> +            CODE_LABEL_NUMBER (tmp_label));
>> +  const char *label_ptr = arm_strip_name_encoding (label_buf);
>> +  rtx dest_label = operands[pos_label];
>> +  operands[pos_label] = tmp_label;
>> +
>> +  snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
>> +  output_asm_insn (buffer, operands);
>> +
>> +  snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, 
>> label_ptr);
>> +  operands[pos_label] = dest_label;
>> +  output_asm_insn (buffer, operands);
>> +  return "";
>> +}
>> +
>>   #undef TARGET_RUN_TARGET_SELFTESTS
>>   #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
>>   #endif /* CHECKING_P */
>> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
>> index f861c72ccfc..7e5e1489214 100644
>> --- a/gcc/config/arm/arm.md
>> +++ b/gcc/config/arm/arm.md
>> @@ -6686,9 +6686,15 @@
>>   ;; And for backward branches we have
>>   ;;   (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4).
>>   ;;
>> +;; In 16-bit Thumb these ranges are:
>>   ;; For a 'b'       pos_range = 2046, neg_range = -2048 giving 
>> (-2040->2048).
>>   ;; For a 'b<cond>' pos_range = 254,  neg_range = -256  giving (-250 ->256).
>>
>> +;; In 32-bit Thumb these ranges are:
>> +;; For a 'b'       ± 16MB is not checked for.
>> +;; For a 'b<cond>' pos_range = 1048574,  neg_range = -1048576  giving
>> +;; (-1048568 -> 1048576).
>> +
>>   (define_expand "cbranchsi4"
>>     [(set (pc) (if_then_else
>>             (match_operator 0 "expandable_comparison_operator"
>> @@ -6946,23 +6952,56 @@
>>                 (label_ref (match_operand 0 "" ""))
>>                 (pc)))]
>>     "TARGET_32BIT"
>> -  "*
>> -  if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>> -    {
>> -      arm_ccfsm_state += 2;
>> -      return \"\";
>> -    }
>> -  return \"b%d1\\t%l0\";
>> -  "
>> +  {
>> +    if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>> +    {
>> +        arm_ccfsm_state += 2;
>> +        return "";
>> +    }
>> +    switch (get_attr_length (insn))
>> +    {
>> +        /* Thumb2 16-bit b{cond}.  */
>> +        case 2:
>> +
>> +        /* Thumb2 32-bit b{cond} or A32 b{cond}.  */
>> +        case 4: return "b%d1\t%l0";
>> +            break;
>> +
>> +        /* Thumb2 b{cond} out of range.  Use 16-bit b{cond} and
>> +           unconditional branch b.  */
>> +        default: return arm_gen_far_branch \
>> +                (operands, 0, "Lbcond", "b%D1\t");
>> +    }
>>
>>
>> The indentation here is wrong. Please look at how other switch statements 
>> are 
>> written in the backend for guidance: 2 space indentation, new line after the 
>> cases etc.
> 
> Done
>>
>>   +  }
>>     [(set_attr "conds" "use")
>>      (set_attr "type" "branch")
>>      (set (attr "length")
>> -    (if_then_else
>> -       (and (match_test "TARGET_THUMB2")
>> -        (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>> -             (le (minus (match_dup 0) (pc)) (const_int 256))))
>> -       (const_int 2)
>> -       (const_int 4)))]
>> +    (if_then_else (match_test "!TARGET_THUMB2")
>> +
>> +    ;;Target is not Thumb2, therefore is A32.  Generate b{cond}.
>> +    (const_int 4)
>> +
>> +    ;; Check if target is within 16-bit Thumb2 b{cond} range.
>> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>> +               (le (minus (match_dup 0) (pc)) (const_int 256)))
>> +
>> +        ;; Target is Thumb2, within narrow range.
>> +        ;; Generate b{cond}.
>> +            (const_int 2)
>> +
>> +        ;; Check if target is within 32-bit Thumb2 b{cond} range.
>> +            (if_then_else (and (ge (minus (match_dup 0)
>> +                     (pc))(const_int -1048568))
>> +                       (le (minus (match_dup 0)
>> +                     (pc)) (const_int 1048576)))
>> +
>> +        ;; Target is Thumb2, within wide range.
>> +        ;; Generate b{cond}
>> +                        (const_int 4)
>> +        ;; Target is Thumb2, out of range.
>> +        ;; Generate narrow b{cond} and unconditional branch b.
>> +                        (const_int 6)))))
>> +  ]
>>
>>
>> Likewise on the indentation.
> 
> Done, sorry about that!
>>
>>   )
>>
>>   (define_insn "*arm_cond_branch_reversed"
>> @@ -6972,23 +7011,56 @@
>>                 (pc)
>>                 (label_ref (match_operand 0 "" ""))))]
>>     "TARGET_32BIT"
>> -  "*
>> -  if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>> -    {
>> -      arm_ccfsm_state += 2;
>> -      return \"\";
>> -    }
>> -  return \"b%D1\\t%l0\";
>> -  "
>> +  {
>> +    if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2)
>> +    {
>> +        arm_ccfsm_state += 2;
>> +        return "";
>> +    }
>> +    switch (get_attr_length (insn))
>> +    {
>> +        /* Thumb2 16-bit b{cond}.  */
>> +        case 2:
>> +
>> +        /* Thumb2 32-bit b{cond} or A32 b{cond}.  */
>> +        case 4: return "b%D1\t%l0";
>> +            break;
>> +
>> +        /* Thumb2 b{cond} out of range.  Use 16-bit b{cond} and
>> +           unconditional branch b.  */
>> +        default: return arm_gen_far_branch \
>> +                (operands, 0, "Lbcond", "b%d1\t");
>> +    }
>>
>>
>>
>>
>>   +  }
>>     [(set_attr "conds" "use")
>>      (set_attr "type" "branch")
>>      (set (attr "length")
>> -    (if_then_else
>> -       (and (match_test "TARGET_THUMB2")
>> -        (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>> -             (le (minus (match_dup 0) (pc)) (const_int 256))))
>> -       (const_int 2)
>> -       (const_int 4)))]
>> +    (if_then_else (match_test "!TARGET_THUMB2")
>> +
>> +    ;;Target is not Thumb2, therefore is A32.  Generate b{cond}.
>> +    (const_int 4)
>> +
>> +    ;; Check if target is within 16-bit Thumb2 b{cond} range.
>> +    (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
>> +               (le (minus (match_dup 0) (pc)) (const_int 256)))
>> +
>> +        ;; Target is Thumb2, within narrow range.
>> +        ;; Generate b{cond}.
>> +            (const_int 2)
>> +
>> +        ;; Check if target is within 32-bit Thumb2 b{cond} range.
>> +            (if_then_else (and (ge (minus (match_dup 0)
>> +                     (pc))(const_int -1048568))
>> +                       (le (minus (match_dup 0)
>> +                     (pc)) (const_int 1048576)))
>> +
>> +        ;; Target is Thumb2, within wide range.
>> +        ;; Generate b{cond}.
>> +                        (const_int 4)
>> +        ;; Target is Thumb2, out of range.
>> +        ;; Generate narrow b{cond} and unconditional branch b.
>> +                        (const_int 6)))))
>> +  ]
>>   )
>>
>>
>> Otherwise this looks reasonable to me. Ramana, did you have any further 
>> comments on the patch?
>> Thanks,
>> Kyrill
>>
>>
>>
>> diff --git a/gcc/testsuite/gcc.target/arm/pr91816.c 
>> b/gcc/testsuite/gcc.target/arm/pr91816.c
>> new file mode 100644
>> index 00000000000..176bf61780b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/arm/pr91816.c
>> @@ -0,0 +1,102 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-march=armv7-a -mthumb -mfpu=vfpv3-d16" }  */
>> +int printf(const char *, ...);
>> +
>> +__attribute__((noinline,noclone)) void f1(int a)
>> +{
>> +    if (a) {
>> +#define HW0    printf("Hello World!\n");
>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>> +        HW0
>> +    }
>> +}
>> +
>> +__attribute__((noinline,noclone)) void f2(int a)
>> +{
>> +    if (a) {
>> +#define HW0    printf("Hello World!\n");
>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>> +        HW3
>> +    }
>> +}
>> +
>> +
>> +__attribute__((noinline,noclone)) void f3(int a)
>> +{
>> +    if (a) {
>> +#define HW0    printf("Hello World!\n");
>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>> +        HW5
>> +    }
>> +}
>> +
>> +__attribute__((noinline,noclone)) void f4(int a)
>> +{
>> +    if (a==1) {
>> +#define HW0    printf("Hello World!\n");
>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>> +        HW0
>> +    }
>> +}
>> +
>> +__attribute__((noinline,noclone)) void f5(int a)
>> +{
>> +    if (a==1) {
>> +#define HW0    printf("Hello World!\n");
>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>> +        HW3
>> +    }
>> +}
>> +
>> +
>> +__attribute__((noinline,noclone)) void f6(int a)
>> +{
>> +    if (a==1) {
>> +#define HW0    printf("Hello World!\n");
>> +#define HW1    HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0 HW0
>> +#define HW2    HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1 HW1
>> +#define HW3    HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2 HW2
>> +#define HW4    HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3 HW3
>> +#define HW5    HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4 HW4
>> +        HW5
>> +    }
>> +}
>> +
>> +
>> +int main(void)
>> +{
>> +    f1(0);
>> +    f2(0);
>> +    f3(0);
>> +    f4(0);
>> +    f5(0);
>> +    f6(0);
>> +    return 0;
>> +}
>> +
>> +
>> +/* { dg-final { scan-assembler-times "beq\\t.L\[0-9\]" 2 } } */
>> +/* { dg-final { scan-assembler-times "beq\\t.Lbcond\[0-9\]" 1 } } */
>> +/* { dg-final { scan-assembler-times "bne\\t.L\[0-9\]" 2 } } */
>> +/* { dg-final { scan-assembler-times "bne\\t.Lbcond\[0-9\]" 1 } } */
>> +/* { dg-final { scan-assembler-times "b\\t.L\[0-9\]" 2 } } */
>>
>>

Reply via email to