Hi Hao Chen,
On 9/15/21 2:35 AM, HAO CHEN GUI wrote:
> Bill,
>
> Yes, I built the gcc with p10 binutils. Then power10_ok tests can pass.
> Thanks again for your kindly explanation.
>
> I finally realized that the line wrap settings on my thunderbird didn't
> take any effect. I have to set a very large line size, just for a workaround.
>
> ChangeLog
>
> 2021-09-15 Haochen Gui <[email protected]>
>
> gcc/
> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
> Modify the expansion for sign extension. All extentions are done
> within VSX resgisters.
Two typos here: extentions => extensions, resgisters => registers.
The patch itself looks good to me. I recommend the maintainers approve with
the ChangeLog fixed.
Thanks!
Bill
> * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>
> gcc/testsuite/
> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b4e13af4dc6..587e9fa2a2a 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree
> exp, rtx target, bool bl
>
> if (sign_extend)
> {
> - rtx discratch = gen_reg_rtx (DImode);
> + rtx discratch = gen_reg_rtx (V2DImode);
> rtx tiscratch = gen_reg_rtx (TImode);
>
> /* Emit the lxvr*x insn. */
> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
> tree exp, rtx target, bool bl
> return 0;
> emit_insn (pat);
>
> - /* Emit a sign extension from QI,HI,WI to double (DI). */
> - rtx scratch = gen_lowpart (smode, tiscratch);
> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
> + rtx temp1, temp2;
> if (icode == CODE_FOR_vsx_lxvrbx)
> - emit_insn (gen_extendqidi2 (discratch, scratch));
> + {
> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
> + }
> else if (icode == CODE_FOR_vsx_lxvrhx)
> - emit_insn (gen_extendhidi2 (discratch, scratch));
> + {
> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
> + }
> else if (icode == CODE_FOR_vsx_lxvrwx)
> - emit_insn (gen_extendsidi2 (discratch, scratch));
> - /* Assign discratch directly if scratch is already DI. */
> - if (icode == CODE_FOR_vsx_lxvrdx)
> - discratch = scratch;
> + {
> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
> + }
> + else if (icode == CODE_FOR_vsx_lxvrdx)
> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
> + else
> + gcc_unreachable ();
>
> - /* Emit the sign extension from DI (double) to TI (quad). */
> - emit_insn (gen_extendditi2 (target, discratch));
> + /* Emit the sign extension from V2DI (double) to TI (quad). */
> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
> + emit_insn (gen_extendditi2_vector (target, temp2));
>
> return target;
> }
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index bcb92be2f5c..987f21bbc22 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
> "vextsh2<wd> %0,%1"
> [(set_attr "type" "vecexts")])
>
> -(define_insn "*vsx_sign_extend_si_v2di"
> +(define_insn "vsx_sign_extend_si_v2di"
> [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
> (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
> UNSPEC_VSX_SIGN_EXTEND))]
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> new file mode 100644
> index 00000000000..78e72ac5425
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed __int128
> +foo1 (signed long a, signed char *b)
> +{
> + return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo2 (signed long a, signed short *b)
> +{
> + return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo3 (signed long a, signed int *b)
> +{
> + return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo4 (signed long a, signed long *b)
> +{
> + return vec_xl_sext (a, b);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>
>
> On 10/9/2021 下午 8:18, Bill Schmidt wrote:
>>
>>
>> On 9/10/21 12:45 AM, HAO CHEN GUI wrote:
>>> Bill,
>>>
>>> Thanks so much for your advice.
>>>
>>> I refined the patch and passed the bootstrap and regression test.
>>> Just one thing, the test case becomes unsupported on P9 if I set "{
>>> dg-require-effective-target power10_ok }". I just want the test case to
>>> be compiled and check its assembly. Do we need set "power10_ok"?
>>
>> Yes. power10_ok tests whether your toolchain supports *assembly* of p10
>> instructions. It isn't unsupported on P9 per se; it's unsupported if the
>> toolchain you're using on P9 doesn't have binutils support for P10.
>>
>> You can look at gcc/testsuite/lib/target-supports.exp to see how this stuff
>> works. In this case we have:
>>
>> proc check_effective_target_power10_ok { } {
>> if { ([istarget powerpc64*-*-linux*]) } {
>> return [check_no_compiler_messages power10_ok object {
>> int main (void) {
>> long e;
>> asm ("pli %0,%1" : "=r" (e) : "n" (0x12345));
>> return e;
>> }
>> } "-mcpu=power10"]
>> } else {
>> return 0
>> }
>> }
>>
>>> I tried to disable line wrap in my email editor. Please let me know
>>> if you still see line wrap. Thanks.
>>
>> Thanks, but unfortunately it's still broken as I still see:
>>
>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>> tree exp, rtx target, bool bl
>>
>> This should all be on one line. The headers show:
>>
>> Content-Type: text/plain; charset=utf-8; format=flowed
>>
>> Not sure what mailer you're using, but often you can avoid problems by
>> marking the patch portion as untouchable in some way. In Thunderbird, you
>> can set it to Preformat, as an example. Then you can get line wrap on the
>> non-patch portions if you prefer that.
>>
>>> ChangeLog
>>>
>>> 2021-09-10 Haochen Gui<[email protected]>
>>>
>>> gcc/
>>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>>> Modify the expansion for sign extension. All extentions are done
>>> within VSX resgisters.
>>> * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>>>
>>> gcc/testsuite/
>>> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>>
>>> patch.diff
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-call.c
>>> b/gcc/config/rs6000/rs6000-call.c
>>> index b4e13af4dc6..587e9fa2a2a 100644
>>> --- a/gcc/config/rs6000/rs6000-call.c
>>> +++ b/gcc/config/rs6000/rs6000-call.c
>>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>>> tree exp, rtx target, bool bl
>>>
>>> if (sign_extend)
>>> {
>>> - rtx discratch = gen_reg_rtx (DImode);
>>> + rtx discratch = gen_reg_rtx (V2DImode);
>>> rtx tiscratch = gen_reg_rtx (TImode);
>>>
>>> /* Emit the lxvr*x insn. */
>>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
>>> icode, tree exp, rtx target, bool bl
>>> return 0;
>>> emit_insn (pat);
>>>
>>> - /* Emit a sign extension from QI,HI,WI to double (DI). */
>>> - rtx scratch = gen_lowpart (smode, tiscratch);
>>> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
>>> + rtx temp1, temp2;
>>> if (icode == CODE_FOR_vsx_lxvrbx)
>>> - emit_insn (gen_extendqidi2 (discratch, scratch));
>>> + {
>>> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>>> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>>> + }
>>> else if (icode == CODE_FOR_vsx_lxvrhx)
>>> - emit_insn (gen_extendhidi2 (discratch, scratch));
>>> + {
>>> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>>> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>>> + }
>>> else if (icode == CODE_FOR_vsx_lxvrwx)
>>> - emit_insn (gen_extendsidi2 (discratch, scratch));
>>> - /* Assign discratch directly if scratch is already DI. */
>>> - if (icode == CODE_FOR_vsx_lxvrdx)
>>> - discratch = scratch;
>>> + {
>>> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>>> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>>> + }
>>> + else if (icode == CODE_FOR_vsx_lxvrdx)
>>> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>>> + else
>>> + gcc_unreachable ();
>>>
>>> - /* Emit the sign extension from DI (double) to TI (quad). */
>>> - emit_insn (gen_extendditi2 (target, discratch));
>>> + /* Emit the sign extension from V2DI (double) to TI (quad). */
>>> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>>> + emit_insn (gen_extendditi2_vector (target, temp2));
>>>
>>> return target;
>>> }
>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>>> index bcb92be2f5c..987f21bbc22 100644
>>> --- a/gcc/config/rs6000/vsx.md
>>> +++ b/gcc/config/rs6000/vsx.md
>>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
>>> "vextsh2<wd> %0,%1"
>>> [(set_attr "type" "vecexts")])
>>>
>>> -(define_insn "*vsx_sign_extend_si_v2di"
>>> +(define_insn "vsx_sign_extend_si_v2di"
>>> [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>>> (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>>> UNSPEC_VSX_SIGN_EXTEND))]
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>> new file mode 100644
>>> index 00000000000..fcecc542d07
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>> @@ -0,0 +1,34 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target int128 } */
>>
>> As discussed, add the power10_ok directive.
>>
>> Otherwise looks fine to me, will let Segher have a look. :)
>>
>> Thanks!
>> Bill
>>
>>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>>> +
>>> +#include <altivec.h>
>>> +
>>> +vector signed __int128
>>> +foo1 (signed long a, signed char *b)
>>> +{
>>> + return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +vector signed __int128
>>> +foo2 (signed long a, signed short *b)
>>> +{
>>> + return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +vector signed __int128
>>> +foo3 (signed long a, signed int *b)
>>> +{
>>> + return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +vector signed __int128
>>> +foo4 (signed long a, signed long *b)
>>> +{
>>> + return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>>> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
>>> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
>>> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>>>
>>>
>>> On 10/9/2021 上午 4:49, Bill Schmidt wrote:
>>>> Hi Haochen,
>>>>
>>>> This patch was sent with "format=flowed", so it doesn't apply. That
>>>> makes it harder to review. Can you please make sure you disable line
>>>> wrap from your patch submissions, at least in the patch part?
>>>>
>>>> On 9/6/21 1:18 AM, HAO CHEN GUI wrote:
>>>>> Hi,
>>>>>
>>>>> The patch optimized the code generation for vec_xl_sext builtin.
>>>>> Now
>>>>> all the sign extensions are done on VSX registers directly.
>>>>>
>>>>> Bootstrapped and tested on powerpc64le-linux with no
>>>>> regressions. Is
>>>>> this okay for trunk? Any recommendations? Thanks a lot.
>>>>>
>>>>> ChangeLog
>>>>>
>>>>> 2021-09-06 Haochen Gui<[email protected]>
>>>>>
>>>>> gcc/
>>>>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>>>>> Modify the expansion for sign extension.
>>>>> * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>>>>>
>>>>> gcc/testsuite/
>>>>> * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>>>>
>>>>>
>>>>> patch.diff
>>>>>
>>>>> diff --git a/gcc/config/rs6000/rs6000-call.c
>>>>> b/gcc/config/rs6000/rs6000-call.c
>>>>> index b4e13af4dc6..54fdaf47be3 100644
>>>>> --- a/gcc/config/rs6000/rs6000-call.c
>>>>> +++ b/gcc/config/rs6000/rs6000-call.c
>>>>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>>>>> tree exp, rtx target, bool bl
>>>> Above is the first wrapped line that causes the patch to not apply.
>>>>> if (sign_extend)
>>>>> {
>>>>> - rtx discratch = gen_reg_rtx (DImode);
>>>>> + rtx discratch = gen_reg_rtx (V2DImode);
>>>>> rtx tiscratch = gen_reg_rtx (TImode);
>>>>>
>>>>> /* Emit the lxvr*x insn. */
>>>>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
>>>>> icode, tree exp, rtx target, bool bl
>>>>> return 0;
>>>>> emit_insn (pat);
>>>>>
>>>>> - /* Emit a sign extension from QI,HI,WI to double (DI). */
>>>>> - rtx scratch = gen_lowpart (smode, tiscratch);
>>>>> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
>>>>> + rtx temp1, temp2;
>>>>> if (icode == CODE_FOR_vsx_lxvrbx)
>>>>> - emit_insn (gen_extendqidi2 (discratch, scratch));
>>>>> + {
>>>>> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>>>>> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>>>>> + }
>>>>> else if (icode == CODE_FOR_vsx_lxvrhx)
>>>>> - emit_insn (gen_extendhidi2 (discratch, scratch));
>>>>> + {
>>>>> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>>>>> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>>>>> + }
>>>>> else if (icode == CODE_FOR_vsx_lxvrwx)
>>>>> - emit_insn (gen_extendsidi2 (discratch, scratch));
>>>>> - /* Assign discratch directly if scratch is already DI. */
>>>>> - if (icode == CODE_FOR_vsx_lxvrdx)
>>>>> - discratch = scratch;
>>>>> + {
>>>>> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>>>>> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>>>>> + }
>>>>> + else if (icode == CODE_FOR_vsx_lxvrdx)
>>>>> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>>>>> + else
>>>>> + return 0;
>>>> Please call gcc_unreachable () here, instead of returning 0. If we
>>>> call this function with an unexpected insn_code, we want to ICE right
>>>> away to make it easier to debug the problem.
>>>>
>>>>> - /* Emit the sign extension from DI (double) to TI (quad). */
>>>>> - emit_insn (gen_extendditi2 (target, discratch));
>>>>> + /* Emit the sign extension from V2DI (double) to TI (quad). */
>>>>> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>>>>> + emit_insn (gen_extendditi2_vector (target, temp2));
>>>>>
>>>>> return target;
>>>>> }
>>>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>>>>> index bcb92be2f5c..987f21bbc22 100644
>>>>> --- a/gcc/config/rs6000/vsx.md
>>>>> +++ b/gcc/config/rs6000/vsx.md
>>>>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
>>>>> "vextsh2<wd> %0,%1"
>>>>> [(set_attr "type" "vecexts")])
>>>>>
>>>>> -(define_insn "*vsx_sign_extend_si_v2di"
>>>>> +(define_insn "vsx_sign_extend_si_v2di"
>>>>> [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>>>>> (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>>>>> UNSPEC_VSX_SIGN_EXTEND))]
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>>>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>>>> new file mode 100644
>>>>> index 00000000000..e5b84e6167a
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>>>> @@ -0,0 +1,32 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>>>> You need a { dg-require-effective-target int128 } directive. Also {
>>>> dg-require-effective-target power10_ok }.
>>>>> +
>>>>> +#include <altivec.h>
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo1 (signed long a, signed char *b)
>>>>> +{
>>>>> + return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo2 (signed long a, signed short *b)
>>>>> +{
>>>>> + return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo3 (signed long a, signed int *b)
>>>>> +{
>>>>> + return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo4 (signed long a, signed long *b)
>>>>> +{
>>>>> + return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>>>>> +/* { dg-final { scan-assembler-times
>>>>> {\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */
>>>> I'd rather see the exact number of times each instruction is expected,
>>>> since this would match if all of foo1, foo2, and foo3 emitted
>>>> vextsb2d, for example.
>>>>
>>>> I don't see any additional problems...over to Segher.
>>>>
>>>> Thanks for the patch!
>>>> Bill
>>>>
>>>>> +/* { dg-final { scan-assembler-not "mtvsrdd" } } */
>>>>>