The following patch adds support for the popc and mul.wide instructions to the nvptx backend. I've a follow-up patch for supporting mul.hi instructions, but those changes require some minor tweaks to GCC's middle-end, so I'll submit those pieces separately.
Tested by "make" and "make -k check" on --build=nvptx-none hosted on x86_64-pc-linux-gnu with no new regressions. 2020-07-01 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog: * config/nvptx/nvptx.md (popcount<mode>2): New instructions. (mulhisi3, mulsidi3, umulhisi3, umulsidi3): New instructions. gcc/testsuite/ChangeLog: * gcc.target/nvptx/popc-1.c: New test. * gcc.target/nvptx/popc-2.c: New test. * gcc.target/nvptx/popc-3.c: New test. * gcc.target/nvptx/mul-wide.c: New test. * gcc.target/nvptx/umul-wide.c: New test. Ok for mainline? Thanks in advance, Roger -- Roger Sayle NextMove Software Cambridge, UK
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 089cdf0..5ceeac7 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -493,6 +493,50 @@ DONE; }) +(define_insn "popcount<mode>2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))] + "" + "%.\\tpopc.b%T1\\t%0, %1;") + +;; Multiplication variants + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "nvptx_register_operand" "R")) + (sign_extend:SI + (match_operand:HI 2 "nvptx_register_operand" "R"))))] + "" + "%.\\tmul.wide.s16\\t%0, %1, %2;") + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R") + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nvptx_register_operand" "R")) + (sign_extend:DI + (match_operand:SI 2 "nvptx_register_operand" "R"))))] + "" + "%.\\tmul.wide.s32\\t%0, %1, %2;") + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "nvptx_register_operand" "R")) + (zero_extend:SI + (match_operand:HI 2 "nvptx_register_operand" "R"))))] + "" + "%.\\tmul.wide.u16\\t%0, %1, %2;") + +(define_insn "umulsidi3" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nvptx_register_operand" "R")) + (zero_extend:DI + (match_operand:SI 2 "nvptx_register_operand" "R"))))] + "" + "%.\\tmul.wide.u32\\t%0, %1, %2;") + ;; Shifts (define_insn "ashl<mode>3"
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Population count of an unsigned int argument.  The scan below
   requires exactly one "popc.b32" in the generated assembly.  */

unsigned int
foo (unsigned int x)
{
  int bits = __builtin_popcount (x);

  return bits;
}

/* { dg-final { scan-assembler-times "popc.b32" 1 } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Population count of an unsigned long argument, returned as
   unsigned long.  The builtin yields an int, so the result has to be
   widened on return; the scans below require exactly one "popc.b64"
   and exactly one "cvt.s64.s32".  */

unsigned long
foo (unsigned long x)
{
  int bits = __builtin_popcountl (x);

  return bits;
}

/* { dg-final { scan-assembler-times "popc.b64" 1 } } */
/* { dg-final { scan-assembler-times "cvt.s64.s32" 1 } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Population count of an unsigned long argument, returned as
   unsigned int.  The scans below require exactly one "popc.b64" and
   no "cvt.s64.s32" at all.  */

unsigned int
foo (unsigned long x)
{
  int bits = __builtin_popcountl (x);

  return bits;
}

/* { dg-final { scan-assembler-times "popc.b64" 1 } } */
/* { dg-final { scan-assembler-times "cvt.s64.s32" 0 } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Signed widening multiplies.  Each function widens both operands
   before multiplying; the scans below require exactly one
   "mul.wide.s16" and exactly one "mul.wide.s32".  */

/* short * short -> int.  */

int
mulhisi3 (short x, short y)
{
  int lhs = x;
  int rhs = y;

  return lhs * rhs;
}

/* int * int -> long.  */

long
mulsidi3 (int x, int y)
{
  long lhs = x;
  long rhs = y;

  return lhs * rhs;
}

/* { dg-final { scan-assembler-times "mul.wide.s16" 1 } } */
/* { dg-final { scan-assembler-times "mul.wide.s32" 1 } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Unsigned widening multiplies.  Each function widens both operands
   before multiplying; the scans below require exactly one
   "mul.wide.u16" and exactly one "mul.wide.u32".  */

/* unsigned short * unsigned short -> unsigned int.  */

unsigned int
umulhisi3 (unsigned short x, unsigned short y)
{
  unsigned int lhs = x;
  unsigned int rhs = y;

  return lhs * rhs;
}

/* unsigned int * unsigned int -> unsigned long.  */

unsigned long
umulsidi3 (unsigned int x, unsigned int y)
{
  unsigned long lhs = x;
  unsigned long rhs = y;

  return lhs * rhs;
}

/* { dg-final { scan-assembler-times "mul.wide.u16" 1 } } */
/* { dg-final { scan-assembler-times "mul.wide.u32" 1 } } */