[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic created this revision. Herald added subscribers: dexonsmith, dang. rsanthir.quic requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. With this implementation "-Wstack-usage" acts as an alias to "-Wframe-larger-than" Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D102782 Files: clang/include/clang/Driver/Options.td clang/lib/Basic/Warnings.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,17 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wno-stack-usage= -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Basic/Warnings.cpp === --- clang/lib/Basic/Warnings.cpp +++ clang/lib/Basic/Warnings.cpp @@ -94,6 +94,11 @@ if (Opt == "format=0") Opt = "no-format"; + // -Wstack-usage is aliased to -Wframe-larger-than, this handles + // the negative case, as table gen does not. + if (Opt == "no-stack-usage=") +Opt = "no-frame-larger-than="; + // Check to see if this warning starts with "no-", if so, this is a // negative form of the option. 
bool isPositive = true; Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2500,6 +2500,8 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"], "fterminated-vtables">, Alias; defm threadsafe_statics : BoolFOption<"threadsafe-statics", Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,17 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wno-stack-usage= -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Basic/Warnings.cpp === --- clang/lib/Basic/Warnings.cpp +++ clang/lib/Basic/Warnings.cpp @@ -94,6 +94,11 @@ if (Opt == "format=0") Opt = "no-format"; + // -Wstack-usage is aliased to -Wframe-larger-than, this handles + // the negative case, as table gen does not. 
+ if (Opt == "no-stack-usage=") +Opt = "no-frame-larger-than="; + // Check to see if this warning starts with "no-", if so, this is a // negative form of the option. bool isPositive = true; Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2500,6 +2500,8 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"]
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic updated this revision to Diff 346582. rsanthir.quic added a comment. Localized changes to Options.td, needed to explicitly add negative flag for -Wframe-larger-than Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 Files: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,17 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wno-stack-usage= -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if (Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than_EQ, false)) +if (Arg *A = 
Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { + StringRef v = A->getValue(); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); + A->claim(); +} if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2500,6 +2500,11 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wno_frame_larger_than_EQ : Joined<["-"], "Wnoframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; +def Wno_stack_stack_usage_EQ : Joined<["-"], "Wno-stack-usage=">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"], "fterminated-vtables">, Alias; defm threadsafe_statics : BoolFOption<"threadsafe-statics", Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,17 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wno-stack-usage= -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int 
test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if (Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + opti
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. Thanks for reviewing, @bruno. It doesn't look like the failure is related to my change: https://buildkite.com/llvm-project/premerge-checks/builds/39905#5f70c261-ae54-451b-b771-7012bcee7387 ("No space left on device"), unless I am looking at the wrong thing. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D101965: Added Support for Warning Flag -Wstack-usage=
rsanthir.quic abandoned this revision. rsanthir.quic added a comment. Moving forward with a simpler approach here: https://reviews.llvm.org/D102782 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D101965/new/ https://reviews.llvm.org/D101965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D101964: Added support for -Wstack-usage flag and Framesize reporting fix
rsanthir.quic abandoned this revision. rsanthir.quic added a comment. Moving forward with a simpler approach here: https://reviews.llvm.org/D102782 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D101964/new/ https://reviews.llvm.org/D101964 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic updated this revision to Diff 348531. rsanthir.quic added a comment. rebase Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 Files: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,17 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wno-stack-usage= -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if (Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than_EQ, false)) +if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { + StringRef v = A->getValue(); + CmdArgs.push_back("-mllvm"); + 
CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); + A->claim(); +} if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2500,6 +2500,11 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wno_frame_larger_than_EQ : Joined<["-"], "Wnoframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; +def Wno_stack_stack_usage_EQ : Joined<["-"], "Wno-stack-usage=">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"], "fterminated-vtables">, Alias; defm threadsafe_statics : BoolFOption<"threadsafe-statics", Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,17 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wno-stack-usage= -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- 
clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if (Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than_EQ, false)) +if (Arg *A = Args.getLastArg(options::OPT
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic updated this revision to Diff 348555. rsanthir.quic added a comment. Updated test Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 Files: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,21 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage= -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if (Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than_EQ, false)) +if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { + StringRef v = A->getValue(); + 
CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); + A->claim(); +} if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2500,6 +2500,11 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wno_frame_larger_than_EQ : Joined<["-"], "Wnoframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; +def Wno_stack_stack_usage_EQ : Joined<["-"], "Wno-stack-usage=">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"], "fterminated-vtables">, Alias; defm threadsafe_statics : BoolFOption<"threadsafe-statics", Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,21 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage= -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: 
clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if (Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than_EQ, false)
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic updated this revision to Diff 348563. rsanthir.quic added a comment. Updated test and aligned negative flag with gcc Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 Files: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,24 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wno-stack-usage -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if 
(Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than, false)) +if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { + StringRef v = A->getValue(); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); + A->claim(); +} if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2500,6 +2500,11 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wno_frame_larger_than : Joined<["-"], "Wno-frame-larger-than">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; +def Wno_stack_stack_usage : Joined<["-"], "Wno-stack-usage">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"], "fterminated-vtables">, Alias; defm threadsafe_statics : BoolFOption<"threadsafe-statics", Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,24 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wno-stack-usage -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=100 -o 
/dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4762,12 +4762,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic updated this revision to Diff 349274. rsanthir.quic added a comment. Updated Release Notes and rebased Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 Files: clang/docs/ReleaseNotes.rst clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,24 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wno-stack-usage -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + Index: clang/lib/Driver/ToolChains/Clang.cpp === --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4826,12 +4826,14 @@ D.Diag(diag::err_aix_default_altivec_abi); } - if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { -StringRef v = A->getValue(); -CmdArgs.push_back("-mllvm"); -CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); -A->claim(); - } + if 
(Args.hasFlag(options::OPT_Wframe_larger_than_EQ, + options::OPT_Wno_frame_larger_than, false)) +if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { + StringRef v = A->getValue(); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString("-warn-stack-size=" + v)); + A->claim(); +} if (!Args.hasFlag(options::OPT_fjump_tables, options::OPT_fno_jump_tables, true)) Index: clang/include/clang/Driver/Options.td === --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2577,6 +2577,11 @@ def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; def Wlarger_than_ : Joined<["-"], "Wlarger-than-">, Alias; def Wframe_larger_than_EQ : Joined<["-"], "Wframe-larger-than=">, Group, Flags<[NoXarchOption]>; +def Wno_frame_larger_than : Joined<["-"], "Wno-frame-larger-than">, Group, Flags<[NoXarchOption]>; +def Wstack_stack_usage_EQ : Joined<["-"], "Wstack-usage=">, Flags<[NoXarchOption]>, + Alias; +def Wno_stack_stack_usage : Joined<["-"], "Wno-stack-usage">, Flags<[NoXarchOption]>, + Alias; def : Flag<["-"], "fterminated-vtables">, Alias; defm threadsafe_statics : BoolFOption<"threadsafe-statics", Index: clang/docs/ReleaseNotes.rst === --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -76,6 +76,9 @@ file contains frame size information for each function defined in the source file. +- ``-Wstack-usage=`` warn if stack usage of user functions might + exceed . 
+ Deprecated Compiler Flags - Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,24 @@ +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wno-stack-usage -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN + +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty + +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} by
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. ping Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D95655: Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic created this revision. rsanthir.quic added reviewers: t.p.northover, pbarrio. Herald added subscribers: hiraditya, kristof.beyls. rsanthir.quic requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits. This adds SM3 and SM4 Intrinsics support for AArch64, specifically: vsm3ss1q_u32 vsm3tt1aq_u32 vsm3tt1bq_u32 vsm3tt2aq_u32 vsm3tt2bq_u32 vsm3partw1q_u32 vsm3partw2q_u32 vsm4eq_u32 vsm4ekeyq_u32 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D95655 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-sm4-sm3-invalid.c clang/test/CodeGen/aarch64-neon-sm4-sm3.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll Index: llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sm4 -o - | FileCheck %s + +define <4 x i32> @test_vsm3partw1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3partw1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3partw1 v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3partw1.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3partw1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3partw1.i +} + +define <4 x i32> @test_vsm3partw2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3partw2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3partw2 v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3partw2.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3partw2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3partw2.i +} + +define <4 x i32> @test_vsm3ss1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { +; CHECK-LABEL: test_vsm3ss1: +; CHECK: // %bb.0: // %entry +; 
CHECK-NEXT:sm3ss1 v0.4s, v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3ss1.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3ss1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3ss1.i +} + +define <4 x i32> @test_vsm3tt1a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt1a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt1a v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt1a.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt1a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt1a.i +} + +define <4 x i32> @test_vsm3tt1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt1b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt1b v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt1b.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt1b.i +} + +define <4 x i32> @test_vsm3tt2a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt2a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt2a v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt2a.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt2a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt2a.i +} + +define <4 x i32> @test_vsm3tt2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt2b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt2b v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt2b.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt2b.i +} + +define <4 x i32> @test_vsm4e(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsm4e: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4e v1.4s, v0.4s +; CHECK-NEXT:mov v0.16b, v1.16b +; CHECK-NEXT:ret +entry: + %vsm4e.i = tail call <4 x i32> @llvm.aarch64.crypto.sm4e(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %vsm4e.i +} + +define <4 x i32> @test_vsm4ekey(<4 x i32> 
%a, <4 x i32> %b) { +; CHECK-LABEL: test_vsm4ekey: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4ekey v0.4s, v0.4s, v1.4s +; CHECK-NEXT:ret +entry: + %vsm4ekey.i = tail call <4 x i32> @llvm.aarch64.crypto.sm4ekey(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %vsm4ekey.i +} + +declare <4 x i32> @llvm.aarch64.crypto.sm3partw1(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3partw2(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3ss1(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt1a(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt2b(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt2a(<
[PATCH] D95655: Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic added a comment. This is the first in a series of patches that will address the following: https://bugs.llvm.org/show_bug.cgi?id=47828 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95655/new/ https://reviews.llvm.org/D95655 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D95655: [AArch64] Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic marked 2 inline comments as done. rsanthir.quic added a comment. Thank you for taking a look at this @labrinea ! Comment at: llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll:77 +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4e v1.4s, v0.4s +; CHECK-NEXT:mov v0.16b, v1.16b labrinea wrote: > Shouldn't the registers be the other way around: sm4e v0.4s, v1.4s ? I > believe the reason this happens is because of how CryptoRRTied is defined in > `llvm/lib/Target/AArch64/AArch64InstrFormats.td`: > > > ``` > class CryptoRRTied<bits<1>op0, bits<2>op1, string asm, string asmops> > : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, asmops, > "$Vm = $Vd", []> { > ``` > > Vd should be the first source register (as well as destination register) > and Vn should be the second source register. I see what you mean; this has the added effect of correcting SHA512SU0 as well. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95655/new/ https://reviews.llvm.org/D95655 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D95655: [AArch64] Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic updated this revision to Diff 320199. rsanthir.quic marked an inline comment as done. rsanthir.quic added a comment. Corrected register ordering for sm4e and removed redundant argument in sm3ss1 test CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95655/new/ https://reviews.llvm.org/D95655 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-sm4-sm3-invalid.c clang/test/CodeGen/aarch64-neon-sm4-sm3.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll Index: llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sm4 -o - | FileCheck %s + +define <4 x i32> @test_vsm3partw1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3partw1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3partw1 v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3partw1.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3partw1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3partw1.i +} + +define <4 x i32> @test_vsm3partw2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3partw2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3partw2 v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3partw2.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3partw2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3partw2.i +} + +define <4 x i32> @test_vsm3ss1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3ss1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3ss1 v0.4s, v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3ss1.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3ss1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3ss1.i +} + 
+define <4 x i32> @test_vsm3tt1a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt1a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt1a v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt1a.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt1a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt1a.i +} + +define <4 x i32> @test_vsm3tt1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt1b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt1b v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt1b.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt1b.i +} + +define <4 x i32> @test_vsm3tt2a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt2a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt2a v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt2a.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt2a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt2a.i +} + +define <4 x i32> @test_vsm3tt2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt2b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt2b v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt2b.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt2b.i +} + +define <4 x i32> @test_vsm4e(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsm4e: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4e v0.4s, v1.4s +; CHECK-NEXT:ret +entry: + %vsm4e.i = tail call <4 x i32> @llvm.aarch64.crypto.sm4e(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %vsm4e.i +} + +define <4 x i32> @test_vsm4ekey(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsm4ekey: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4ekey v0.4s, v0.4s, v1.4s +; CHECK-NEXT:ret +entry: + %vsm4ekey.i = tail call <4 x i32> @llvm.aarch64.crypto.sm4ekey(<4 x i32> %a, <4 x i32> %b) + 
ret <4 x i32> %vsm4ekey.i +} + +declare <4 x i32> @llvm.aarch64.crypto.sm3partw1(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3partw2(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3ss1(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt1a(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt2b(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt2a(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt1b(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm4e(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm4ekey(<4
[PATCH] D95655: [AArch64] Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic added a comment. @t.p.northover Could you take a look before I ask for this to be merged? CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95655/new/ https://reviews.llvm.org/D95655 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D95655: [AArch64] Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic updated this revision to Diff 322470. rsanthir.quic added a comment. Updated test name and checks performed CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95655/new/ https://reviews.llvm.org/D95655 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-range-checks.c clang/test/CodeGen/aarch64-neon-sm4-sm3.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll Index: llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sm4-sm3.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sm4 -o - | FileCheck %s + +define <4 x i32> @test_vsm3partw1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3partw1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3partw1 v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3partw1.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3partw1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3partw1.i +} + +define <4 x i32> @test_vsm3partw2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3partw2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3partw2 v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3partw2.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3partw2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3partw2.i +} + +define <4 x i32> @test_vsm3ss1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3ss1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3ss1 v0.4s, v0.4s, v1.4s, v2.4s +; CHECK-NEXT:ret +entry: + %vsm3ss1.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3ss1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %vsm3ss1.i +} + +define <4 x i32> @test_vsm3tt1a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: 
test_vsm3tt1a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt1a v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt1a.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt1a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt1a.i +} + +define <4 x i32> @test_vsm3tt1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt1b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt1b v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt1b.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt1b.i +} + +define <4 x i32> @test_vsm3tt2a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt2a: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt2a v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt2a.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt2a(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt2a.i +} + +define <4 x i32> @test_vsm3tt2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: test_vsm3tt2b: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm3tt2b v0.4s, v1.4s, v2.s[2] +; CHECK-NEXT:ret +entry: + %vsm3tt2b.i = tail call <4 x i32> @llvm.aarch64.crypto.sm3tt2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i64 2) + ret <4 x i32> %vsm3tt2b.i +} + +define <4 x i32> @test_vsm4e(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsm4e: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4e v0.4s, v1.4s +; CHECK-NEXT:ret +entry: + %vsm4e.i = tail call <4 x i32> @llvm.aarch64.crypto.sm4e(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %vsm4e.i +} + +define <4 x i32> @test_vsm4ekey(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vsm4ekey: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sm4ekey v0.4s, v0.4s, v1.4s +; CHECK-NEXT:ret +entry: + %vsm4ekey.i = tail call <4 x i32> @llvm.aarch64.crypto.sm4ekey(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %vsm4ekey.i +} + +declare <4 x i32> @llvm.aarch64.crypto.sm3partw1(<4 x i32>, 
<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3partw2(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3ss1(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt1a(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt2b(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt2a(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm3tt1b(<4 x i32>, <4 x i32>, <4 x i32>, i64 immarg) +declare <4 x i32> @llvm.aarch64.crypto.sm4e(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.crypto.sm4ekey(<4 x i32>, <4 x i32>) Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td ==
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic created this revision. rsanthir.quic added reviewers: apazos, t.p.northover, labrinea, pbarrio. Herald added subscribers: danielkiss, hiraditya, kristof.beyls. rsanthir.quic requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits. This patch adds the following SHA3 Intrinsics: vsha512hq_u64, vsha512h2q_u64, vsha512su0q_u64, vsha512su1q_u64 veor3q_u8 veor3q_u16 veor3q_u32 veor3q_u64 veor3q_s8 veor3q_s16 veor3q_s32 veor3q_s64 vrax1q_u64 vxarq_u64 vbcaxq_u8 vbcaxq_u16 vbcaxq_u32 vbcaxq_u64 vbcaxq_s8 vbcaxq_s16 vbcaxq_s32 vbcaxq_s64 Note need to include +sha3 and +crypto when building from the front-end Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D96381 Files: clang/include/clang/Basic/arm_neon.td clang/include/clang/Basic/arm_neon_incl.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-range-checks.c clang/test/CodeGen/aarch64-neon-sha3.c clang/utils/TableGen/NeonEmitter.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sha3.ll Index: llvm/test/CodeGen/AArch64/neon-sha3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sha3.ll @@ -0,0 +1,105 @@ +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s + +define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h.i +} + +define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h2 q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x 
i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h2.i +} + +define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsha512su0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su0 v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vsha512su0.i +} + +define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512su1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su1 v0.2d, v1.2d, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512su1.i +} + +define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vrax1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:rax1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vrax1.i +} + +define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vxar: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:xar v0.2d, v0.2d, v1.2d, #1 +; CHECK-NEXT:ret +entry: + %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1) + ret <2 x i64> %vxar.i +} + +define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_bcax_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:bcax v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vbcax_8.i +} + +define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_eor3_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:eor3 v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> 
%b, <16 x i8> %c) + ret <16 x i8> %veor3_8.i +} + +declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg) +declare <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.crypto.bcax
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic added a comment. This is the second of three patches to address the following: https://bugs.llvm.org/show_bug.cgi?id=47828 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96381/new/ https://reviews.llvm.org/D96381 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D95655: [AArch64] Adding Neon Sm3 & Sm4 Intrinsics
rsanthir.quic added a comment. @labrinea could you commit this when you have some time please? CHANGES SINCE LAST ACTION https://reviews.llvm.org/D95655/new/ https://reviews.llvm.org/D95655 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic updated this revision to Diff 322713. rsanthir.quic added a comment. alphabetized check in NeonEmitter CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96381/new/ https://reviews.llvm.org/D96381 Files: clang/include/clang/Basic/arm_neon.td clang/include/clang/Basic/arm_neon_incl.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-range-checks.c clang/test/CodeGen/aarch64-neon-sha3.c clang/utils/TableGen/NeonEmitter.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sha3.ll Index: llvm/test/CodeGen/AArch64/neon-sha3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sha3.ll @@ -0,0 +1,105 @@ +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s + +define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h.i +} + +define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h2 q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h2.i +} + +define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsha512su0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su0 v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vsha512su0.i +} + +define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512su1: +; CHECK: // %bb.0: // %entry +; 
CHECK-NEXT:sha512su1 v0.2d, v1.2d, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512su1.i +} + +define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vrax1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:rax1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vrax1.i +} + +define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vxar: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:xar v0.2d, v0.2d, v1.2d, #1 +; CHECK-NEXT:ret +entry: + %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1) + ret <2 x i64> %vxar.i +} + +define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_bcax_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:bcax v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vbcax_8.i +} + +define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_eor3_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:eor3 v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %veor3_8.i +} + +declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg) +declare <16 x 
i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.crypto.eor3u.v
[PATCH] D96825: [AArch64] Adding Polynomial vadd Intrinsics support
rsanthir.quic created this revision. rsanthir.quic added reviewers: t.p.northover, kevin.qin, labrinea, pbarrio, apazos. Herald added subscribers: danielkiss, kristof.beyls. rsanthir.quic requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. This patch adds the following intrinsics: vadd_p8 vadd_p16 vadd_p64 vaddq_p8 vaddq_p16 vaddq_p64 vaddq_p128 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include <arm_neon.h> + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +//
CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5662,7 +5662,10 @@ NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, 
aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), @@ -6290,6 +6293,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8); +Ops[0] = Builder.CreateBitCast(Ops[0], VTy
[PATCH] D96825: [AArch64] Adding Polynomial vadd Intrinsics support
rsanthir.quic added a comment. This is the final of three patches to address the following: https://bugs.llvm.org/show_bug.cgi?id=47828 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Polynomial vadd Intrinsics support
rsanthir.quic updated this revision to Diff 324153. rsanthir.quic added a comment. rebased due to merge issues CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include <arm_neon.h> + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] =
xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5368,7 +5368,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, 
arm_neon_aesimc, 0), @@ -6299,6 +6302,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8); +Ops[0] = Builder.CreateBitCast(Ops[0], VTy); +Ops[1] = Builder.CreateBitCast(Ops[1], VTy); +Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); +return Builder.CreateBitCast(Ops[0], Ty); + } case NEON::BI__builtin_neon_vaddhn_v: { llvm::FixedVectorType *SrcTy = llvm::Fi
[PATCH] D96825: [AArch64] Adding Polynomial vadd Intrinsics support
rsanthir.quic marked an inline comment as done. rsanthir.quic added inline comments. Comment at: clang/test/CodeGen/aarch64-poly-add.c:4 +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -ffp-contract=fast -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s DavidSpickett wrote: > Do we need this option? (-ffp-contract) You are correct, this is not necessary; I'll remove it. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
rsanthir.quic updated this revision to Diff 324336. rsanthir.quic marked an inline comment as done. rsanthir.quic retitled this revision from "[AArch64] Adding Polynomial vadd Intrinsics support" to "[AArch64] Adding Neon Polynomial vadd Intrinsics". rsanthir.quic added a comment. removed unnecessary flag in test and changed commit message. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> 
+// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5368,7 +5368,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), 
NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -6299,6 +6302,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8); +Ops[0] = Builder.CreateBitCast(Ops[0], VTy); +Ops[1] = Builder.CreateBitCas
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic updated this revision to Diff 324347. rsanthir.quic added a comment. Removed extra whitespace CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96381/new/ https://reviews.llvm.org/D96381 Files: clang/include/clang/Basic/arm_neon.td clang/include/clang/Basic/arm_neon_incl.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-range-checks.c clang/test/CodeGen/aarch64-neon-sha3.c clang/utils/TableGen/NeonEmitter.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sha3.ll Index: llvm/test/CodeGen/AArch64/neon-sha3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sha3.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s + +define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h.i +} + +define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h2 q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h2.i +} + +define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsha512su0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su0 v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vsha512su0.i +} + +define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: 
test_vsha512su1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su1 v0.2d, v1.2d, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512su1.i +} + +define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vrax1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:rax1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vrax1.i +} + +define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vxar: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:xar v0.2d, v0.2d, v1.2d, #1 +; CHECK-NEXT:ret +entry: + %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1) + ret <2 x i64> %vxar.i +} + +define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_bcax_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:bcax v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vbcax_8.i +} + +define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_eor3_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:eor3 v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %veor3_8.i +} + +declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>) +declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>) +declare <2 x i64> 
@llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg) +declare <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) +declare <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8>,
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic updated this revision to Diff 324711. rsanthir.quic marked 3 inline comments as done. rsanthir.quic added a comment. Minor corrections and removed unused code, also added complete testing CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96381/new/ https://reviews.llvm.org/D96381 Files: clang/include/clang/Basic/arm_neon.td clang/include/clang/Basic/arm_neon_incl.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-neon-range-checks.c clang/test/CodeGen/aarch64-neon-sha3.c clang/utils/TableGen/NeonEmitter.cpp llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64InstrFormats.td llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-sha3.ll Index: llvm/test/CodeGen/AArch64/neon-sha3.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/neon-sha3.ll @@ -0,0 +1,246 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s + +define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h.i +} + +define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512h2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512h2 q0, q1, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512h2.i +} + +define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsha512su0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su0 v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vsha512su0.i +} + 
+define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: test_vsha512su1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:sha512su1 v0.2d, v1.2d, v2.2d +; CHECK-NEXT:ret +entry: + %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) + ret <2 x i64> %vsha512su1.i +} + +define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vrax1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:rax1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT:ret +entry: + %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %vrax1.i +} + +define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vxar: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:xar v0.2d, v0.2d, v1.2d, #1 +; CHECK-NEXT:ret +entry: + %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1) + ret <2 x i64> %vxar.i +} + +define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_bcax_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:bcax v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %vbcax_8.i +} + +define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_eor3_8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:eor3 v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %veor3_8.i +} + +define <16 x i8> @test_bcax_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_bcax_s8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:bcax v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 
x i8> %vbcax_8.i +} + +define <16 x i8> @test_eor3_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: test_eor3_s8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:eor3 v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %veor3_8.i +} + +define <8 x i16> @test_bcax_16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CHECK-LABEL: test_bcax_16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT:bcax v0.16b, v0.16b, v1.16b, v2.16b +; CHECK-NEXT:ret +entry: + %vbcax_16.i = tail call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) + ret <8 x i16> %vbcax_16.i +} +
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic added inline comments. Comment at: llvm/lib/Target/AArch64/AArch64InstrInfo.td:928 + +class SHA512H_pattern + : Pat<(v2i64 (OpNode (v2i64 V128:$Vd), (v2i64 V128:$Vn), (v2i64 V128:$Vm))), DavidSpickett wrote: > This is unused. Good catch, thank you! CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96381/new/ https://reviews.llvm.org/D96381 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
rsanthir.quic updated this revision to Diff 325027. rsanthir.quic added a comment. Windows builds were failing due to missing builtins in Intrinsics map CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// 
CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5368,7 +5368,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 
0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -5662,7 +5665,10 @@ NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), @@ -6290,6 +6296,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__bui
[PATCH] D96825: [AArch64] Adding Neon Polynomial vadd Intrinsics
rsanthir.quic updated this revision to Diff 325034. rsanthir.quic added a comment. rebased on main CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96825/new/ https://reviews.llvm.org/D96825 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/aarch64-poly-add.c Index: clang/test/CodeGen/aarch64-poly-add.c === --- /dev/null +++ clang/test/CodeGen/aarch64-poly-add.c @@ -0,0 +1,85 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] 
+// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p128( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT:ret i128 [[TMP3]] +// +poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ + return vaddq_p128(a, b); +} Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5371,7 +5371,10 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), @@ -5665,7 
+5668,10 @@ NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), + NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), + NEONMAP0(vaddq_p128), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), @@ -6302,6 +6308,14 @@ if (VTy->getElementType()->isFloatingPointTy()) return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); + case NEON::BI__builtin_neon_vadd_v: + case NEON::BI__builtin_neon_vaddq_v: { +llvm::Type *VTy = llvm::Fixe
[PATCH] D96381: [AArch64] Adding SHA3 Intrinsics support
rsanthir.quic added a comment. Thank you for reviewing this, @DavidSpickett! If you get a chance, could you commit this for me? I do not have commit access yet. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D96381/new/ https://reviews.llvm.org/D96381 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. @bruno all tests are passing, could you take another look when you have a chance? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added inline comments. Comment at: clang/docs/ReleaseNotes.rst:79-80 +- ``-Wstack-usage=`` warn if stack usage of user functions might + exceed . + Quuxplusone wrote: > Does this mean: > - Warn if the size of any single function's stack frame (including > temporaries and local variables, but not parameters or return addresses) > exceeds . > - Warn if the size of any single function's stack frame (including parameters > and return addresses) exceeds . > - Warn if the total stack usage of the longest visible call chain in this > translation unit might exceed . > ? > Looking at how the value for this flag is obtained, which is an alias of "-Wframe-larger-than", I see that it is all objects placed on the stack by each function plus any space reserved for arguments of callsites in the function. There is no precise definition of what is included as it depends on the target as well as what is stored on the stack or not. I guess in simplest terms, for each function this will report how much stack space is used by that function. Comment at: clang/test/Frontend/backend-stack-usage-diagnostic.c:19-23 +// WARN: warning: stack frame size of {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} Quuxplusone wrote: > This function has no "stack frame" in the usual sense of the word, because > it's just > ``` > movl%edi, %eax > imull %edi, %eax > ret > ``` > So am I correct to infer that the `-Wstack-usage=` option includes the size > of the return address in its computations? So the stack usage of this > function would be computed as "8 bytes" because the `callq` instruction > pushes 8 bytes on the stack? > This seems eminently reasonable to me, but non-obvious, and should get some > user-facing documentation. When a function stores the return address on the stack, that will be included in that function's stack usage.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. ping @Quuxplusone: this is being used as a diagnostic flag that aliases "-Wframe-larger-than". I think that if we want to add more user-facing documentation to clarify things, we should revisit what "-Wframe-larger-than=" tracks as well. Currently there isn't much information on that flag either. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. ping Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. @MaskRay Yes, this would unblock applications. Regarding your concern, the information from this implementation as well as GCC's should be used conservatively, as both are approximate. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D102782: Add support for Warning Flag "-Wstack-usage="
rsanthir.quic added a comment. This was discussed on the llvm-dev mailing list. Originally we had a change that was closer to what GCC was reporting; however, there was no consensus on what was needed. The purpose of this change is to bring parity with GCC in terms of available options. @lebedev.ri, could you chime in on what your specific use for this flag is? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D102782/new/ https://reviews.llvm.org/D102782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix
rsanthir.quic abandoned this revision. rsanthir.quic added a comment. Merging with https://reviews.llvm.org/D100772 CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100499/new/ https://reviews.llvm.org/D100499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100772: [ARM] Neon Polynomial vadd Intrinsic fix
rsanthir.quic marked an inline comment as done. rsanthir.quic added inline comments. Comment at: clang/include/clang/Basic/arm_neon.td:712 + +// Crypto +// TODO: poly128_t not implemented on aarch32 DavidSpickett wrote: > This isn't a crypto intrinsic, it just happens to be right after a block like: > ``` > let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)" in { > <...> > def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">; > } > ``` > > You could instead say: > ``` > // Non poly128_t vaddp for Arm and AArch64 > ``` > (keep the todo) I agree that it isn't strictly crypto, although I think it is used primarily for cryptographic applications. I'll apply your suggestion. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100772/new/ https://reviews.llvm.org/D100772 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100772: [ARM] Neon Polynomial vadd Intrinsic fix
rsanthir.quic updated this revision to Diff 340567. rsanthir.quic marked an inline comment as done. rsanthir.quic added a comment. minor fixes and merged https://reviews.llvm.org/D100499 CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100772/new/ https://reviews.llvm.org/D100772 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/arm-poly-add.c Index: clang/test/CodeGen/arm-poly-add.c === --- /dev/null +++ clang/test/CodeGen/arm-poly-add.c @@ -0,0 +1,86 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi \ +// RUN: -target-feature +neon \ +// RUN: -mfloat-abi hard \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// 
CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// TODO: poly128_t not implemented on aarch32 +// CHCK-LABEL: @test_vaddq_p128( +// CHCK-NEXT: entry: +// CHCK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHCK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHCK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHCK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHCK-NEXT:ret i128 [[TMP3]] +// +//poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ +// return vaddq_p128(a, b); Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5460,7 +5460,6 @@ NEONMAP1(vabsq_v, arm_neon_vabs, 0), NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), - NEONMAP0(vaddq_p128), NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), Index: clang/include/clang/Basic/arm_neon.td === --- 
clang/include/clang/Basic/arm_neon.td +++ clang/include/clang/Basic/arm_neon.td @@ -708,6 +708,11 @@ def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>; def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>; + +// Non poly128_t vaddp for Arm and AArch64 +// TODO: poly128_t not implemented on arm32 +def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">; + // AArch64 Intrinsics @@ -1160,7 +1165,9 @@ def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
[PATCH] D100772: [ARM] Neon Polynomial vadd Intrinsic fix
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG0395f9e70b8f: [ARM] Neon Polynomial vadd Intrinsic fix (authored by rsanthir.quic). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100772/new/ https://reviews.llvm.org/D100772 Files: clang/include/clang/Basic/arm_neon.td clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/arm-poly-add.c Index: clang/test/CodeGen/arm-poly-add.c === --- /dev/null +++ clang/test/CodeGen/arm-poly-add.c @@ -0,0 +1,86 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi \ +// RUN: -target-feature +neon \ +// RUN: -mfloat-abi hard \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + 
+// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// TODO: poly128_t not implemented on aarch32 +// CHCK-LABEL: @test_vaddq_p128( +// CHCK-NEXT: entry: +// CHCK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHCK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHCK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHCK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHCK-NEXT:ret i128 [[TMP3]] +// +//poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ +// return vaddq_p128(a, b); Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5460,7 +5460,6 @@ NEONMAP1(vabsq_v, arm_neon_vabs, 0), NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), - NEONMAP0(vaddq_p128), NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), Index: 
clang/include/clang/Basic/arm_neon.td === --- clang/include/clang/Basic/arm_neon.td +++ clang/include/clang/Basic/arm_neon.td @@ -708,6 +708,11 @@ def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>; def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>; + +// Non poly128_t vaddp for Arm and AArch64 +// TODO: poly128_t not implemented on arm32 +def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">; + // AArch6
[PATCH] D101964: Added support for -Wstack-usage flag and Framesize reporting fix
rsanthir.quic created this revision. Herald added subscribers: dexonsmith, dang, hiraditya. rsanthir.quic requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits. This patch adds support for the -Wstack-usage flag. It also changes how -Wframe-larger-than reports its frame size. It now excludes the space allocated to hold parameters for called functions. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D101964 Files: clang/include/clang/Basic/DiagnosticFrontendKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Driver/Options.td clang/lib/CodeGen/CodeGenAction.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-diagnostic.c clang/test/Frontend/backend-stack-usage-diagnostic.c clang/test/Misc/backend-stack-frame-diagnostics-fallback.cpp llvm/include/llvm/CodeGen/MachineFrameInfo.h llvm/include/llvm/IR/DiagnosticInfo.h llvm/lib/CodeGen/PrologEpilogInserter.cpp Index: llvm/lib/CodeGen/PrologEpilogInserter.cpp === --- llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -139,9 +139,14 @@ char &llvm::PrologEpilogCodeInserterID = PEI::ID; static cl::opt -WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), - cl::desc("Warn for stack size bigger than the given" - " number")); +WarnFrameSize("warn-frame-size", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for frame size bigger than the given" + " number")); + +static cl::opt +WarnStackUsage("warn-stack-usage", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" +" number")); INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, false) @@ -275,11 +280,19 @@ if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(MF, *RS); - // Warn on stack size when we exceeds the given limit. 
+ // Warn on stack usage size when we exceed the given limit MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); - if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { -DiagnosticInfoStackSize DiagStackSize(F, StackSize); + if (WarnStackUsage.getNumOccurrences() > 0 && WarnStackUsage <= StackSize) { +DiagnosticInfoStackSize DiagStackUsage( +F, StackSize, MFI.hasVarSizedObjects(), true /*Stack Usage*/); +F.getContext().diagnose(DiagStackUsage); + } + + uint64_t FrameSize = MFI.getFrameSize(); + // Warn on Frame size when we exceed the given limit. + if (WarnFrameSize.getNumOccurrences() > 0 && WarnFrameSize < FrameSize) { +DiagnosticInfoStackSize DiagStackSize(F, FrameSize); F.getContext().diagnose(DiagStackSize); } ORE->emit([&]() { @@ -1048,6 +1061,10 @@ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew); } + int64_t OffsetBeforeMaxCallFrameSize = Offset; + int64_t FrameSize = OffsetBeforeMaxCallFrameSize - LocalAreaOffset; + MFI.setFrameSize(FrameSize); + if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the @@ -1073,6 +1090,11 @@ int64_t OffsetBeforeAlignment = Offset; Offset = alignTo(Offset, StackAlign, Skew); +OffsetBeforeMaxCallFrameSize = +alignTo(OffsetBeforeMaxCallFrameSize, StackAlign, Skew); +int64_t FrameSize = OffsetBeforeMaxCallFrameSize - LocalAreaOffset; +MFI.setFrameSize(FrameSize); + // If we have increased the offset to fulfill the alignment constrants, // then the scavenging spill slots may become harder to reach from the // stack pointer, float them so they stay close. 
Index: llvm/include/llvm/IR/DiagnosticInfo.h === --- llvm/include/llvm/IR/DiagnosticInfo.h +++ llvm/include/llvm/IR/DiagnosticInfo.h @@ -216,15 +216,24 @@ class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit { void anchor() override; + bool HasVarSizedObjects; + bool PrintStackUsageInfo; + public: DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize, + bool HasVarSizedObjects = false, + bool PrintStackUsageInfo = false, DiagnosticSeverity Severity = DS_Warning, uint64_t StackLimit = 0) : DiagnosticInfoResourceLimit(Fn, "stack size", StackSize, Severity, -DK_StackSize, StackLimit) {} +DK_StackSize, StackLimit), +HasVarSizedObjects(HasVarSizedObjects), +
[PATCH] D101965: Added Support for Warning Flag -Wstack-usage=
rsanthir.quic created this revision. Herald added subscribers: dexonsmith, dang, hiraditya. rsanthir.quic requested review of this revision. Herald added projects: clang, LLVM. Herald added subscribers: llvm-commits, cfe-commits. The -Wstack-usage flag has been added, which provides a warning message when the size of the stack exceeds a user-provided value. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D101965 Files: clang/include/clang/Basic/DiagnosticFrontendKinds.td clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Driver/Options.td clang/lib/CodeGen/CodeGenAction.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/test/Frontend/backend-stack-usage-diagnostic.c llvm/include/llvm/IR/DiagnosticInfo.h llvm/lib/CodeGen/PrologEpilogInserter.cpp Index: llvm/lib/CodeGen/PrologEpilogInserter.cpp === --- llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -143,6 +143,11 @@ cl::desc("Warn for stack size bigger than the given" " number")); +static cl::opt +WarnStackUsage("warn-stack-usage", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" +" number")); + INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -275,9 +280,16 @@ if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(MF, *RS); - // Warn on stack size when we exceeds the given limit. + // Warn on stack usage size when we exceed the given limit MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); + if (WarnStackUsage.getNumOccurrences() > 0 && WarnStackUsage <= StackSize) { +DiagnosticInfoStackSize DiagStackUsage( +F, StackSize, MFI.hasVarSizedObjects(), true /*Stack Usage*/); +F.getContext().diagnose(DiagStackUsage); + } + + // Warn on stack size when we exceeds the given limit. 
if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { DiagnosticInfoStackSize DiagStackSize(F, StackSize); F.getContext().diagnose(DiagStackSize); Index: llvm/include/llvm/IR/DiagnosticInfo.h === --- llvm/include/llvm/IR/DiagnosticInfo.h +++ llvm/include/llvm/IR/DiagnosticInfo.h @@ -216,15 +216,24 @@ class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit { void anchor() override; + bool HasVarSizedObjects; + bool PrintStackUsageInfo; + public: DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize, + bool HasVarSizedObjects = false, + bool PrintStackUsageInfo = false, DiagnosticSeverity Severity = DS_Warning, uint64_t StackLimit = 0) : DiagnosticInfoResourceLimit(Fn, "stack size", StackSize, Severity, -DK_StackSize, StackLimit) {} +DK_StackSize, StackLimit), +HasVarSizedObjects(HasVarSizedObjects), +PrintStackUsageInfo(PrintStackUsageInfo) {} uint64_t getStackSize() const { return getResourceSize(); } uint64_t getStackLimit() const { return getResourceLimit(); } + bool hasVarSizedObjects() const { return HasVarSizedObjects; } + bool printStackUsageInfo() const { return PrintStackUsageInfo; } static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_StackSize; Index: clang/test/Frontend/backend-stack-usage-diagnostic.c === --- /dev/null +++ clang/test/Frontend/backend-stack-usage-diagnostic.c @@ -0,0 +1,24 @@ + +// RUN: %clang -Wstack-usage=0 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=0 -Wno-stack-usage= -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=0 -w -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=IGNORE --allow-empty +// RUN: %clang -Wstack-usage=3 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s --check-prefix=WARN +// RUN: %clang -Wstack-usage=100 -o /dev/null -c %s 2> %t.err +// RUN: FileCheck < %t.err %s 
--check-prefix=IGNORE --allow-empty + +// WARN: warning: stack usage is {{[0-9]+}} bytes in function 'test_square' +// IGNORE-NOT: stack usage is {{[0-9]+}} bytes in function 'test_square' +int test_square(int num) { + return num * num; +} + +// WARN: warning: stack usage might be {{[0-9]+}} bytes in function 'test_unbounded' +// IGNORE-NOT: stack usage might be {{[0-9]+}} bytes in function 'test_unbounded' +int test_unbounded(int len) { + char a[len]; + return 1; +} Index: clang/lib/
[PATCH] D101964: Added support for -Wstack-usage flag and Framesize reporting fix
rsanthir.quic added a comment. Another approach is explored here: https://reviews.llvm.org/D101965 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D101964/new/ https://reviews.llvm.org/D101964 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D101965: Added Support for Warning Flag -Wstack-usage=
rsanthir.quic added a comment. Another approach is explored here: https://reviews.llvm.org/D101964 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D101965/new/ https://reviews.llvm.org/D101965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix
rsanthir.quic created this revision. rsanthir.quic added reviewers: t.p.northover, DavidSpickett, labrinea, apazos. Herald added subscribers: danielkiss, kristof.beyls. rsanthir.quic requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. The Neon vadd intrinsics were added to both the ARMSIMD and AArch64SIMD Intrinsic maps. The intrinsics should only be supported for AArch64; this patch corrects this by removing the mapping from ARMSIMDIntrinsicMap that was added in https://reviews.llvm.org/D96825 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D100499 Files: clang/lib/CodeGen/CGBuiltin.cpp Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5458,10 +5458,7 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), - NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), - NEONMAP0(vaddq_p128), - NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5458,10 +5458,7 @@ NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), - NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), - NEONMAP0(vaddq_p128), - NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D99079: [ARM][AArch64] Require appropriate features for crypto algorithms
rsanthir.quic added inline comments. Comment at: clang/lib/Basic/Targets/AArch64.h:36 + bool HasSHA3; + bool HasSM4; bool HasUnaligned; Would it make sense to further differentiate SM3 and SM4? I see that we differentiate between the two in arm_neon.td ("ARM_FEATURE_SM3" & "ARM_FEATURE_SM4") but we don't include this differentiation as flags (only HasSM4, +sm4 etc) Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D99079/new/ https://reviews.llvm.org/D99079 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix
rsanthir.quic added a comment. As you mentioned, I thought it was only supported due to `CheckFPAdvSIMDEnabled64`. If the header is also guarded for AArch64, does that not support the idea that it is AArch64-specific? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100499/new/ https://reviews.llvm.org/D100499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix
rsanthir.quic added a comment. Thanks for looking into this @DavidSpickett ! What you found makes sense. I'll update this patch to remove only the poly128 vadd from the mapping. I'll also add another patch that will correctly enable the remaining vadd intrinsics for ARM. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100499/new/ https://reviews.llvm.org/D100499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix
rsanthir.quic updated this revision to Diff 338539. rsanthir.quic added a comment. Only the poly128 intrinsic is incompatible with ARM; the rest should be supported. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100499/new/ https://reviews.llvm.org/D100499 Files: clang/lib/CodeGen/CGBuiltin.cpp Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5460,8 +5460,7 @@ NEONMAP1(vabsq_v, arm_neon_vabs, 0), NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), - NEONMAP0(vaddq_p128), - NEONMAP0(vaddq_v), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), Index: clang/lib/CodeGen/CGBuiltin.cpp === --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -5460,8 +5460,7 @@ NEONMAP1(vabsq_v, arm_neon_vabs, 0), NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), - NEONMAP0(vaddq_p128), - NEONMAP0(vaddq_v), + NEONMAP0(vaddq_v), NEONMAP1(vaesdq_v, arm_neon_aesd, 0), NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D100772: [ARM] Neon Polynomial vadd Intrinsic fix
rsanthir.quic created this revision. rsanthir.quic added reviewers: t.p.northover, DavidSpickett, labrinea, apazos. Herald added subscribers: danielkiss, kristof.beyls. rsanthir.quic requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits. The Neon vadd intrinsics were added to the ARMSIMD intrinsic map, however due to being defined under an AArch64 guard in arm_neon.td, were not previously useable on ARM. This change rectifies that. It is important to note that poly128 is not valid on ARM, thus it was extracted out of the original arm_neon.td definition and separated for the sake of AArch64. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D100772 Files: clang/include/clang/Basic/arm_neon.td clang/test/CodeGen/arm-poly-add.c Index: clang/test/CodeGen/arm-poly-add.c === --- /dev/null +++ clang/test/CodeGen/arm-poly-add.c @@ -0,0 +1,86 @@ +// REQUIRES: arm-registered-target +// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi \ +// RUN: -target-feature +neon \ +// RUN: -target-feature +bf16 -mfloat-abi hard \ +// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \ +// RUN: | FileCheck %s + +#include + +// CHECK-LABEL: @test_vadd_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <8 x i8> [[TMP0]] +// +poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) { + return vadd_p8 (a, b); +} + +// CHECK-LABEL: @test_vadd_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT:ret <4 x i16> [[TMP3]] +// +poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) { + return vadd_p16 (a, b); +} + +// CHECK-LABEL: @test_vadd_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <1 x i64> 
[[A:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> +// CHECK-NEXT:ret <1 x i64> [[TMP3]] +// +poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) { + return vadd_p64(a, b); +} + +// CHECK-LABEL: @test_vaddq_p8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){ + return vaddq_p8(a, b); +} + +// CHECK-LABEL: @test_vaddq_p16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){ + return vaddq_p16(a, b); +} + +// CHECK-LABEL: @test_vaddq_p64( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> +// CHECK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +// CHECK-NEXT:ret <2 x i64> [[TMP3]] +// +poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){ + return vaddq_p64(a, b); +} + +// TODO: poly128_t not implemented on aarch32 +// CHCK-LABEL: @test_vaddq_p128( +// CHCK-NEXT: entry: +// CHCK-NEXT:[[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8> +// CHCK-NEXT:[[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8> +// CHCK-NEXT:[[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]] +// CHCK-NEXT:[[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHCK-NEXT:ret i128 [[TMP3]] +// +//poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){ +// return vaddq_p128(a, 
b); Index: clang/include/clang/Basic/arm_neon.td === --- clang/include/clang/Basic/arm_neon.td +++ clang/include/clang/Basic/arm_neon.td @@ -708,6 +708,11 @@ def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>; def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>; + +// Crypto +// TODO: poly128_t not implemented on aarch32 +def VADDP_Q : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">; + // AArch64 Intrinsics @@ -1160,7 +1165,7 @@ def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">; } -def VADDP :
[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix
rsanthir.quic added a comment. Here's the fix for enabling these on ARM: https://reviews.llvm.org/D100772 CHANGES SINCE LAST ACTION https://reviews.llvm.org/D100499/new/ https://reviews.llvm.org/D100499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits