r314964 - Enabling new pass manager in LTO (and thinLTO) link step.
Author: sfertile Date: Wed Oct 4 18:50:48 2017 New Revision: 314964 URL: http://llvm.org/viewvc/llvm-project?rev=314964&view=rev Log: Enabling new pass manager in LTO (and thinLTO) link step. Passes 'new-pass-manager' option to the linker plugin when the new pass manager is enabled. Patch by Graham Yiu. Differential Revision: https://reviews.llvm.org/D38517 Added: cfe/trunk/test/Driver/gold-lto-new-pass-man.c Modified: cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp Modified: cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp?rev=314964&r1=314963&r2=314964&view=diff == --- cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp Wed Oct 4 18:50:48 2017 @@ -454,6 +454,14 @@ void tools::AddGoldPlugin(const ToolChai CmdArgs.push_back( Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName)); } + + // Need this flag to turn on new pass manager via Gold plugin. + if (Args.hasFlag(options::OPT_fexperimental_new_pass_manager, + options::OPT_fno_experimental_new_pass_manager, + /* Default */ false)) { +CmdArgs.push_back("-plugin-opt=new-pass-manager"); + } + } void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args, Added: cfe/trunk/test/Driver/gold-lto-new-pass-man.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/gold-lto-new-pass-man.c?rev=314964&view=auto == --- cfe/trunk/test/Driver/gold-lto-new-pass-man.c (added) +++ cfe/trunk/test/Driver/gold-lto-new-pass-man.c Wed Oct 4 18:50:48 2017 @@ -0,0 +1,7 @@ +// RUN: touch %t.o +// +// RUN: %clang -target ppc64le-unknown-linux -### %t.o -flto 2>&1 \ +// RUN: -Wl,-plugin-opt=foo -O3 \ +// RUN: -fexperimental-new-pass-manager \ +// RUN: | FileCheck %s +// CHECK: "-plugin-opt=new-pass-manager" ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r344276 - Revert "clang-cl: Add /showFilenames option (PR31957)"
Author: sfertile Date: Thu Oct 11 11:40:35 2018 New Revision: 344276 URL: http://llvm.org/viewvc/llvm-project?rev=344276&view=rev Log: Revert "clang-cl: Add /showFilenames option (PR31957)" This reverts https://reviews.llvm.org/rL344234 which is causing failures on several bots due to invalid llvm.linker.options. Removed: cfe/trunk/test/Driver/cl-showfilenames.c Modified: cfe/trunk/include/clang/Driver/CLCompatOptions.td cfe/trunk/include/clang/Driver/Job.h cfe/trunk/lib/Driver/Job.cpp cfe/trunk/lib/Driver/ToolChains/Clang.cpp Modified: cfe/trunk/include/clang/Driver/CLCompatOptions.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/CLCompatOptions.td?rev=344276&r1=344275&r2=344276&view=diff == --- cfe/trunk/include/clang/Driver/CLCompatOptions.td (original) +++ cfe/trunk/include/clang/Driver/CLCompatOptions.td Thu Oct 11 11:40:35 2018 @@ -158,10 +158,6 @@ def _SLASH_Qvec_ : CLFlag<"Qvec-">, def _SLASH_showIncludes : CLFlag<"showIncludes">, HelpText<"Print info about included files to stderr">, Alias; -def _SLASH_showFilenames : CLFlag<"showFilenames">, - HelpText<"Print the name of each compiled file">; -def _SLASH_showFilenames_ : CLFlag<"showFilenames-">, - HelpText<"Don't print the name of each compiled file (default)">; def _SLASH_source_charset : CLCompileJoined<"source-charset:">, HelpText<"Source encoding, supports only UTF-8">, Alias; def _SLASH_execution_charset : CLCompileJoined<"execution-charset:">, Modified: cfe/trunk/include/clang/Driver/Job.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Job.h?rev=344276&r1=344275&r2=344276&view=diff == --- cfe/trunk/include/clang/Driver/Job.h (original) +++ cfe/trunk/include/clang/Driver/Job.h Thu Oct 11 11:40:35 2018 @@ -59,9 +59,6 @@ class Command { /// The list of program arguments which are inputs. llvm::opt::ArgStringList InputFilenames; - /// Whether to print the input filenames when executing. 
- bool PrintInputFilenames = false; - /// Response file name, if this command is set to use one, or nullptr /// otherwise const char *ResponseFile = nullptr; @@ -131,9 +128,6 @@ public: /// Print a command argument, and optionally quote it. static void printArg(llvm::raw_ostream &OS, StringRef Arg, bool Quote); - - /// Set whether to print the input filenames when executing. - void setPrintInputFilenames(bool P) { PrintInputFilenames = P; } }; /// Like Command, but with a fallback which is executed in case Modified: cfe/trunk/lib/Driver/Job.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Job.cpp?rev=344276&r1=344275&r2=344276&view=diff == --- cfe/trunk/lib/Driver/Job.cpp (original) +++ cfe/trunk/lib/Driver/Job.cpp Thu Oct 11 11:40:35 2018 @@ -315,12 +315,6 @@ void Command::setEnvironment(llvm::Array int Command::Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const { - if (PrintInputFilenames) { -for (const char *Arg : InputFilenames) - llvm::outs() << llvm::sys::path::filename(Arg) << "\n"; -llvm::outs().flush(); - } - SmallVector Argv; Optional> Env; Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=344276&r1=344275&r2=344276&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Thu Oct 11 11:40:35 2018 @@ -5067,13 +5067,6 @@ void Clang::ConstructJob(Compilation &C, C.addCommand(llvm::make_unique(JA, *this, Exec, CmdArgs, Inputs)); } - // Make the compile command echo its inputs for /showFilenames. 
- if (Output.getType() == types::TY_Object && - Args.hasFlag(options::OPT__SLASH_showFilenames, - options::OPT__SLASH_showFilenames_, false)) { -C.getJobs().getJobs().back()->setPrintInputFilenames(true); - } - if (Arg *A = Args.getLastArg(options::OPT_pg)) if (!shouldUseFramePointer(Args, Triple)) D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer" Removed: cfe/trunk/test/Driver/cl-showfilenames.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cl-showfilenames.c?rev=344275&view=auto == --- cfe/trunk/test/Driver/cl-showfilenames.c (original) +++ cfe/trunk/test/Driver/cl-showfilenames.c (removed) @@ -1,19 +0,0 @@ -// RUN: %clang_cl /c /o %t.obj /showFilenames -- %s 2>&1 | FileCheck -check-prefix=show %s -// RUN: %clang_cl /c /o %t.obj /showFilenames -- %s %S/Inputs/wildcard*.c 2>&1 | FileCheck -check-prefix=multiple %s - -// RUN: %cla
[clang] [AIX][TOC] -mtocdata/-mno-tocdata fix non deterministic iteration order (PR #86840)
https://github.com/mandlebug commented: It's unfortunate we have to ditch the StringSet because of the iteration order since the semantics of the option are the same regardless of the order the names are printed in. Now that we are constructing the list in sorted order we should change [these](https://github.com/llvm/llvm-project/blob/36e74cfdbde208e384c72bcb52ea638303fb7d67/clang/lib/Frontend/CompilerInstance.cpp#L1052) to assertions that they are already sorted. https://github.com/llvm/llvm-project/pull/86840 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AIX][TOC] -mtocdata/-mno-tocdata fix non deterministic iteration order (PR #86840)
https://github.com/mandlebug approved this pull request. LGTM. Zaara pointed out offline that we can't rely on the input being sorted despite this change because you can invoke the front end directly with an argument list in unsorted order. https://github.com/llvm/llvm-project/pull/86840 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [LTO] Fix fat-lto output for -c -emit-llvm. (PR #79404)
https://github.com/mandlebug created https://github.com/llvm/llvm-project/pull/79404 Fix and add a test case for combining '-ffat-lto-objects -c -emit-llvm' options and fix a spelling mistake in same test. >From 4ea905576a552a43fcf9a97287e618b67eb8e681 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 24 Jan 2024 17:40:28 -0500 Subject: [PATCH] [LTO] Fix fat-lto output for -c -emit-llvm. Fix and add a test case for combining '-ffat-lto-objects -c -emit-llvm' options and fix a spelling mistake in same test. --- clang/lib/Driver/Driver.cpp | 6 +++--- clang/test/Driver/fat-lto-objects.c | 12 +--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 190a73bfd40b68..10b97476873f5c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4764,9 +4764,9 @@ Action *Driver::ConstructPhaseAction( case phases::Backend: { if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { types::ID Output; - if (Args.hasArg(options::OPT_ffat_lto_objects)) -Output = Args.hasArg(options::OPT_emit_llvm) ? types::TY_LTO_IR - : types::TY_PP_Asm; + if (Args.hasArg(options::OPT_ffat_lto_objects) && + !Args.hasArg(options::OPT_emit_llvm)) +Output = types::TY_PP_Asm; else if (Args.hasArg(options::OPT_S)) Output = types::TY_LTO_IR; else diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c index 97002db6edc51e..d9a5ba88ea6d6f 100644 --- a/clang/test/Driver/fat-lto-objects.c +++ b/clang/test/Driver/fat-lto-objects.c @@ -23,11 +23,17 @@ // CHECK-CC-S-EL-LTO-SAME: -emit-llvm // CHECK-CC-S-EL-LTO-SAME: -ffat-lto-objects -/// When fat LTO is enabled wihtout -S we expect native object output and -ffat-lto-object to be passed to cc1. +/// When fat LTO is enabled without -S we expect native object output and -ffat-lto-object to be passed to cc1. 
// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-C-LTO // CHECK-CC-C-LTO: -cc1 -// CHECK-CC-C-LTO: -emit-obj -// CHECK-CC-C-LTO: -ffat-lto-objects +// CHECK-CC-C-LTO-SAME: -emit-obj +// CHECK-CC-C-LTO-SAME: -ffat-lto-objects + +/// When fat LTO is enabled with -c and -emit-llvm we expect bitcode output and -ffat-lto-object to be passed to cc1. +// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-C-EL-LTO +// CHECK-CC-C-EL-LTO: -cc1 +// CHECK-CC-C-EL-LTO-SAME: -emit-llvm-bc +// CHECK-CC-C-EL-LTO-SAME: -ffat-lto-objects /// Make sure we don't have a warning for -ffat-lto-objects being unused // RUN: %clang --target=x86_64-unknown-linux-gnu -ffat-lto-objects -fdriver-only -Werror -v %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-NOLTO ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [LTO] Fix fat-lto output for -c -emit-llvm. (PR #79404)
https://github.com/mandlebug closed https://github.com/llvm/llvm-project/pull/79404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [LTO] Fix fat-lto output for -c -emit-llvm. (PR #79404)
mandlebug wrote: Thanks for the review. https://github.com/llvm/llvm-project/pull/79404 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [FatLTO] output of -ffat-lto-objects -S should be assembly. (PR #79041)
https://github.com/mandlebug created https://github.com/llvm/llvm-project/pull/79041 Fat lto with -c compiles to an object file with the IR embedded in a section of the object, the combination of fat-lto with -S should then produce an assembly file equivalent of that. The IR output can still be generated by using both -S and -emit-llvm. >From d81d2e0f03ca50ceb1b7f7050291d00c0c8c3925 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Fri, 19 Jan 2024 15:39:57 -0500 Subject: [PATCH] [FatLTO] output of -ffat-lto-objects -S should be assembly. Fat lto with -c compiles to an object file with the IR embedded in a section of the object, the combination of fat-lto with -S should then produce an assembly file equivalent of that. The IR output can still be generated by using both -S and -emit-llvm. --- clang/lib/Driver/Driver.cpp | 7 --- clang/test/Driver/fat-lto-objects.c | 19 --- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index da27ca2d28e91a4..7109faa1072de5f 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4764,10 +4764,11 @@ Action *Driver::ConstructPhaseAction( case phases::Backend: { if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { types::ID Output; - if (Args.hasArg(options::OPT_S)) + if (Args.hasArg(options::OPT_ffat_lto_objects)) +Output = Args.hasArg(options::OPT_emit_llvm) ? 
types::TY_LTO_IR + : types::TY_PP_Asm; + else if (Args.hasArg(options::OPT_S)) Output = types::TY_LTO_IR; - else if (Args.hasArg(options::OPT_ffat_lto_objects)) -Output = types::TY_PP_Asm; else Output = types::TY_LTO_BC; return C.MakeAction(Input, Output); diff --git a/clang/test/Driver/fat-lto-objects.c b/clang/test/Driver/fat-lto-objects.c index e02359db3f0ae0d..203175d61b73d72 100644 --- a/clang/test/Driver/fat-lto-objects.c +++ b/clang/test/Driver/fat-lto-objects.c @@ -12,14 +12,27 @@ // CHECK-CC-S-NOT: -emit-llvm // CHECK-CC-S-NOT: -ffat-lto-objects -/// When LTO is enabled, we expect LLVM IR output and -ffat-lto-objects to be passed to cc1. +/// When fat LTO is enabled with -S, we expect asm output and -ffat-lto-objects to be passed to cc1. // RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-LTO -// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-LTO // CHECK-CC-S-LTO: -cc1 // CHECK-CC-S-LTO-SAME: -funified-lto -// CHECK-CC-S-LTO-SAME: -emit-llvm +// CHECK-CC-S-NOT: -emit-llvm // CHECK-CC-S-LTO-SAME: -ffat-lto-objects +/// When fat LTO is enabled with -S and -emit-llvm, we expect IR output and -ffat-lto-objects to be passed to cc1. +// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -S -emit-llvm 2>&1 | FileCheck %s -check-prefix=CHECK-CC-S-EL-LTO +// CHECK-CC-S-EL-LTO: -cc1 +// CHECK-CC-S-EL-LTO-SAME: -funified-lto +// CHECK-CC-S-EL-LTO-SAME: -emit-llvm +// CHECK-CC-S-EL-LTO-SAME: -ffat-lto-objects + +/// When fat LTO is enabled wihtout -S we expect native object output and -ffat-lto-object to be passed to cc1. 
+// RUN: %clang --target=x86_64-unknown-linux-gnu -flto -ffat-lto-objects -### %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-C-LTO +// CHECK-CC-C-LTO: -cc1 +// CHECK-CC-C-LTO: -funified-lto +// CHECK-CC-C-LTO: -emit-obj +// CHECK-CC-C-LTO: -ffat-lto-objects + /// Make sure we don't have a warning for -ffat-lto-objects being unused // RUN: %clang --target=x86_64-unknown-linux-gnu -ffat-lto-objects -fdriver-only -Werror -v %s -c 2>&1 | FileCheck %s -check-prefix=CHECK-CC-NOLTO // CHECK-CC-NOLTO: -cc1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 9b10e2b - [PowerPC][AIX] Warn when using pragma align(packed) on AIX.
Author: Sean Fertile Date: 2021-09-29T11:53:46-04:00 New Revision: 9b10e2b1cf01b37f441d83ebc41d2c2f9f81831e URL: https://github.com/llvm/llvm-project/commit/9b10e2b1cf01b37f441d83ebc41d2c2f9f81831e DIFF: https://github.com/llvm/llvm-project/commit/9b10e2b1cf01b37f441d83ebc41d2c2f9f81831e.diff LOG: [PowerPC][AIX] Warn when using pragma align(packed) on AIX. With xlc and xlC pragma align(packed) will pack bitfields the same way as pragma align(bit_packed). xlclang, xlclang++ and clang will pack bitfields the same way as pragma pack(1). Issue a warning when source code using pragma align(packed) is used to alert the user it may not be compatible with xlc/xlC. Differential Revision: https://reviews.llvm.org/D107506 Added: clang/test/Sema/aix-pragma-align-packed-warn.c Modified: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Sema/SemaDecl.cpp Removed: diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 00b6acf8bbe68..a3944f7a6054b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -916,6 +916,9 @@ def warn_pragma_options_align_reset_failed : Warning< InGroup; def err_pragma_options_align_mac68k_target_unsupported : Error< "mac68k alignment pragma is not supported on this target">; +def warn_pragma_align_not_xl_compatible : Warning< + "#pragma align(packed) may not be compatible with objects generated with AIX XL C/C++">, + InGroup; def warn_pragma_pack_invalid_alignment : Warning< "expected #pragma pack parameter to be '1', '2', '4', '8', or '16'">, InGroup; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index db7c9f9418042..48f298f2ce823 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -16613,6 +16613,23 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD, // Notify the consumer that we've defined a tag. 
if (!Tag->isInvalidDecl()) Consumer.HandleTagDeclDefinition(Tag); + + // Clangs implementation of #pragma align(packed) diff ers in bitfield layout + // from XLs and instead matches the XL #pragma pack(1) behavior. + if (Context.getTargetInfo().getTriple().isOSAIX() && + AlignPackStack.hasValue()) { +AlignPackInfo APInfo = AlignPackStack.CurrentValue; +// Only diagnose #pragma align(packed). +if (!APInfo.IsAlignAttr() || APInfo.getAlignMode() != AlignPackInfo::Packed) + return; +const RecordDecl *RD = dyn_cast(Tag); +if (!RD) + return; +// Only warn if there is at least 1 bitfield member. +if (llvm::any_of(RD->fields(), + [](const FieldDecl *FD) { return FD->isBitField(); })) + Diag(BraceRange.getBegin(), diag::warn_pragma_align_not_xl_compatible); + } } void Sema::ActOnObjCContainerFinishDefinition() { diff --git a/clang/test/Sema/aix-pragma-align-packed-warn.c b/clang/test/Sema/aix-pragma-align-packed-warn.c new file mode 100644 index 0..acbca792e37a3 --- /dev/null +++ b/clang/test/Sema/aix-pragma-align-packed-warn.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fxl-pragma-pack -verify -fsyntax-only %s +// RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fxl-pragma-pack -verify -fsyntax-only %s + +#pragma align(packed) +struct A { // expected-warning {{#pragma align(packed) may not be compatible with objects generated with AIX XL C/C++}} + short s1; + int : 0; + short s2; +}; + +struct B { // expected-warning {{#pragma align(packed) may not be compatible with objects generated with AIX XL C/C++}} + short a : 8; + short b : 8; + int c; +}; + +struct C { + int x, y, z; +}; + +struct D { + double d; + struct A a; +}; +#pragma align(reset) + +struct E { + int a : 28; + int : 0; + int b : 16; +}; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] b8f612e - [PowerPC][AIX] Packed zero-width bitfields do not affect alignment.
Author: Sean Fertile Date: 2021-08-04T11:03:25-04:00 New Revision: b8f612e780e50cfb62bc0196b6367e4587949f88 URL: https://github.com/llvm/llvm-project/commit/b8f612e780e50cfb62bc0196b6367e4587949f88 DIFF: https://github.com/llvm/llvm-project/commit/b8f612e780e50cfb62bc0196b6367e4587949f88.diff LOG: [PowerPC][AIX] Packed zero-width bitfields do not affect alignment. Zero-width bitfields on AIX pad out to the natural alignment boundary but do not change the containing record's alignment. Differential Revision: https://reviews.llvm.org/D106900 Added: Modified: clang/lib/AST/RecordLayoutBuilder.cpp clang/test/Layout/aix-packed-bitfields.c Removed: diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 972690becf9ec..83045253aa512 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1775,11 +1775,18 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { !D->getIdentifier()) FieldAlign = UnpackedFieldAlign = 1; - // On AIX, zero-width bitfields pad out to the alignment boundary, but then - // do not affect overall record alignment if there is a pragma pack or - // pragma align(packed). - if (isAIXLayout(Context) && !MaxFieldAlignment.isZero() && !FieldSize) -FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits); + // On AIX, zero-width bitfields pad out to the natural alignment boundary, + // but do not increase the alignment greater than the MaxFieldAlignment, or 1 + // if packed. + if (isAIXLayout(Context) && !FieldSize) { +if (FieldPacked) + FieldAlign = 1; +if (!MaxFieldAlignment.isZero()) { + UnpackedFieldAlign = + std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits); + FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits); +} + } // Diagnose differences in layout due to padding or packing. 
if (!UseExternalLayout) diff --git a/clang/test/Layout/aix-packed-bitfields.c b/clang/test/Layout/aix-packed-bitfields.c index 9bc907af0f596..88f6b3fced80a 100644 --- a/clang/test/Layout/aix-packed-bitfields.c +++ b/clang/test/Layout/aix-packed-bitfields.c @@ -1,14 +1,18 @@ // RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fdump-record-layouts \ -// RUN: -fsyntax-only -fxl-pragma-pack -x c %s | FileCheck %s +// RUN: -fsyntax-only -fxl-pragma-pack -x c %s | \ +// RUN: FileCheck --check-prefixes=CHECK,32BIT %s // RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fdump-record-layouts \ -// RUN: -fsyntax-only -fxl-pragma-pack -x c++ %s | FileCheck %s -// +// RUN: -fsyntax-only -fxl-pragma-pack -x c++ %s | \ +// RUN: FileCheck --check-prefixes=CHECK,32BIT %s + // RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fdump-record-layouts \ -// RUN: -fsyntax-only -fxl-pragma-pack -x c %s | FileCheck %s -// +// RUN: -fsyntax-only -fxl-pragma-pack -x c %s | \ +// RUN: FileCheck --check-prefixes=CHECK,64BIT %s + // RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -fdump-record-layouts \ -// RUN: -fsyntax-only -fxl-pragma-pack -x c++ %s | FileCheck %s +// RUN: -fsyntax-only -fxl-pragma-pack -x c++ %s | \ +// RUN: FileCheck --check-prefixes=CHECK,64BIT %s struct A { int a1 : 30; @@ -75,3 +79,35 @@ int d = sizeof(struct Pack2); // CHECK-NEXT: 3:6-35 | int a2 // CHECK-NEXT: 7:4-7 | int a3 // CHECK-NEXT: sizeof=8, {{(dsize=8, )?}}align=2, preferredalign=2 +// +struct __attribute__((packed)) PackedAttr { + char f1; + int : 0; + short : 3; + char f4 : 2; +}; + +int e = sizeof(struct PackedAttr); +// CHECK: *** Dumping AST Record Layout +// CHECK-NEXT: 0 | struct PackedAttr +// CHECK-NEXT: 0 | char f1 +// CHECK-NEXT:4:- | int +// CHECK-NEXT: 4:0-2 | short +// CHECK-NEXT: 4:3-4 | char f4 +// CHECK-NEXT: sizeof=5, {{(dsize=5, )?}}align=1, preferredalign=1 + +#pragma pack(2) +struct __attribute__((packed)) PackedAttrAndPragma { + char f1; + long long : 0; +}; +#pragma pack(pop) + +int f = 
sizeof(struct PackedAttrAndPragma); +// CHECK: *** Dumping AST Record Layout +// CHECK-NEXT: 0 | struct PackedAttrAndPragma +// CHECK-NEXT: 0 | char f1 +// 32BIT-NEXT:4:- | long long +// 32BIT-NEXT: sizeof=4, {{(dsize=4, )?}}align=1, preferredalign=1 +// 64BIT-NEXT:8:- | long long +// 64BIT-NEXT: sizeof=8, {{(dsize=8, )?}}align=1, preferredalign=1 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 5181be3 - [PowerPC][AIX] Limit attribute aligned to 4096.
Author: Sean Fertile Date: 2021-08-05T09:51:16-04:00 New Revision: 5181be344adbf7ba7dffc73526893d4e7750d34c URL: https://github.com/llvm/llvm-project/commit/5181be344adbf7ba7dffc73526893d4e7750d34c DIFF: https://github.com/llvm/llvm-project/commit/5181be344adbf7ba7dffc73526893d4e7750d34c.diff LOG: [PowerPC][AIX] Limit attribute aligned to 4096. Limit the maximum alignment for attribute aligned to 4096 to match the limit of the .align pseudo op in the system assembler. Differential Revision: https://reviews.llvm.org/D107497 Added: clang/test/Sema/aix-attr-aligned-limit.c Modified: clang/lib/Sema/SemaDeclAttr.cpp Removed: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5cc5e5fb24413..e5d03d55a5d6f 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4054,6 +4054,9 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, unsigned MaximumAlignment = Sema::MaximumAlignment; if (Context.getTargetInfo().getTriple().isOSBinFormatCOFF()) MaximumAlignment = std::min(MaximumAlignment, 8192u); + else if (Context.getTargetInfo().getTriple().isOSAIX()) +MaximumAlignment = std::min(MaximumAlignment, 4096u); + if (AlignVal > MaximumAlignment) { Diag(AttrLoc, diag::err_attribute_aligned_too_great) << MaximumAlignment << E->getSourceRange(); diff --git a/clang/test/Sema/aix-attr-aligned-limit.c b/clang/test/Sema/aix-attr-aligned-limit.c new file mode 100644 index 0..3c9a0facf9e54 --- /dev/null +++ b/clang/test/Sema/aix-attr-aligned-limit.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple powerpc-unknown-aix -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -fsyntax-only -verify %s +// +int a __attribute__((aligned(8192))); // expected-error {{requested alignment must be 4096 bytes or smaller}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] f888e44 - [PowerPC][AIX] attribute aligned cannot decrease align of a vector var.
Author: Sean Fertile Date: 2021-08-05T11:15:12-04:00 New Revision: f888e442bcc547301b58e77667eb261c0391b897 URL: https://github.com/llvm/llvm-project/commit/f888e442bcc547301b58e77667eb261c0391b897 DIFF: https://github.com/llvm/llvm-project/commit/f888e442bcc547301b58e77667eb261c0391b897.diff LOG: [PowerPC][AIX] attribute aligned cannot decrease align of a vector var. On AIX an aligned attribute cannot decrease the alignment of a variable when placed on a variable declaration of vector type. Differential Revision: https://reviews.llvm.org/D107522 Added: clang/test/CodeGen/aix-vector-attr-aligned.c clang/test/Sema/aix-attr-aligned-vector-warn.c Modified: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Sema/SemaDeclAttr.cpp Removed: diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 71e7ffdbe8a08..247f9d715b846 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2986,6 +2986,8 @@ def err_alignas_mismatch : Error< "redeclaration has diff erent alignment requirement (%1 vs %0)">; def err_alignas_underaligned : Error< "requested alignment is less than minimum alignment of %1 for type %0">; +def warn_aligned_attr_underaligned : Warning, + InGroup; def err_attribute_sizeless_type : Error< "%0 attribute cannot be applied to sizeless type %1">; def err_attribute_argument_n_type : Error< diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e5d03d55a5d6f..5098c3900aeb2 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4063,12 +4063,12 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, return; } - if (Context.getTargetInfo().isTLSSupported()) { + const auto *VD = dyn_cast(D); + if (VD && Context.getTargetInfo().isTLSSupported()) { unsigned MaxTLSAlign = Context.toCharUnitsFromBits(Context.getTargetInfo().getMaxTLSAlign()) .getQuantity(); 
-const auto *VD = dyn_cast(D); -if (MaxTLSAlign && AlignVal > MaxTLSAlign && VD && +if (MaxTLSAlign && AlignVal > MaxTLSAlign && VD->getTLSKind() != VarDecl::TLS_None) { Diag(VD->getLocation(), diag::err_tls_var_aligned_over_maximum) << (unsigned)AlignVal << VD << MaxTLSAlign; @@ -4076,6 +4076,17 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, } } + // On AIX, an aligned attribute can not decrease the alignment when applied + // to a variable declaration with vector type. + if (VD && Context.getTargetInfo().getTriple().isOSAIX()) { +const Type *Ty = VD->getType().getTypePtr(); +if (Ty->isVectorType() && AlignVal < 16) { + Diag(VD->getLocation(), diag::warn_aligned_attr_underaligned) + << VD->getType() << 16; + return; +} + } + AlignedAttr *AA = ::new (Context) AlignedAttr(Context, CI, true, ICE.get()); AA->setPackExpansion(IsPackExpansion); D->addAttr(AA); diff --git a/clang/test/CodeGen/aix-vector-attr-aligned.c b/clang/test/CodeGen/aix-vector-attr-aligned.c new file mode 100644 index 0..462fb7a3b019c --- /dev/null +++ b/clang/test/CodeGen/aix-vector-attr-aligned.c @@ -0,0 +1,33 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc-unknown-aix -target-feature +altivec -target-cpu pwr7 -emit-llvm -o - %s | \ +// RUN: FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -target-feature +altivec -target-cpu pwr7 -emit-llvm -o - %s | \ +// RUN: FileCheck %s + +typedef vector int __attribute__((aligned(8))) UnderAlignedVI; + +vector int g32 __attribute__((aligned(32))); +vector int g8 __attribute__((aligned(8))); +UnderAlignedVI TypedefedGlobal; + +int escape(vector int*); + +int local32(void) { + vector int l32 __attribute__((aligned(32))); + return escape(&l32); +} + +int local8(void) { + vector int l8 __attribute__((aligned(8))); + return escape(&l8); +} + +// CHECK: @g32 = global <4 x i32> zeroinitializer, align 32 +// CHECK: @g8 = global <4 x i32> zeroinitializer, align 16 +// CHECK: @TypedefedGlobal 
= global <4 x i32> zeroinitializer, align 8 + +// CHECK-LABEL: @local32 +// CHECK: %l32 = alloca <4 x i32>, align 32 +// +// CHECK-LABEL: @local8 +// CHECK: %l8 = alloca <4 x i32>, align 16 diff --git a/clang/test/Sema/aix-attr-aligned-vector-warn.c b/clang/test/Sema/aix-attr-aligned-vector-warn.c new file mode 100644 index 0..af2e1a89268a2 --- /dev/null +++ b/clang/test/Sema/aix-attr-aligned-vector-warn.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -target-feature +altivec -target-cpu pwr7 -verify -fsyntax-only %s +// RUN: %clang_cc1 -triple powerpc-unknown-aix -target-feature +altivec -target-cpu pwr7 -verify -fsyntax-only %s + +int escap
[clang] dddd524 - Revert "[PowerPC][AIX] Limit attribute aligned to 4096."
Author: Sean Fertile Date: 2021-08-06T13:54:50-04:00 New Revision: dddd524bb8844b225d42c98d381d745d4fd549ea URL: https://github.com/llvm/llvm-project/commit/dddd524bb8844b225d42c98d381d745d4fd549ea DIFF: https://github.com/llvm/llvm-project/commit/dddd524bb8844b225d42c98d381d745d4fd549ea.diff LOG: Revert "[PowerPC][AIX] Limit attribute aligned to 4096." This reverts commit 5181be344adbf7ba7dffc73526893d4e7750d34c. This breaks the libcxx type_traits header, which uses aligned storage with alignments greater than 4096. Reverting until we can fix the header. Added: Modified: clang/lib/Sema/SemaDeclAttr.cpp Removed: clang/test/Sema/aix-attr-aligned-limit.c diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 5098c3900aeb2..3b3e4a414c78c 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4054,9 +4054,6 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, unsigned MaximumAlignment = Sema::MaximumAlignment; if (Context.getTargetInfo().getTriple().isOSBinFormatCOFF()) MaximumAlignment = std::min(MaximumAlignment, 8192u); - else if (Context.getTargetInfo().getTriple().isOSAIX()) -MaximumAlignment = std::min(MaximumAlignment, 4096u); - if (AlignVal > MaximumAlignment) { Diag(AttrLoc, diag::err_attribute_aligned_too_great) << MaximumAlignment << E->getSourceRange(); diff --git a/clang/test/Sema/aix-attr-aligned-limit.c b/clang/test/Sema/aix-attr-aligned-limit.c deleted file mode 100644 index 3c9a0facf9e54..0 --- a/clang/test/Sema/aix-attr-aligned-limit.c +++ /dev/null @@ -1,4 +0,0 @@ -// RUN: %clang_cc1 -triple powerpc-unknown-aix -fsyntax-only -verify %s -// RUN: %clang_cc1 -triple powerpc64-unknown-aix -fsyntax-only -verify %s -// -int a __attribute__((aligned(8192))); // expected-error {{requested alignment must be 4096 bytes or smaller}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 3f40dbb - [PowerPC][AIX] Enable passing vectors in variadic functions.
Author: Sean Fertile Date: 2021-03-01T13:08:28-05:00 New Revision: 3f40dbbbc71d28f6ad0bd616fe009bde861362ed URL: https://github.com/llvm/llvm-project/commit/3f40dbbbc71d28f6ad0bd616fe009bde861362ed DIFF: https://github.com/llvm/llvm-project/commit/3f40dbbbc71d28f6ad0bd616fe009bde861362ed.diff LOG: [PowerPC][AIX] Enable passing vectors in variadic functions. Differential Revision: https://reviews.llvm.org/D97474 Added: clang/test/CodeGen/aix-altivec-vaargs.c Modified: clang/lib/CodeGen/TargetInfo.cpp Removed: diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index a11768d3807b..8c3857ff268b 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -4563,10 +4563,6 @@ Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, if (Ty->isAnyComplexType()) llvm::report_fatal_error("complex type is not supported on AIX yet"); - if (Ty->isVectorType()) -llvm::report_fatal_error( -"vector types are not yet supported for variadic functions on AIX"); - auto TypeInfo = getContext().getTypeInfoInChars(Ty); TypeInfo.Align = getParamTypeAlignment(Ty); diff --git a/clang/test/CodeGen/aix-altivec-vaargs.c b/clang/test/CodeGen/aix-altivec-vaargs.c new file mode 100644 index ..e144b9fcb464 --- /dev/null +++ b/clang/test/CodeGen/aix-altivec-vaargs.c @@ -0,0 +1,52 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc-unknown-aix -emit-llvm -target-feature +altivec -target-cpu pwr7 -o - %s | FileCheck %s --check-prefixes=CHECK,AIX32 +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -emit-llvm -target-feature +altivec -target-cpu pwr7 -o - %s | FileCheck %s --check-prefixes=CHECK,AIX64 + +vector double vector_varargs(int count, ...) 
{ + __builtin_va_list arg_list; + __builtin_va_start(arg_list, count); + + vector double ret; + + for (int i = 0; i != count; ++i) { +ret = __builtin_va_arg(arg_list, vector double); + } + + __builtin_va_end(arg_list); + return ret; +} + +// CHECK: %arg_list = alloca i8* +// CHECK: %arg_list1 = bitcast i8** %arg_list to i8* +// CHECK: call void @llvm.va_start(i8* %arg_list1) + +// AIX32: for.body: +// AIX32-NEXT:%argp.cur = load i8*, i8** %arg_list, align 4 +// AIX32-NEXT:%2 = ptrtoint i8* %argp.cur to i32 +// AIX32-NEXT:%3 = add i32 %2, 15 +// AIX32-NEXT:%4 = and i32 %3, -16 +// AIX32-NEXT:%argp.cur.aligned = inttoptr i32 %4 to i8* +// AIX32-NEXT:%argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i32 16 +// AIX32-NEXT:store i8* %argp.next, i8** %arg_list, align 4 +// AIX32-NEXT:%5 = bitcast i8* %argp.cur.aligned to <2 x double>* +// AIX32-NEXT:%6 = load <2 x double>, <2 x double>* %5, align 16 +// AIX32-NEXT:store <2 x double> %6, <2 x double>* %ret, align 16 +// AIX32-NEXT:br label %for.inc + +// AIX64: for.body: +// AIX64-NEXT:%argp.cur = load i8*, i8** %arg_list, align 8 +// AIX64-NEXT:%2 = ptrtoint i8* %argp.cur to i64 +// AIX64-NEXT:%3 = add i64 %2, 15 +// AIX64-NEXT:%4 = and i64 %3, -16 +// AIX64-NEXT:%argp.cur.aligned = inttoptr i64 %4 to i8* +// AIX64-NEXT:%argp.next = getelementptr inbounds i8, i8* %argp.cur.aligned, i64 16 +// AIX64-NEXT:store i8* %argp.next, i8** %arg_list, align 8 +// AIX64-NEXT:%5 = bitcast i8* %argp.cur.aligned to <2 x double>* +// AIX64-NEXT:%6 = load <2 x double>, <2 x double>* %5, align 16 +// AIX64-NEXT:store <2 x double> %6, <2 x double>* %ret, align 16 +// AIX64-NEXT:br label %for.inc + + +// CHECK: for.end: +// CHECK:%arg_list2 = bitcast i8** %arg_list to i8* +// CHECK:call void @llvm.va_end(i8* %arg_list2) ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r291179 - Add vec_insert4b and vec_extract4b functions to altivec.h
Author: sfertile Date: Thu Jan 5 15:43:30 2017 New Revision: 291179 URL: http://llvm.org/viewvc/llvm-project?rev=291179&view=rev Log: Add vec_insert4b and vec_extract4b functions to altivec.h Add builtins for the functions and custom codegen mapping the builtins to their corresponding intrinsics and handling the endian related swapping. https://reviews.llvm.org/D26546 Added: cfe/trunk/test/CodeGen/builtins-ppc-error.c cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/altivec.h cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsPPC.def?rev=291179&r1=291178&r2=291179&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsPPC.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsPPC.def Thu Jan 5 15:43:30 2017 @@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "") +BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "") +BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=291179&r1=291178&r2=291179&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jan 5 15:43:30 2017 @@ -35,6 +35,11 @@ using namespace clang; using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t Value, int64_t Low, int64_t High) { + return std::min(High, std::max(Low, Value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* 
for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinEx llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + + case PPC::BI__builtin_vsx_insertword: { +llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + +// Third argument is a compile time constant int. It must be clamped to +// to the range [0, 12]. +ConstantInt *ArgCI = dyn_cast(Ops[2]); +assert(ArgCI && + "Third arg to xxinsertw intrinsic must be constant integer"); +const int64_t MaxIndex = 12; +int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex); + +// The builtin semantics don't exactly match the xxinsertw instructions +// semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the +// word from the first argument, and inserts it in the second argument. The +// instruction extracts the word from its second input register and inserts +// it into its first input register, so swap the first and second arguments. +std::swap(Ops[0], Ops[1]); + +// Need to cast the second argument from a vector of unsigned int to a +// vector of long long. +Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + +if (getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), + ConstantInt::get(Int32Ty, 0) + }; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + // Reverse the double words in the vector we will extract from. + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); + + // Reverse the index. + Index = MaxIndex - Index; +} + +// Intrinsic expects the first arg to be a vector of int. 
+Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); +Ops[2] = ConstantInt::getSigned(Int32Ty, Index); +return Builder.CreateCall(F, Ops); + } + + case PPC::BI__builtin_vsx_extractuword: { +llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + +// Intrinsic expects the first argument to be a vector of doublewords. +Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + +// The second argument is a compile time constant int that needs to +// be clamped to the range [0, 12]. +ConstantInt *ArgCI = dyn_cast(Ops[1]); +assert(ArgCI && + "Second Arg to xxextractuw intrinsic must be a constant integer!"); +const int64_t MaxIndex = 12; +in
r291188 - Remove the ppc insertword/extractword expected fail tests.
Author: sfertile Date: Thu Jan 5 16:54:34 2017 New Revision: 291188 URL: http://llvm.org/viewvc/llvm-project?rev=291188&view=rev Log: Remove the ppc insertword/extractword expected fail tests. Removed: cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c Removed: cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c?rev=291187&view=auto == --- cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c (original) +++ cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c (removed) @@ -1,15 +0,0 @@ -// REQUIRES: powerpc-registered-target -// XFAIL: powerpc - -// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \ -// RUN: -Wall -Wextra -c %s -// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \ -// RUN: -Wall -Wextra -c %s - -// Expect the compile to fail with "cannot compile this builtin function yet" -extern vector signed int vsi; -extern vector unsigned char vuc; - -vector unsigned long long testExtractWord(void) { - return __builtin_vsx_extractuword(vuc, 12); -} Removed: cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c?rev=291187&view=auto == --- cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c (original) +++ cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c (removed) @@ -1,16 +0,0 @@ -// REQUIRES: powerpc-registered-target -// XFAIL: powerpc - -// RUN: %clang -faltivec -target powerpc64le-unknown-unknown -mcpu=power8 \ -// RUN: -Wall -Werror -c %s - -// RUN: %clang -faltivec -target powerpc64-unknown-unknown -mcpu=power8 \ -// RUN: -Wall -Werror -c %s - -// expect to fail with diagnostic: "cannot compile this builtin function yet" -extern vector signed int vsi; -extern vector unsigned char vuc; - -vector unsigned char testInsertWord(void) { - return 
__builtin_vsx_insertword(vsi, vuc, 0); -} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: r291179 - Add vec_insert4b and vec_extract4b functions to altivec.h
Sorry about that I've removed the 2 tests causing the issue for now. Sean - Original message -From: Evgenii Stepanov To: Sean Fertile/Toronto/IBM@IBMCACc: cfe-commits Subject: Re: r291179 - Add vec_insert4b and vec_extract4b functions to altivec.hDate: Thu, Jan 5, 2017 5:16 PM Tests on linux/x86_64 are failing with:fatal error: error in backend: Cannot select: intrinsic %llvm.ppc.vsx.xxinsertwOn Thu, Jan 5, 2017 at 1:43 PM, Sean Fertile via cfe-commits wrote:> Author: sfertile> Date: Thu Jan 5 15:43:30 2017> New Revision: 291179>> URL: http://llvm.org/viewvc/llvm-project?rev=291179&view=rev> Log:> Add vec_insert4b and vec_extract4b functions to altivec.h>> Add builtins for the functions and custom codegen mapping the builtins to their> corresponding intrinsics and handling the endian related swapping.>> https://reviews.llvm.org/D26546>> Added:> cfe/trunk/test/CodeGen/builtins-ppc-error.c> cfe/trunk/test/CodeGen/builtins-ppc-extractword-error.c> cfe/trunk/test/CodeGen/builtins-ppc-insertword-error.c> Modified:> cfe/trunk/include/clang/Basic/BuiltinsPPC.def> cfe/trunk/lib/CodeGen/CGBuiltin.cpp> cfe/trunk/lib/Headers/altivec.h> cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c>> Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsPPC.def?rev=291179&r1=291178&r2=291179&view=diff> ==> --- cfe/trunk/include/clang/Basic/BuiltinsPPC.def (original)> +++ cfe/trunk/include/clang/Basic/BuiltinsPPC.def Thu Jan 5 15:43:30 2017> @@ -417,6 +417,9 @@ BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us> BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")> BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")>> +BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")> +BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")> +> // HTM builtins> BUILTIN(__builtin_tbegin, "UiUIi", "")> BUILTIN(__builtin_tend, "UiUIi", "")>> Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp> URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=291179&r1=291178&r2=291179&view=diff> ==> --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)> +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jan 5 15:43:30 2017> @@ -35,6 +35,11 @@ using namespace clang;> using namespace CodeGen;> using namespace llvm;>> +static> +int64_t clamp(int64_t Value, int64_t Low, int64_t High) {> + return std::min(High, std::max(Low, Value));> +}> +> /// getBuiltinLibFunction - Given a builtin id for a function like> /// "__builtin_fabsf", return a Function* for "fabsf".> llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,> @@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinEx> llvm_unreachable("Unknown FMA operation");> return nullptr; // Suppress no-return warning> }> +> + case PPC::BI__builtin_vsx_insertword: {> + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);> +> + // Third argument is a compile time constant int. It must be clamped to> + // to the range [0, 12].> + ConstantInt *ArgCI = dyn_cast(Ops[2]);> + assert(ArgCI &&> + "Third arg to xxinsertw intrinsic must be constant integer");> + const int64_t MaxIndex = 12;> + int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);> +> + // The builtin semantics don't exactly match the xxinsertw instructions> + // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the> + // word from the first argument, and inserts it in the second argument. 
The> + // instruction extracts the word from its second input register and inserts> + // it into its first input register, so swap the first and second arguments.> + std::swap(Ops[0], Ops[1]);> +> + // Need to cast the second argument from a vector of unsigned int to a> + // vector of long long.> + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));> +> + if (getTarget().isLittleEndian()) {> + // Create a shuffle mask of (1, 0)> + Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),> + ConstantInt::get(Int32Ty, 0)> + };> + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);> +> + // Reverse the double words in the vector we will extract from.> +
[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h
sfertile added inline comments. Comment at: lib/Headers/altivec.h:11908 +#define vec_extract4b(__a, __b) \ + vec_reve((vector unsigned long long) \ +__builtin_vsx_xxextractuw((__a), (12 - (__b & 0xF nemanjai wrote: > I find it difficult to follow and understand this logic when it's in the > header. > What I'd prefer to see here is that the macro simply expands into > `__builtin_vsx_xxextractuw` and then handle all this logic in the code that > emits an intrinsic call. > Namely if the target is little endian, we adjust the parameter, emit the > intrinsic call and finally emit a shufflevector. I think this is a good idea; looking at the code, it's not obvious what is going on. Comment at: lib/Headers/altivec.h:12014 +#define vec_insert4b(__a, __b, __c) \ + ((vector unsigned char)__builtin_vsx_xxinsertw((__a), (__b), (__c) & 0xF)) +#endif kbarton wrote: > nemanjai wrote: > > As far as I can tell by looking at this patch and the corresponding back > > end patch, the `__a` argument will have a word inserted into it and it will > > be returned. > > > > Is that the semantics that the ABI specifies (I can't seem to make sense of > > the description). > > > > ``` > > vector unsigned int a = { 0x, 0xBB, 0xCC, 0xDD }; > > vector unsigned char b = (vector unsigned char) 0xFF; > > vector unsigned char c = vec_insert4b(a, b, 4); > > // Do we expect vector c to be: > > // { 0xAA, 0xAA, 0xAA, 0xAA, 0xFF, 0xFF, 0xFF, 0xFF, 0xCC, 0xCC, 0xCC, > > 0xCC, 0xDD, 0xDD, 0xDD, 0xDD } > > ``` > I think the current version of the ABI document has an error in it. The > description of the vec_insert4b is identical to the vec_extract4b, so I > expect it was copy/pasted in error. I think we need to open up an (internal) > bug against the ABI and wait for clarification to complete this. You have it correct Nemanja, word 1 will be extracted from b, and it will get inserted into a. The word will be inserted at the byte position starting at the 3rd argument. 
(so in this case byte offsets 4 to 7) I talked to Bill Schmidt earlier today and he already has a bug open. It hasn't been updated yet, but it should roughly correspond to the description for the xxinsertw instruction: The contents of word element 1 of VSR[XB] are placed into byte elements UIM:UIM+3 of VSR[XT]. The contents of the remaining byte elements of VSR[XT] are not modified. Repository: rL LLVM https://reviews.llvm.org/D26546 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h
sfertile updated this revision to Diff 78760. sfertile added a comment. Moved the endian related massaging from altivec.h into Clang codegen and clamped the input index into the valid range [0, 12]. Repository: rL LLVM https://reviews.llvm.org/D26546 Files: include/clang/Basic/BuiltinsPPC.def lib/CodeGen/CGBuiltin.cpp lib/Headers/altivec.h test/CodeGen/builtins-ppc-p9vector.c Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -1180,3 +1180,27 @@ // CHECK-LE-NEXT: ret <4 x float> return vec_extract_fp32_from_shortl(vusa); } +vector unsigned char test116(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 5) +// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8> + return vec_insert4b(vuia, vuca, 7); +} +vector unsigned char test117(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 5) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 7) +// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8> + return vec_insert4b(vsia, vuca, 5); +} +vector unsigned long long test118(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK-LE: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1) +// CHECK-LE-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> +// CHECK-LE-NEXT: ret <2 x i64> + return vec_extract4b(vuca, 
11); +} Index: lib/Headers/altivec.h === --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -12456,6 +12456,9 @@ #ifdef __POWER9_VECTOR__ +#define vec_insert4b __builtin_vsx_insertword +#define vec_extract4b __builtin_vsx_extractuword + /* vec_extract_exp */ static __inline__ vector unsigned int __ATTRS_o_ai Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -35,6 +35,11 @@ using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t value, int64_t low, int64_t high) { + return std::min(high, std::max(low, value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -8168,6 +8173,67 @@ llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + case PPC::BI__builtin_vsx_insertword: { +llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + +// Third argument is a compile time constant int. It must be clamped to +// to the range [0, 12]. +ConstantInt *ArgCI = dyn_cast(Ops[2]); +assert(ArgCI); +int64_t index = clamp(ArgCI->getSExtValue(), 0, 12); + +// Need to cast the second argument from a vector of cahr to a vector +// of long long. +Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + +if(getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2]; + ShuffleElts[0] = ConstantInt::get(Int32Ty, 1); + ShuffleElts[1] = ConstantInt::get(Int32Ty, 0); + Constant* ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + // Reverse the double words in the second argument. 
+ Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], ShuffleMask); + + // reverse the index + index = 12 - index; +} + +Ops[2] = ConstantInt::getSigned(Int32Ty, index); +return Builder.CreateCall(F, Ops); + } + case PPC::BI__builtin_vsx_extractuword: { +llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + +Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + +// The second argument is a compile time constant int that needs to +// be clamped to the range [0, 12]. +ConstantInt *ArgCI = dyn_cast(Ops[1]); +assert(ArgCI); +int64_t index = clamp(ArgCI->getSExtValue(), 0, 12); + +if(getTarget().isLittleEndian()) { + // Reverse the index + index = 12 - index; + Ops[1] = ConstantInt::getSigned(Int32Ty, index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, Ops)
[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h
sfertile updated this revision to Diff 78911. sfertile added a comment. Fixed spelling error in comment Repository: rL LLVM https://reviews.llvm.org/D26546 Files: include/clang/Basic/BuiltinsPPC.def lib/CodeGen/CGBuiltin.cpp lib/Headers/altivec.h test/CodeGen/builtins-ppc-p9vector.c Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -1180,3 +1180,27 @@ // CHECK-LE-NEXT: ret <4 x float> return vec_extract_fp32_from_shortl(vusa); } +vector unsigned char test116(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 7) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 5) +// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8> + return vec_insert4b(vuia, vuca, 7); +} +vector unsigned char test117(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> {{.+}}, i32 5) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK-LE: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-LE-NEXT: [[T2:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <2 x i64> [[T1]], i32 7) +// CHECK-LE-NEXT: bitcast <4 x i32> T2 to <16 x i8> + return vec_insert4b(vsia, vuca, 5); +} +vector unsigned long long test118(void) { +// CHECK-BE: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 11) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK-LE: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> {{.+}}, i32 1) +// CHECK-LE-NEXT: shufflevector <2 x i64> [[T1]], <2 x i64> [[T1]], <2 x i32> +// CHECK-LE-NEXT: ret <2 x i64> + return vec_extract4b(vuca, 11); +} Index: lib/Headers/altivec.h === --- lib/Headers/altivec.h +++ lib/Headers/altivec.h 
@@ -12456,6 +12456,9 @@ #ifdef __POWER9_VECTOR__ +#define vec_insert4b __builtin_vsx_insertword +#define vec_extract4b __builtin_vsx_extractuword + /* vec_extract_exp */ static __inline__ vector unsigned int __ATTRS_o_ai Index: lib/CodeGen/CGBuiltin.cpp === --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -35,6 +35,11 @@ using namespace CodeGen; using namespace llvm; +static +int64_t clamp(int64_t value, int64_t low, int64_t high) { + return std::min(high, std::max(low, value)); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -8168,6 +8173,67 @@ llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } + case PPC::BI__builtin_vsx_insertword: { +llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + +// Third argument is a compile time constant int. It must be clamped to +// to the range [0, 12]. +ConstantInt *ArgCI = dyn_cast(Ops[2]); +assert(ArgCI); +int64_t index = clamp(ArgCI->getSExtValue(), 0, 12); + +// Need to cast the second argument from a vector of char to a vector +// of long long. +Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + +if(getTarget().isLittleEndian()) { + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2]; + ShuffleElts[0] = ConstantInt::get(Int32Ty, 1); + ShuffleElts[1] = ConstantInt::get(Int32Ty, 0); + Constant* ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + // Reverse the double words in the second argument. 
+ Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], ShuffleMask); + + // reverse the index + index = 12 - index; +} + +Ops[2] = ConstantInt::getSigned(Int32Ty, index); +return Builder.CreateCall(F, Ops); + } + case PPC::BI__builtin_vsx_extractuword: { +llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + +Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + +// The second argument is a compile time constant int that needs to +// be clamped to the range [0, 12]. +ConstantInt *ArgCI = dyn_cast(Ops[1]); +assert(ArgCI); +int64_t index = clamp(ArgCI->getSExtValue(), 0, 12); + +if(getTarget().isLittleEndian()) { + // Reverse the index + index = 12 - index; + Ops[1] = ConstantInt::getSigned(Int32Ty, index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, Ops); + + // Create a shuffle mask of (1, 0) + Constant *ShuffleElts[2]; + ShuffleElt
[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h
sfertile marked an inline comment as done. sfertile added inline comments. Comment at: lib/CodeGen/CGBuiltin.cpp:8185 + +// Need to cast the second argument from a vector of cahr to a vector +// of long long. syzaara wrote: > tiny comment, char is misspelled as cahr Fixed Repository: rL LLVM https://reviews.llvm.org/D26546 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26073: [PPC] Add vec_absd functions to altivec.h
sfertile created this revision. sfertile added reviewers: kbarton, amehsan, lei, jtony, syzaara, nemanjai, echristo. sfertile added a subscriber: cfe-commits. Herald added a subscriber: mehdi_amini. Adds three overloads of vec_absd to altivec .h, as well as matching PPC specific builtins: vector unsigned char vec_absd (vector unsigned char, vector unsigned char); vector unsigned int vec_absd (vector unsigned int, vector unsigned int); vector unsigned short vec_absd (vector unsigned short, vector unsigned short); https://reviews.llvm.org/D26073 Files: include/clang/Basic/BuiltinsPPC.def lib/Headers/altivec.h test/CodeGen/builtins-ppc-p9vector.c Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -726,3 +726,18 @@ // CHECK-NEXT: ret <4 x float> return vec_insert_exp (vuia,vuib); } +vector unsigned char test59(void) { +// CHECK-BE: call <16 x i8> @llvm.ppc.altivec.vabsdub(<16 x i8> {{.+}}, <16 x i8> {{.+}}) +// CHECK: call <16 x i8> @llvm.ppc.altivec.vabsdub(<16 x i8> {{.+}}, <16 x i8> {{.+}}) + return vec_absd(vuca, vucb); +} +vector unsigned short test60(void) { +// CHECK-BE: call <8 x i16> @llvm.ppc.altivec.vabsduh(<8 x i16> {{.+}}, <8 x i16> {{.+}}) +// CHECK: call <8 x i16> @llvm.ppc.altivec.vabsduh(<8 x i16> {{.+}}, <8 x i16> {{.+}}) + return vec_absd(vusa, vusb); +} +vector unsigned int test61(void) { +// CHECK-BE: call <4 x i32> @llvm.ppc.altivec.vabsduw(<4 x i32> {{.+}}, <4 x i32> {{.+}}) +// CHECK: call <4 x i32> @llvm.ppc.altivec.vabsduw(<4 x i32> {{.+}}, <4 x i32> {{.+}}) + return vec_absd(vuia, vuib); +} Index: lib/Headers/altivec.h === --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -163,6 +163,26 @@ __a, __builtin_altivec_vsubsws((vector signed int)(0), __a)); } +/* vec_absd */ +#if defined(__POWER9_VECTOR__) + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_absd(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vabsdub(__a, 
__b); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_absd(vector unsigned short __a, vector unsigned short __b) { + return __builtin_altivec_vabsduh(__a, __b); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_absd(vector unsigned int __a, vector unsigned int __b) { + return __builtin_altivec_vabsduw(__a, __b); +} + +#endif /* End __POWER9_VECTOR__ */ + /* vec_add */ static __inline__ vector signed char __ATTRS_o_ai Index: include/clang/Basic/BuiltinsPPC.def === --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -278,6 +278,11 @@ BUILTIN(__builtin_altivec_vpopcntw, "V4UiV4Ui", "") BUILTIN(__builtin_altivec_vpopcntd, "V2ULLiV2ULLi", "") +// Absolute difference built-ins +BUILTIN(__builtin_altivec_vabsdub, "V16UcV16UcV16Uc", "") +BUILTIN(__builtin_altivec_vabsduh, "V8UsV8UsV8Us", "") +BUILTIN(__builtin_altivec_vabsduw, "V4UiV4UiV4Ui", "") + // VSX built-ins. BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "") Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -726,3 +726,18 @@ // CHECK-NEXT: ret <4 x float> return vec_insert_exp (vuia,vuib); } +vector unsigned char test59(void) { +// CHECK-BE: call <16 x i8> @llvm.ppc.altivec.vabsdub(<16 x i8> {{.+}}, <16 x i8> {{.+}}) +// CHECK: call <16 x i8> @llvm.ppc.altivec.vabsdub(<16 x i8> {{.+}}, <16 x i8> {{.+}}) + return vec_absd(vuca, vucb); +} +vector unsigned short test60(void) { +// CHECK-BE: call <8 x i16> @llvm.ppc.altivec.vabsduh(<8 x i16> {{.+}}, <8 x i16> {{.+}}) +// CHECK: call <8 x i16> @llvm.ppc.altivec.vabsduh(<8 x i16> {{.+}}, <8 x i16> {{.+}}) + return vec_absd(vusa, vusb); +} +vector unsigned int test61(void) { +// CHECK-BE: call <4 x i32> @llvm.ppc.altivec.vabsduw(<4 x i32> {{.+}}, <4 x i32> {{.+}}) +// CHECK: call <4 x i32> @llvm.ppc.altivec.vabsduw(<4 x i32> {{.+}}, <4 x i32> {{.+}}) + return vec_absd(vuia, vuib); +} Index: lib/Headers/altivec.h === --- 
lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -163,6 +163,26 @@ __a, __builtin_altivec_vsubsws((vector signed int)(0), __a)); } +/* vec_absd */ +#if defined(__POWER9_VECTOR__) + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_absd(vector unsigned char __a, vector unsigned char __b) { + return __builtin_altivec_vabsdub(__a, __b); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_absd(vector unsigned short __a, vector unsigned short __b) { + return __builtin_altivec_vabsduh(__a, __b); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_absd(vector unsigned int __a, vector unsig
[PATCH] D26271: [PPC] add extract significand/extract exponent/test data class for vector float and vector double -- clang portion
sfertile created this revision. sfertile added reviewers: kbarton, nemanjai, amehsan, syzaara, jtony, lei. sfertile added subscribers: cfe-commits, echristo. sfertile set the repository for this revision to rL LLVM. Add support in altivec.h for the following functions, as well as matching builtins: vector unsigned long long vec_extract_exp (vector double); vector unsigned int vec_extract_exp (vector float); vector unsigned long long vec_extract_sig (vector double) vector unsigned int vec_extract_sig (vector float) Add builtins mapping to the vector float/vector double 'test data class' instructions, as well as a function like macro that will expand to the equivalent of either: vector bool int vec_test_data_class (vector float, const int); vector bool long long vec_test_data_class (vector double, const int); as well as defines for all the masks used as the second argument to vec_test_data_class. Repository: rL LLVM https://reviews.llvm.org/D26271 Files: include/clang/Basic/BuiltinsPPC.def lib/Headers/altivec.h test/CodeGen/builtins-ppc-p9vector.c Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -827,4 +827,46 @@ // CHECK-NEXT: ret <16 x i8> return vec_srv (vuca, vucb); } +vector unsigned int test74(void) { +// CHECK-BE: @llvm.ppc.vsx.xvxexpsp(<4 x float> {{.+}}) +// CHECK-BE-NEXT: ret <4 x i32> +// CHECK: @llvm.ppc.vsx.xvxexpsp(<4 x float> {{.+}}) +// CHECK-NEXT: ret <4 x i32> + return vec_extract_exp(vfa); +} +vector unsigned long long test75(void) { +// CHECK-BE: @llvm.ppc.vsx.xvxexpdp(<2 x double> {{.+}}) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK: @llvm.ppc.vsx.xvxexpdp(<2 x double> {{.+}}) +// CHECK-NEXT: ret <2 x i64> + return vec_extract_exp(vda); +} +vector unsigned int test76(void) { +// CHECK-BE: @llvm.ppc.vsx.xvxsigsp(<4 x float> {{.+}}) +// CHECK-BE-NEXT: ret <4 x i32> +// CHECK: @llvm.ppc.vsx.xvxsigsp(<4 x float> {{.+}}) +// CHECK-NEXT: ret <4 x i32> + return 
vec_extract_sig(vfa); +} +vector unsigned long long test77(void) { +// CHECK-BE: @llvm.ppc.vsx.xvxsigdp(<2 x double> {{.+}}) +// CHECK-BE-NEXT: ret <2 x i64> +// CHECK: @llvm.ppc.vsx.xvxsigdp(<2 x double> {{.+}}) +// CHECK-NEXT: ret <2 x i64> + return vec_extract_sig(vda); +} +vector bool int test78(void) { +// CHECK-BE: @llvm.ppc.vsx.xvtstdcsp(<4 x float> {{.+}}, i32 127) +// CHECK-BE-NEXT: ret <4 x i32> +// CHECK: @llvm.ppc.vsx.xvtstdcsp(<4 x float> {{.+}}, i32 127) +// CHECK-NEXT: ret <4 x i32> + return vec_test_data_class(vfa, __VEC_CLASS_FP_NOT_NORMAL); +} +vector bool long long test79(void) { +// CHECK-BE: @llvm.ppc.vsx.xvtstdcdp(<2 x double> {{.+}}, i32 127) +// CHECK-BE_NEXT: ret <2 x i64 +// CHECK: @llvm.ppc.vsx.xvtstdcdp(<2 x double> {{.+}}, i32 127) +// CHECK-NEXT: ret <2 x i64> + return vec_test_data_class(vda, __VEC_CLASS_FP_NOT_NORMAL); +} Index: lib/Headers/altivec.h === --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -34,6 +34,25 @@ #define __CR6_LT 2 #define __CR6_LT_REV 3 +/* Constants for vec_test_data_class */ +#define __VEC_CLASS_FP_SUBNORMAL_N (1 << 0) +#define __VEC_CLASS_FP_SUBNORMAL_P (1 << 1) +#define __VEC_CLASS_FP_SUBNORMAL (__VEC_CLASS_FP_SUBNORMAL_P | \ + __VEC_CLASS_FP_SUBNORMAL_N) +#define __VEC_CLASS_FP_ZERO_N (1<<2) +#define __VEC_CLASS_FP_ZERO_P (1<<3) +#define __VEC_CLASS_FP_ZERO (__VEC_CLASS_FP_ZERO_P | \ + __VEC_CLASS_FP_ZERO_N) +#define __VEC_CLASS_FP_INFINITY_N (1<<4) +#define __VEC_CLASS_FP_INFINITY_P (1<<5) +#define __VEC_CLASS_FP_INFINITY (__VEC_CLASS_FP_INFINITY_P | \ + __VEC_CLASS_FP_INFINITY_N) +#define __VEC_CLASS_FP_NAN (1<<6) +#define __VEC_CLASS_FP_NOT_NORMAL (__VEC_CLASS_FP_NAN| \ + __VEC_CLASS_FP_SUBNORMAL | \ + __VEC_CLASS_FP_ZERO | \ + __VEC_CLASS_FP_INFINITY) + #define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) static __inline__ vector signed char __ATTRS_o_ai vec_perm( @@ -11873,6 +11892,34 @@ return __a[__b]; } +#ifdef __POWER9_VECTOR__ + +/* vec_extract_exp */ + +static 
__inline__ vector unsigned int __ATTRS_o_ai +vec_extract_exp(vector float __a) { + return __builtin_vsx_xvxexpsp(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_extract_exp(vector double __a) { + return __builtin_vsx_xvxexpdp(__a); +} + +/* vec_extract_sig */ + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_extract_sig(vector float __a) { + return __builtin_vsx_xvxsigsp(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_extract_sig (vector double __a) { + return __builtin_vsx_xvxsigdp(__a); +} + +#endif /* __POWER9_VECTOR__ */ + /* vec_insert */ sta
[PATCH] D26271: [PPC] add extract significand/ extract exponent/test data class for vector float and vector double -- clang portion
sfertile added inline comments. Comment at: lib/Headers/altivec.h:15629 +#define vec_test_data_class(__a, __b) \ +_Generic((__a),\ + vector float: \ nemanjai wrote: > I don't know what happens if the type of the first argument is neither of the > two types, but I think we should verify that something sane happens. Namely, > some sort of overload resolution error message. We do get a decent error diagnostic: test_data_class_bad.c:9:26: error: controlling expression type '__vector unsigned char' (vector of 16 'unsigned char' values) not compatible with any generic association type vector bool int res_f = vec_test_data_class(vfa, 4); ^~~ /home/sfertile/LLVM/publish/assert/bin/../lib/clang/4.0.0/include/altivec.h:15629:18: note: expanded from macro 'vec_test_data_class' _Generic((__a),\ ^ 1 error generated. Repository: rL LLVM https://reviews.llvm.org/D26271 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26534: [PPC] add altivec.h functions for converting a vector of half precision to a vector of single precision
sfertile created this revision. sfertile added reviewers: nemanjai, amehsan, kbarton, syzaara, jtony, lei. sfertile added subscribers: cfe-commits, echristo. sfertile set the repository for this revision to rL LLVM. Change adds 2 functions to altivec.h: vector float vec_extract_fp32_from_shorth (vector unsigned short); Purpose: Extracts four single-precision floating-point numbers from the high elements of a vector of eight 16-bit elements, interpreting each element as a 16-bit floating-point number in IEEE format. Result value: The first four elements are interpreted as 16-bit floating-point numbers in IEEE format, and extended to single-precision format, returning a vector with four single-precision IEEE numbers. vector float vec_extract_fp32_from_shortl (vector unsigned short); Purpose: Extracts four single-precision floating-point numbers from the low elements of a vector of eight 16-bit elements, interpreting each element as a 16-bit floating-point number in IEEE format. Result value: The last four elements are interpreted as 16-bit floating-point numbers in IEEE format, and extended to single-precision format, returning a vector with four single-precision IEEE numbers. Adds a builtin that gets mapped to the xvcvhpsp instruction.
(VSX Scalar Convert Half-Precision to Double-Precision) Repository: rL LLVM https://reviews.llvm.org/D26534 Files: include/clang/Basic/BuiltinsPPC.def lib/Headers/altivec.h test/CodeGen/builtins-ppc-p9vector.c Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -827,4 +827,21 @@ // CHECK-NEXT: ret <16 x i8> return vec_srv (vuca, vucb); } - +vector float test74(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shorth(vusa); +} +vector float test75(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shortl(vusa); +} Index: lib/Headers/altivec.h === --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -11902,6 +11902,30 @@ return __a[__b]; } +#if defined(__POWER9_VECTOR__) +static __inline__ vector float __ATTRS_o_ai +vec_extract_fp32_from_shorth(vector unsigned short __a) { + vector unsigned short __b = +#ifdef __LITTLE_ENDIAN__ +__builtin_shufflevector(__a, __a, 0, -1, 1, -1, 2, -1, 3, -1); +#else +__builtin_shufflevector(__a, __a, -1, 0, -1, 1, -1, 2, -1, 3); +#endif + return __builtin_vsx_xvcvhpsp(__b); +} + +static __inline__ vector float __ATTRS_o_ai +vec_extract_fp32_from_shortl(vector unsigned short __a) { + vector unsigned short __b = +#ifdef __LITTLE_ENDIAN__ +__builtin_shufflevector(__a, __a, 4, -1, 5, -1, 6, -1, 7, -1); +#else 
+__builtin_shufflevector(__a, __a, -1, 4, -1, 5, -1, 6, -1, 7); +#endif + return __builtin_vsx_xvcvhpsp(__b); +} +#endif /* __POWER9_VECTOR__ */ + /* vec_insert */ static __inline__ vector signed char __ATTRS_o_ai Index: include/clang/Basic/BuiltinsPPC.def === --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -380,6 +380,8 @@ BUILTIN(__builtin_vsx_xviexpdp, "V2dV2ULLiV2ULLi", "") BUILTIN(__builtin_vsx_xviexpsp, "V4fV4UiV4Ui", "") +BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -827,4 +827,21 @@ // CHECK-NEXT: ret <16 x i8> return vec_srv (vuca, vucb); } - +vector float test74(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shorth(vusa); +} +vector float test75(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEX
[PATCH] D26546: [PPC] Add vec_insert4b/vec_extract4b to altivec.h
sfertile created this revision. sfertile added reviewers: amehsan, kbarton, nemanjai, jtony, syzaara, lei. sfertile added subscribers: cfe-commits, echristo. sfertile set the repository for this revision to rL LLVM. Add macros that implement the vec_extract4b and vec_insert4b functionality. vector unsigned long long vec_extract4b (vector unsigned char, const int); Purpose: Extracts a word from a vector at a byte position. Result value: The first doubleword element of the result contains the zero-extended extracted word from ARG1. The second doubleword is set to 0. ARG2 specifies the least-significant byte number (0 - 12) of the word to be extracted. vector unsigned char vec_insert4b (vector signed int, vector unsigned char, const int); vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char, const int); Purpose: Inserts a word into a vector at a byte position. Result value: The contents of word element 1 of the first argument are extracted and placed into argument 2 at the byte elements starting at the position indicated by the third argument.
Repository: rL LLVM https://reviews.llvm.org/D26546 Files: include/clang/Basic/BuiltinsPPC.def lib/Headers/altivec.h test/CodeGen/builtins-ppc-p9vector.c Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -827,4 +827,25 @@ // CHECK-NEXT: ret <16 x i8> return vec_srv (vuca, vucb); } - +vector unsigned char test74(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <4 x i32> {{.+}}, i32 5) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <4 x i32> {{.+}}, i32 7) +// CHECK-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> + return vec_insert4b(vuia, vuca, 5); +} +vector unsigned char test75(void) { +// CHECK-BE: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <4 x i32> {{.+}}, i32 5) +// CHECK-BE-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> +// CHECK: [[T1:%.+]] = call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> {{.+}}, <4 x i32> {{.+}}, i32 7) +// CHECK-NEXT: bitcast <4 x i32> [[T1]] to <16 x i8> + return vec_insert4b(vsia, vuca, 5); +} +vector unsigned long long test76(void) { +// CHECK-BE: [[T1:%.+]] = call <2 x i64> @llvm.ppc.vsx.xxextractuw(<4 x i32> {{.+}}, i32 11) +// CHECK-BE-NEXT: ret <2 x i64> [[T1]] +// CHECK: call <2 x i64> @llvm.ppc.vsx.xxextractuw(<4 x i32> {{.+}}, i32 1) +// CHECK: [[T1:%.+]] = shufflevector <2 x i64> {{.+}}, <2 x i64> {{.+}}, <2 x i32> +// CHECK-NEXT: ret <2 x i64> [[T1]] + return vec_extract4b(vuca, 11); +} Index: lib/Headers/altivec.h === --- lib/Headers/altivec.h +++ lib/Headers/altivec.h @@ -11902,6 +11902,16 @@ return __a[__b]; } +#if defined(__POWER9_VECTOR__) +#ifdef __LITTLE_ENDIAN__ +#define vec_extract4b(__a, __b) \ + vec_reve((vector unsigned long long) \ +__builtin_vsx_xxextractuw((__a), (12 - (__b & 0xF +#else +#define vec_extract4b(__a, __b) __builtin_vsx_xxextractuw((__a), (__b)) +#endif +#endif /* 
if defined(__POWER9_VECTOR__) */ + /* vec_insert */ static __inline__ vector signed char __ATTRS_o_ai @@ -11993,6 +12003,18 @@ return __b; } +#if defined(__POWER9_VECTOR__) +#ifdef __LITTLE_ENDIAN__ +#define vec_insert4b(__a, __b, __c) \ + ((vector unsigned char)__builtin_vsx_xxinsertw((__a), \ + vec_reve((vector unsigned long long)(__b)), \ + 12 - ((__c) & 0xF))) +#else +#define vec_insert4b(__a, __b, __c) \ + ((vector unsigned char)__builtin_vsx_xxinsertw((__a), (__b), (__c) & 0xF)) +#endif +#endif /* if defined(__POWER9_VECTOR__) */ + /* vec_lvlx */ static __inline__ vector signed char __ATTRS_o_ai Index: include/clang/Basic/BuiltinsPPC.def === --- include/clang/Basic/BuiltinsPPC.def +++ include/clang/Basic/BuiltinsPPC.def @@ -380,6 +380,9 @@ BUILTIN(__builtin_vsx_xviexpdp, "V2dV2ULLiV2ULLi", "") BUILTIN(__builtin_vsx_xviexpsp, "V4fV4UiV4Ui", "") +BUILTIN(__builtin_vsx_xxinsertw, "V16UcV4UiV16UcIi", "") +BUILTIN(__builtin_vsx_xxextractuw, "V2ULLiV16UcIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Index: test/CodeGen/builtins-ppc-p9vector.c === --- test/CodeGen/builtins-ppc-p9vector.c +++ test/CodeGen/builtins-ppc-p9vector.c @@ -827,4 +827,25 @@ // CHECK-NEXT: ret <16 x i8> return vec_srv (vuca, vucb); } - +vector unsigned char test74(void) { +// CHECK-BE: [[T1:%.+]]
[PATCH] D26479: [PowerPC] Implement remaining permute builtins in altivec.h - Clang portion
sfertile added a comment. Comment at: include/clang/Basic/BuiltinsPPC.def:385-388 +BUILTIN(__builtin_altivec_vrlwmi, "V4UiV4UiV4UiV4Ui", "") +BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "") +BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "") A minor quibble: we have the __builtin_altivec_* functions added after the vsx builtins, rather than with the rest of the altivec builtins. Repository: rL LLVM https://reviews.llvm.org/D26479 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r286830 - [PPC] add extract sig/exp test data class for vec float and vec double.
Author: sfertile Date: Mon Nov 14 08:43:27 2016 New Revision: 286830 URL: http://llvm.org/viewvc/llvm-project?rev=286830&view=rev Log: [PPC] add extract sig/exp test data class for vec float and vec double. Add vector extract exponent/significand functions to altivec.h, as well as functions (and related constants) to test the data class of vector float and vector double. Differential Revision: https://reviews.llvm.org/D26271 Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def cfe/trunk/lib/Headers/altivec.h cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsPPC.def?rev=286830&r1=286829&r2=286830&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsPPC.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsPPC.def Mon Nov 14 08:43:27 2016 @@ -383,8 +383,13 @@ BUILTIN(__builtin_vsx_xvcpsgnsp, "V4fV4f BUILTIN(__builtin_vsx_xvabssp, "V4fV4f", "") BUILTIN(__builtin_vsx_xvabsdp, "V2dV2d", "") +// vector Insert/Extract exponent/significand builtins BUILTIN(__builtin_vsx_xviexpdp, "V2dV2ULLiV2ULLi", "") BUILTIN(__builtin_vsx_xviexpsp, "V4fV4UiV4Ui", "") +BUILTIN(__builtin_vsx_xvxexpdp, "V2ULLiV2d", "") +BUILTIN(__builtin_vsx_xvxexpsp, "V4UiV4f", "") +BUILTIN(__builtin_vsx_xvxsigdp, "V2ULLiV2d", "") +BUILTIN(__builtin_vsx_xvxsigsp, "V4UiV4f", "") // Conversion builtins BUILTIN(__builtin_vsx_xvcvdpsxws, "V4SiV2d", "") @@ -398,6 +403,10 @@ BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d" BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "") +// Vector Test Data Class builtins +BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") +BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") Modified: cfe/trunk/lib/Headers/altivec.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=286830&r1=286829&r2=286830&view=diff == --- 
cfe/trunk/lib/Headers/altivec.h (original) +++ cfe/trunk/lib/Headers/altivec.h Mon Nov 14 08:43:27 2016 @@ -34,6 +34,25 @@ #define __CR6_LT 2 #define __CR6_LT_REV 3 +/* Constants for vec_test_data_class */ +#define __VEC_CLASS_FP_SUBNORMAL_N (1 << 0) +#define __VEC_CLASS_FP_SUBNORMAL_P (1 << 1) +#define __VEC_CLASS_FP_SUBNORMAL (__VEC_CLASS_FP_SUBNORMAL_P | \ + __VEC_CLASS_FP_SUBNORMAL_N) +#define __VEC_CLASS_FP_ZERO_N (1<<2) +#define __VEC_CLASS_FP_ZERO_P (1<<3) +#define __VEC_CLASS_FP_ZERO (__VEC_CLASS_FP_ZERO_P | \ + __VEC_CLASS_FP_ZERO_N) +#define __VEC_CLASS_FP_INFINITY_N (1<<4) +#define __VEC_CLASS_FP_INFINITY_P (1<<5) +#define __VEC_CLASS_FP_INFINITY (__VEC_CLASS_FP_INFINITY_P | \ + __VEC_CLASS_FP_INFINITY_N) +#define __VEC_CLASS_FP_NAN (1<<6) +#define __VEC_CLASS_FP_NOT_NORMAL (__VEC_CLASS_FP_NAN| \ + __VEC_CLASS_FP_SUBNORMAL | \ + __VEC_CLASS_FP_ZERO | \ + __VEC_CLASS_FP_INFINITY) + #define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__)) static __inline__ vector signed char __ATTRS_o_ai vec_perm( @@ -12277,6 +12296,34 @@ static __inline__ float __ATTRS_o_ai vec return __a[__b]; } +#ifdef __POWER9_VECTOR__ + +/* vec_extract_exp */ + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_extract_exp(vector float __a) { + return __builtin_vsx_xvxexpsp(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_extract_exp(vector double __a) { + return __builtin_vsx_xvxexpdp(__a); +} + +/* vec_extract_sig */ + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_extract_sig(vector float __a) { + return __builtin_vsx_xvxsigsp(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_extract_sig (vector double __a) { + return __builtin_vsx_xvxsigdp(__a); +} + +#endif /* __POWER9_VECTOR__ */ + /* vec_insert */ static __inline__ vector signed char __ATTRS_o_ai @@ -15999,7 +16046,6 @@ vec_revb(vector unsigned __int128 __a) { } #endif /* END __POWER8_VECTOR__ && __powerpc64__ */ - /* vec_xl */ static inline 
__ATTRS_o_ai vector signed char vec_xl(signed long long __offset, @@ -16143,6 +16189,18 @@ static inline __ATTRS_o_ai void vec_xst( *(vector unsigned __int128 *)(__ptr + __offset) = __vec; } #endif + +#ifdef __POWER9_VECTOR__ +#define vec_test_data_class(__a, __b) \ +_Generic((__a),\ + vector float: \ + (vector bool int)__builtin_vsx_xvtstdcsp((__a), (__b)), \ + vector double:
[PATCH] D26271: [PPC] add extract significand/ extract exponent/test data class for vector float and vector double -- clang portion
sfertile closed this revision. sfertile added a comment. committed https://reviews.llvm.org/rL286830 Repository: rL LLVM https://reviews.llvm.org/D26271 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r286863 - [PPC] altivec.h functions for converting half precision to single precision.
Author: sfertile Date: Mon Nov 14 12:47:15 2016 New Revision: 286863 URL: http://llvm.org/viewvc/llvm-project?rev=286863&view=rev Log: [PPC] altivec.h functions for converting half precision to single precision. Adds 2 vector functions for converting from a vector of unsigned short to a vector of float. One converts the low 4 halfwords and one converts the high 4 halfwords. Differential Revision: https://reviews.llvm.org/D26534 Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def cfe/trunk/lib/Headers/altivec.h cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c Modified: cfe/trunk/include/clang/Basic/BuiltinsPPC.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsPPC.def?rev=286863&r1=286862&r2=286863&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsPPC.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsPPC.def Mon Nov 14 12:47:15 2016 @@ -402,6 +402,7 @@ BUILTIN(__builtin_vsx_xvcvuxdsp, "V4fV2U BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d", "") BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "") +BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "") // Vector Test Data Class builtins BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") Modified: cfe/trunk/lib/Headers/altivec.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=286863&r1=286862&r2=286863&view=diff == --- cfe/trunk/lib/Headers/altivec.h (original) +++ cfe/trunk/lib/Headers/altivec.h Mon Nov 14 12:47:15 2016 @@ -12322,6 +12322,27 @@ vec_extract_sig (vector double __a) { return __builtin_vsx_xvxsigdp(__a); } +static __inline__ vector float __ATTRS_o_ai +vec_extract_fp32_from_shorth(vector unsigned short __a) { + vector unsigned short __b = +#ifdef __LITTLE_ENDIAN__ +__builtin_shufflevector(__a, __a, 0, -1, 1, -1, 2, -1, 3, -1); +#else +__builtin_shufflevector(__a, __a, -1, 0, -1, 1, -1, 2, -1, 3); +#endif + return __builtin_vsx_xvcvhpsp(__b); +} + +static __inline__ vector float __ATTRS_o_ai +vec_extract_fp32_from_shortl(vector unsigned short __a) { + 
vector unsigned short __b = +#ifdef __LITTLE_ENDIAN__ +__builtin_shufflevector(__a, __a, 4, -1, 5, -1, 6, -1, 7, -1); +#else +__builtin_shufflevector(__a, __a, -1, 4, -1, 5, -1, 6, -1, 7); +#endif + return __builtin_vsx_xvcvhpsp(__b); +} #endif /* __POWER9_VECTOR__ */ /* vec_insert */ Modified: cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c?rev=286863&r1=286862&r2=286863&view=diff == --- cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c (original) +++ cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c Mon Nov 14 12:47:15 2016 @@ -967,3 +967,21 @@ vector bool long long test87(void) { // CHECK-NEXT: ret <2 x i64> return vec_test_data_class(vda, __VEC_CLASS_FP_NOT_NORMAL); } +vector float test88(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shorth(vusa); +} +vector float test89(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shortl(vusa); +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26534: [PPC] add altivec.h functions for converting a vector of half precision to a vector of single precision
This revision was automatically updated to reflect the committed changes. Closed by commit rL286863: [PPC] altivec.h functions for converting half precision to single precision. (authored by sfertile). Changed prior to commit: https://reviews.llvm.org/D26534?vs=77581&id=77842#toc Repository: rL LLVM https://reviews.llvm.org/D26534 Files: cfe/trunk/include/clang/Basic/BuiltinsPPC.def cfe/trunk/lib/Headers/altivec.h cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c Index: cfe/trunk/include/clang/Basic/BuiltinsPPC.def === --- cfe/trunk/include/clang/Basic/BuiltinsPPC.def +++ cfe/trunk/include/clang/Basic/BuiltinsPPC.def @@ -402,6 +402,7 @@ BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d", "") BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "") +BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "") // Vector Test Data Class builtins BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") Index: cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c === --- cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c +++ cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c @@ -967,3 +967,21 @@ // CHECK-NEXT: ret <2 x i64> return vec_test_data_class(vda, __VEC_CLASS_FP_NOT_NORMAL); } +vector float test88(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shorth(vusa); +} +vector float test89(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shortl(vusa); +} Index: cfe/trunk/lib/Headers/altivec.h === --- 
cfe/trunk/lib/Headers/altivec.h +++ cfe/trunk/lib/Headers/altivec.h @@ -12322,6 +12322,27 @@ return __builtin_vsx_xvxsigdp(__a); } +static __inline__ vector float __ATTRS_o_ai +vec_extract_fp32_from_shorth(vector unsigned short __a) { + vector unsigned short __b = +#ifdef __LITTLE_ENDIAN__ +__builtin_shufflevector(__a, __a, 0, -1, 1, -1, 2, -1, 3, -1); +#else +__builtin_shufflevector(__a, __a, -1, 0, -1, 1, -1, 2, -1, 3); +#endif + return __builtin_vsx_xvcvhpsp(__b); +} + +static __inline__ vector float __ATTRS_o_ai +vec_extract_fp32_from_shortl(vector unsigned short __a) { + vector unsigned short __b = +#ifdef __LITTLE_ENDIAN__ +__builtin_shufflevector(__a, __a, 4, -1, 5, -1, 6, -1, 7, -1); +#else +__builtin_shufflevector(__a, __a, -1, 4, -1, 5, -1, 6, -1, 7); +#endif + return __builtin_vsx_xvcvhpsp(__b); +} #endif /* __POWER9_VECTOR__ */ /* vec_insert */ Index: cfe/trunk/include/clang/Basic/BuiltinsPPC.def === --- cfe/trunk/include/clang/Basic/BuiltinsPPC.def +++ cfe/trunk/include/clang/Basic/BuiltinsPPC.def @@ -402,6 +402,7 @@ BUILTIN(__builtin_vsx_xvcvdpsp, "V4fV2d", "") BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "") +BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "") // Vector Test Data Class builtins BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "") Index: cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c === --- cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c +++ cfe/trunk/test/CodeGen/builtins-ppc-p9vector.c @@ -967,3 +967,21 @@ // CHECK-NEXT: ret <2 x i64> return vec_test_data_class(vda, __VEC_CLASS_FP_NOT_NORMAL); } +vector float test88(void) { +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shorth(vusa); +} +vector float test89(void) { +// CHECK-BE: 
shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-BE-NEXT: ret <4 x float> +// CHECK-LE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-LE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) +// CHECK-LE-NEXT: ret <4 x float> + return vec_extract_fp32_from_shortl(vusa); +} Index: cfe/trunk/lib/Headers/altivec.h === --- cfe/trunk/lib/Headers/altivec.h +++ cfe/trunk/lib/Headers/altivec.h @@ -12322,6 +12322,27 @@
[clang] [llvm] [PowerPC][AIX] 64-bit large code-model support for toc-data (PR #90619)
@@ -6141,24 +6141,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) { assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."); -// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode -// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code non -// toc-data symbols. +// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode, +// 64-bit medium (ELF-only), or large (ELF and AIX) code model code that +// does not contain TOC data symbols. // We generate two instructions as described below. The first source -// operand is a symbol reference. If it must be toc-referenced according to -// Subtarget, we generate: +// operand is a symbol reference. If it must be referenced via the TOC +// according to Subtarget, we generate: // [32-bit AIX] // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) // [64-bit ELF/AIX] // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym) - -// For large code model toc-data symbols we generate: +// For large code model with TOC data symbols we generate: mandlebug wrote: ```suggestion // And finally for AIX with toc-data we generate: ``` https://github.com/llvm/llvm-project/pull/90619 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PowerPC][AIX] 64-bit large code-model support for toc-data (PR #90619)
@@ -1292,8 +1291,9 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { unsigned Op = MI->getOpcode(); -// Change the opcode to load address for tocdata -TmpInst.setOpcode(Op == PPC::ADDItocL8 ? PPC::ADDI8 : PPC::LA); +// Change the opcode to load address for toc data. +unsigned NewOp64 = IsAIX ? PPC::LA8 : PPC::ADDI8; mandlebug wrote: Minor nit: Roll selecting the new opcode into a single statement rather than selecting what the opcode would be for 64-bit, then deciding to use LA or the 64-bit opcode in 2 different statements? https://github.com/llvm/llvm-project/pull/90619 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PowerPC][AIX] 64-bit large code-model support for toc-data (PR #90619)
@@ -6141,24 +6141,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) { assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" " ELF/AIX or 32-bit AIX in the following."); -// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode -// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code non -// toc-data symbols. +// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode, +// 64-bit medium (ELF-only), or large (ELF and AIX) code model code that +// does not contain TOC data symbols. // We generate two instructions as described below. The first source -// operand is a symbol reference. If it must be toc-referenced according to -// Subtarget, we generate: +// operand is a symbol reference. If it must be referenced via the TOC +// according to Subtarget, we generate: // [32-bit AIX] // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) // [64-bit ELF/AIX] // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) // Otherwise we generate: mandlebug wrote: ```suggestion // Otherwise for medium code model ELF we generate: ``` https://github.com/llvm/llvm-project/pull/90619 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Reduce number of run steps in ppc rop-protect test. (PR #139607)
https://github.com/mandlebug created https://github.com/llvm/llvm-project/pull/139607 Test was running both -mcpu=pwrN and -mcpu=powerN compile steps for power N = 8, 9 and 10. Reduce to one run step for each form using power8 for one instance and power 10 for the other. >From 5c551ba6ab687dbedf38d57bba2fbf6c93a82d3b Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Mon, 12 May 2025 14:56:18 -0400 Subject: [PATCH] Reduce number of run steps in ppc rop-protect test. Test was running both -mcpu=pwrN and -mcpu=powerN compile steps for power N = 8, 9 and 10. Reduce to one run step for each form using power8 for one instance and power 10 for the other. --- clang/test/Driver/ppc-mrop-protection-support-check.c | 10 -- 1 file changed, 10 deletions(-) diff --git a/clang/test/Driver/ppc-mrop-protection-support-check.c b/clang/test/Driver/ppc-mrop-protection-support-check.c index 50eaef3ed770b..541dc2bf99c3f 100644 --- a/clang/test/Driver/ppc-mrop-protection-support-check.c +++ b/clang/test/Driver/ppc-mrop-protection-support-check.c @@ -1,20 +1,10 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=pwr9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=power8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP // RUN: not %clang 
-target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP #ifdef __ROP_PROTECT__ static_assert(false, "ROP Protect enabled"); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [PPC] Disable rop-protect for 32-bit OS targets. (PR #139619)
https://github.com/mandlebug created https://github.com/llvm/llvm-project/pull/139619 The instructions are not supported on either 32-bit ELF (due to no redzone) or 32-bit AIX due to the instructions always using the full 64-bit width of the register inputs. >From 6f089b31986290495a452d71fe24ebb73fcd0f42 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Mon, 12 May 2025 11:27:39 -0400 Subject: [PATCH] [PPC] Disable rop-protect for 32-bit OS targets. The instructions are not supported on either 32-bit ELF (due to no redzone) or 32-bit AIX due to the instructions always using the full 64-bit width of the register inputs. --- clang/lib/Basic/Targets/PPC.cpp | 16 +++- .../Driver/ppc-mrop-protection-support-check.c | 6 ++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 425ad68bb9098..e6ef0ecc526ba 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -679,11 +679,17 @@ bool PPCTargetInfo::initFeatureMap( } } - if (!(ArchDefs & ArchDefinePwr8) && - llvm::is_contained(FeaturesVec, "+rop-protect")) { -// We can turn on ROP Protect on Power 8 and above. -Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU; -return false; + if (llvm::is_contained(FeaturesVec, "+rop-protect")) { +if (PointerWidth == 32) { + Diags.Report(diag::err_opt_not_valid_on_target) << "-mrop-protect"; + return false; +} + +if (!(ArchDefs & ArchDefinePwr8)) { + // We can turn on ROP Protect on Power 8 and above. 
+ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU; + return false; +} } if (!(ArchDefs & ArchDefinePwr8) && diff --git a/clang/test/Driver/ppc-mrop-protection-support-check.c b/clang/test/Driver/ppc-mrop-protection-support-check.c index 50eaef3ed770b..9081c583de8bf 100644 --- a/clang/test/Driver/ppc-mrop-protection-support-check.c +++ b/clang/test/Driver/ppc-mrop-protection-support-check.c @@ -16,6 +16,11 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=power7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP +// RUN: not %clang -target powerpc-unknown-linux -fsyntax-only \ +// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT +// RUN: not %clang -target powerpc-unknown-aix -fsyntax-only \ +// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT + #ifdef __ROP_PROTECT__ static_assert(false, "ROP Protect enabled"); #endif @@ -24,3 +29,4 @@ static_assert(false, "ROP Protect enabled"); // HASROP-NOT: option '-mrop-protect' cannot be specified with // NOROP: option '-mrop-protect' cannot be specified with +// 32BIT: option '-mrop-protect' cannot be specified on this target ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [PowerPC] Add BCDCOPYSIGN and BCDSETSIGN Instruction Support (PR #144874)
@@ -515,6 +515,10 @@ TARGET_BUILTIN(__builtin_altivec_vctzh, "V8UsV8Us", "", "power9-vector") TARGET_BUILTIN(__builtin_altivec_vctzw, "V4UiV4Ui", "", "power9-vector") TARGET_BUILTIN(__builtin_altivec_vctzd, "V2ULLiV2ULLi", "", "power9-vector") +//P9 BCD builtins +TARGET_BUILTIN(__builtin_ppc_bcdcopysign, "V16UcV16UcV16Uc", "", "power9-vector") mandlebug wrote: Should the feature be "isa-v30-instructions" instead of "power9-vector"? https://github.com/llvm/llvm-project/pull/144874 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Reduce number of run steps in ppc rop-protect test. (PR #139607)
@@ -1,20 +1,10 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=pwr9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP mandlebug wrote: Yeah. We have one test for P8 which is the first CPU version supported, and one test for P10 which is a later CPU that also supports it. Adding a P9 doesn't really add any coverage IMO. https://github.com/llvm/llvm-project/pull/139607 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Reduce number of run steps in ppc rop-protect test. (PR #139607)
@@ -1,20 +1,10 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=pwr9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP -// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP mandlebug wrote: The P10 run step uses the `pwr` format. I think one run step for each mcpu format is adequate. Realistically we should have other coverage that ensures that the 2 formats set up the same environment and this test could use just one format either way, but since we have a run step for a target greater than 8 anyway I thought to test one of each. https://github.com/llvm/llvm-project/pull/139607 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [PPC] ROP protection won't be supported with the ELFV1 ABI. (PR #139947)
https://github.com/mandlebug created https://github.com/llvm/llvm-project/pull/139947 Disables -mrop-protect option for the elfv1 ABI. Tests both a target where this ABI is the default and a target where we explicitly have to set the ABI to elfv1. >From 21bbe51434af58e9a37e74a457a871ffb2e64d03 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 14 May 2025 14:52:48 -0400 Subject: [PATCH] [PPC] ROP protection won't be supported with the ELFV1 ABI. --- clang/lib/Basic/Targets/PPC.cpp | 6 ++ .../test/Driver/ppc-mrop-protection-support-check.c | 13 + 2 files changed, 19 insertions(+) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index e6ef0ecc526ba..0cb43a24c8aff 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -685,6 +685,12 @@ bool PPCTargetInfo::initFeatureMap( return false; } +if (ABI == "elfv1") { + Diags.Report(diag::err_unsupported_abi_for_opt) + << "-mrop-protect" << "elfv2"; + return false; +} + if (!(ArchDefs & ArchDefinePwr8)) { // We can turn on ROP Protect on Power 8 and above. 
Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU; diff --git a/clang/test/Driver/ppc-mrop-protection-support-check.c b/clang/test/Driver/ppc-mrop-protection-support-check.c index f500e9e3e510c..34db142ef6796 100644 --- a/clang/test/Driver/ppc-mrop-protection-support-check.c +++ b/clang/test/Driver/ppc-mrop-protection-support-check.c @@ -6,11 +6,23 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP +// RUN: not %clang -target powerpc64-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=power8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=ELFV1 +// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=power8 -mrop-protect -mabi=elfv1 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ELFV1 + // RUN: not %clang -target powerpc-unknown-linux -fsyntax-only \ // RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT // RUN: not %clang -target powerpc-unknown-aix -fsyntax-only \ // RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT +#ifdef __ROP_PROTECT__ +#if defined(__CALL_ELF) && __CALL_ELF == 1 +#error "ROP protection not supported with 64-bit elfv1 abi" +#endif +#endif + #ifdef __ROP_PROTECT__ static_assert(false, "ROP Protect enabled"); #endif @@ -20,3 +32,4 @@ static_assert(false, "ROP Protect enabled"); // NOROP: option '-mrop-protect' cannot be specified with // 32BIT: option '-mrop-protect' cannot be specified on this target +// ELFV1: '-mrop-protect' can only be used with the 'elfv2' ABI ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [PPC] Disable rop-protect for 32-bit OS targets. (PR #139619)
https://github.com/mandlebug closed https://github.com/llvm/llvm-project/pull/139619 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Reduce number of run steps in ppc rop-protect test. (PR #139607)
https://github.com/mandlebug closed https://github.com/llvm/llvm-project/pull/139607 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [PPC] ROP protection won't be supported with the ELFV1 ABI. (PR #139947)
https://github.com/mandlebug updated https://github.com/llvm/llvm-project/pull/139947 >From 21bbe51434af58e9a37e74a457a871ffb2e64d03 Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 14 May 2025 14:52:48 -0400 Subject: [PATCH 1/2] [PPC] ROP protection won't be supported with the ELFV1 ABI. --- clang/lib/Basic/Targets/PPC.cpp | 6 ++ .../test/Driver/ppc-mrop-protection-support-check.c | 13 + 2 files changed, 19 insertions(+) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index e6ef0ecc526ba..0cb43a24c8aff 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -685,6 +685,12 @@ bool PPCTargetInfo::initFeatureMap( return false; } +if (ABI == "elfv1") { + Diags.Report(diag::err_unsupported_abi_for_opt) + << "-mrop-protect" << "elfv2"; + return false; +} + if (!(ArchDefs & ArchDefinePwr8)) { // We can turn on ROP Protect on Power 8 and above. Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU; diff --git a/clang/test/Driver/ppc-mrop-protection-support-check.c b/clang/test/Driver/ppc-mrop-protection-support-check.c index f500e9e3e510c..34db142ef6796 100644 --- a/clang/test/Driver/ppc-mrop-protection-support-check.c +++ b/clang/test/Driver/ppc-mrop-protection-support-check.c @@ -6,11 +6,23 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP +// RUN: not %clang -target powerpc64-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=power8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=ELFV1 +// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=power8 -mrop-protect -mabi=elfv1 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=ELFV1 + // RUN: not %clang -target powerpc-unknown-linux -fsyntax-only \ // RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT // RUN: not %clang -target powerpc-unknown-aix -fsyntax-only \ // RUN: -mcpu=pwr8 
-mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT +#ifdef __ROP_PROTECT__ +#if defined(__CALL_ELF) && __CALL_ELF == 1 +#error "ROP protection not supported with 64-bit elfv1 abi" +#endif +#endif + #ifdef __ROP_PROTECT__ static_assert(false, "ROP Protect enabled"); #endif @@ -20,3 +32,4 @@ static_assert(false, "ROP Protect enabled"); // NOROP: option '-mrop-protect' cannot be specified with // 32BIT: option '-mrop-protect' cannot be specified on this target +// ELFV1: '-mrop-protect' can only be used with the 'elfv2' ABI >From 0a20e810b3339b161c45fdc7bb0ad0e39de65c3f Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Wed, 6 Aug 2025 15:54:43 -0400 Subject: [PATCH 2/2] Explicitly specifiy which ABI to target when testing rop-protect. The preprocessor init check was failing becuase of using rop-protect with the default ABI which is ELFV1 for powerpc64. Also removed 2 of the run lines as they only differ in the cpu level. --- clang/test/Preprocessor/init-ppc64.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/test/Preprocessor/init-ppc64.c b/clang/test/Preprocessor/init-ppc64.c index 7dffd4627481b..505a532fab0d1 100644 --- a/clang/test/Preprocessor/init-ppc64.c +++ b/clang/test/Preprocessor/init-ppc64.c @@ -697,9 +697,7 @@ // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-feature +mma -target-cpu power10 -fno-signed-char < /dev/null | FileCheck -check-prefix PPC-MMA %s // PPC-MMA:#define __MMA__ 1 // -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-feature +rop-protect -target-cpu power10 -fno-signed-char < /dev/null | FileCheck -check-prefix PPC-ROP %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-feature +rop-protect -target-cpu power9 -fno-signed-char < /dev/null | FileCheck -check-prefix PPC-ROP %s -// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-feature +rop-protect -target-cpu power8 -fno-signed-char < /dev/null | 
FileCheck -check-prefix PPC-ROP %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-feature +rop-protect -target-abi elfv2 -target-cpu power8 -fno-signed-char < /dev/null | FileCheck -check-prefix PPC-ROP %s // PPC-ROP:#define __ROP_PROTECT__ 1 // // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-feature +float128 -target-cpu power9 -fno-signed-char < /dev/null | FileCheck -check-prefix PPC-FLOAT128 %s ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits