https://github.com/pow2clk updated https://github.com/llvm/llvm-project/pull/106588
>From 12253818bd47aa8c324f6222586965f356b11c90 Mon Sep 17 00:00:00 2001 From: Greg Roth <grr...@microsoft.com> Date: Wed, 24 Jul 2024 16:49:19 -0600 Subject: [PATCH 1/2] [HLSL] set alwaysinline on HLSL functions HLSL inlines all its functions by default. This uses the alwaysinline attribute to force that in the corresponding pass for user functions by default and overrides the default noinline of some implicit functions. This makes an instance of explicit inlining for buffer subscripts unnecessary. Adds tests for function and constructor inlining and augments some existing tests to verify correct inlining of implicitly created functions as well. incidentally restore RUN line that I believe was mistakenly removed as part of #88918 fixes #89282 --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 17 ++- clang/lib/CodeGen/CodeGenFunction.cpp | 4 +- clang/lib/Sema/HLSLExternalSemaSource.cpp | 2 - .../GlobalConstructorFunction.hlsl | 31 +++-- .../CodeGenHLSL/GlobalConstructorLib.hlsl | 23 +++- clang/test/CodeGenHLSL/GlobalDestructors.hlsl | 51 +++++--- .../builtins/RWBuffer-constructor.hlsl | 1 + .../builtins/RWBuffer-subscript.hlsl | 5 +- .../test/CodeGenHLSL/inline-constructors.hlsl | 74 ++++++++++++ clang/test/CodeGenHLSL/inline-functions.hlsl | 114 ++++++++++++++++++ 10 files changed, 279 insertions(+), 43 deletions(-) create mode 100644 clang/test/CodeGenHLSL/inline-constructors.hlsl create mode 100644 clang/test/CodeGenHLSL/inline-functions.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 4bd7b6ba58de0d..24d126ced0d9f7 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -414,9 +414,20 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD, llvm::Function *Fn) { - if (FD->isInExportDeclContext()) { - const StringRef ExportAttrKindStr = "hlsl.export"; - Fn->addFnAttr(ExportAttrKindStr); + if (FD) { // 
"explicit" functions with declarations + if (FD->isInExportDeclContext()) { + const StringRef ExportAttrKindStr = "hlsl.export"; + Fn->addFnAttr(ExportAttrKindStr); + } + // Respect noinline if the explicit functions use it + // otherwise default to alwaysinline + if (!Fn->hasFnAttribute(Attribute::NoInline)) + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + } else { // "implicit" autogenerated functions with no declaration + // Implicit functions might get marked as noinline by default + // but we override that for HLSL + Fn->removeFnAttr(Attribute::NoInline); + Fn->addFnAttr(Attribute::AlwaysInline); } } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index a5747283e98058..aceeed0e66d130 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1239,9 +1239,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (getLangOpts().OpenMP && CurCodeDecl) CGM.getOpenMPRuntime().emitFunctionProlog(*this, CurCodeDecl); - if (FD && getLangOpts().HLSL) { + if (getLangOpts().HLSL) { // Handle emitting HLSL entry functions. 
- if (FD->hasAttr<HLSLShaderAttr>()) { + if (FD && FD->hasAttr<HLSLShaderAttr>()) { CGM.getHLSLRuntime().emitEntryFunction(FD, Fn); } CGM.getHLSLRuntime().setHLSLFunctionAttributes(FD, Fn); diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 9aacbe4ad9548e..0a534d94192560 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -290,8 +290,6 @@ struct BuiltinTypeDeclBuilder { SourceLocation())); MethodDecl->setLexicalDeclContext(Record); MethodDecl->setAccess(AccessSpecifier::AS_public); - MethodDecl->addAttr(AlwaysInlineAttr::CreateImplicit( - AST, SourceRange(), AlwaysInlineAttr::CXX11_clang_always_inline)); Record->addDecl(MethodDecl); return *this; diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl index f954c9d2f029f2..b39311ad67cd62 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl +++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE int i; @@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) { } __attribute__((constructor)) void then_call_me(void) { - i = 12; + i = 13; } __attribute__((destructor)) void call_me_last(void) { @@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {} // CHECK-NOT:@llvm.global_ctors // CHECK-NOT:@llvm.global_dtors -//CHECK: define void @main() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"() -//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"() -//CHECK-NEXT: %0 = call i32 
@llvm.dx.flattened.thread.id.in.group() -//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) -//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"( -//CHECK-NEXT: ret void +// CHECK: define void @main() +// CHECK-NEXT: entry: +// Verify function constructors are emitted +// NOINLINE-NEXT: call void @"?call_me_first@@YAXXZ"() +// NOINLINE-NEXT: call void @"?then_call_me@@YAXXZ"() +// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +// NOINLINE-NEXT: call void @"?call_me_last@@YAXXZ"( +// NOINLINE-NEXT: ret void + +// Verify constructor calls are inlined when AlwaysInline is run +// INLINE-NEXT: alloca +// INLINE-NEXT: store i32 12 +// INLINE-NEXT: store i32 13 +// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +// INLINE-NEXT: store i32 % +// INLINE-NEXT: store i32 0 +// INLINE: ret void diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl index 2c5c4e19c3296d..78f6475462bc47 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl +++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE // Make sure global variable for ctors exist for lib profile. // CHECK:@llvm.global_ctors @@ -11,7 +12,11 @@ void FirstEntry() {} // CHECK: define void @FirstEntry() // CHECK-NEXT: entry: -// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() +// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() +// NOINLINE-NEXT: call void @"?FirstEntry@@YAXXZ"() +// Verify inlining leaves only calls to "llvm." 
intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// CHECK: ret void [shader("compute")] [numthreads(1,1,1)] @@ -19,5 +24,15 @@ void SecondEntry() {} // CHECK: define void @SecondEntry() // CHECK-NEXT: entry: -// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() -// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"() +// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() +// NOINLINE-NEXT: call void @"?SecondEntry@@YAXXZ"() +// Verify inlining leaves only calls to "llvm." intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// CHECK: ret void + + +// Verify the constructor is alwaysinline +// NOINLINE: ; Function Attrs: {{.*}}alwaysinline +// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]] + +// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl index 24c3c039fc6192..ea28354222f885 100644 --- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl +++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl @@ -1,10 +1,18 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library 
-std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK -// Make sure global variable for dtors exist for lib profile. +// Tests that constructors and destructors are appropriately generated for globals +// and that their calls are inlined when AlwaysInline is run +// but global variables are retained for the library profiles + +// Make sure global variable for ctors/dtors exist for lib profile. +// LIB:@llvm.global_ctors // LIB:@llvm.global_dtors -// Make sure global variable for dtors removed for compute profile. -// CS-NOT:llvm.global_dtors +// Make sure global variable for ctors/dtors removed for compute profile. +// CS-NOT:@llvm.global_ctors +// CS-NOT:@llvm.global_dtors struct Tail { Tail() { @@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) { Wag(); } -// Make sure global variable for ctors/dtors removed. -// CHECK-NOT:@llvm.global_ctors -// CHECK-NOT:@llvm.global_dtors -//CHECK: define void @main() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl() -//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() -//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) -//CHECK-NEXT: call void @_GLOBAL__D_a() -//CHECK-NEXT: ret void +// CHECK: define void @main() +// CHECK-NEXT: entry: +// Verify destructor is emitted +// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl() +// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +// NOINLINE-NEXT: call void @_GLOBAL__D_a() +// NOINLINE-NEXT: ret void +// Verify inlining leaves only calls to "llvm." intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// INLINE: ret void // This is really just a sanity check I needed for myself to verify that // function scope static variables also get destroyed properly. 
-//CHECK: define internal void @_GLOBAL__D_a() -//CHECK-NEXT: entry: -//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A") -//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A") -//CHECK-NEXT: ret void +// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]] +// NOINLINE-NEXT: entry: +// NOINLINE-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A") +// NOINLINE-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A") +// NOINLINE-NEXT: ret void + +// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl index baddfcf2cf1d52..174f4c3eaaad26 100644 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl @@ -1,3 +1,4 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s // RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV RWBuffer<float> Buf; diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl index da8a1e538ec5e7..2a350c1619bd6e 100644 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl @@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) { // Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy // and confusing to follow so the match here is pretty weak. -// CHECK: define internal void @"?main@@YAXI@Z" -// CHECK-NOT: call +// CHECK: define void @main() +// Verify inlining leaves only calls to "llvm." 
intrinsics +// CHECK-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} // CHECK: ret void diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl new file mode 100644 index 00000000000000..40c7a42bdc262e --- /dev/null +++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE + +// Tests that implicit constructor calls for user classes will always be inlined. + +struct Weed { + Weed() {Count += 1;} + [[maybe_unused]] void pull() {Count--;} + static int weedCount() { return Count; } +private: + static int Count; + +} YardWeeds; + +int Weed::Count = 1; // It begins. . . + +struct Kitty { + unsigned burrsInFur; + + Kitty() { + burrsInFur = 0; + } + + void wanderInYard(int hours) { + burrsInFur = hours*Weed::weedCount()/8; + } + + void lick() { + if(burrsInFur) { + burrsInFur--; + Weed w; + } + } + +} Nion; + +void NionsDay(int hours) { + static Kitty Nion; + Nion.wanderInYard(hours); + while(Nion.burrsInFur) Nion.lick(); +} + +// CHECK: define void @main() +// CHECK-NEXT: entry: +// Verify constructor is emitted +// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl() +// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +// Verify inlining leaves only calls to "llvm." 
intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// CHECK: ret void +[shader("compute")] +[numthreads(1,1,1)] +void main(unsigned GI : SV_GroupIndex) { + NionsDay(10); +} + + +// CHECK: define void @rainyMain() +// CHECK-NEXT: entry: +// Verify constructor is emitted +// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl() +// NOINLINE-NEXT: call void @"?rainyMain@@YAXXZ"() +// Verify inlining leaves only calls to "llvm." intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// CHECK: ret void +[shader("compute")] +[numthreads(1,1,1)] +void rainyMain() { + NionsDay(1); +} + diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl new file mode 100644 index 00000000000000..9c00e096a9eef2 --- /dev/null +++ b/clang/test/CodeGenHLSL/inline-functions.hlsl @@ -0,0 +1,114 @@ +// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE +// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE +// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE +// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE + +// Tests that user functions will always be inlined. +// This includes exported functions and mangled entry point implementation functions. +// The unmangled entry functions must not be alwaysinlined. 
+ +#define MAX 100 + +float nums[MAX]; + +// Verify that all functions have the alwaysinline attribute +// CHECK: Function Attrs: alwaysinline +// CHECK: define void @"?swap@@YAXY0GE@III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]] +// CHECK: ret void +// Swap the values of Buf at indices ix1 and ix2 +void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) { + float tmp = Buf[ix1]; + Buf[ix1] = Buf[ix2]; + Buf[ix2] = tmp; +} + +// CHECK: Function Attrs: alwaysinline +// CHECK: define void @"?BubbleSort@@YAXY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]] +// CHECK: ret void +// Inefficiently sort Buf in place +void BubbleSort(unsigned Buf[MAX], unsigned size) { + bool swapped = true; + while (swapped) { + swapped = false; + for (unsigned i = 1; i < size; i++) { + if (Buf[i] < Buf[i-1]) { + swap(Buf, i, i-1); + swapped = true; + } + } + } +} + +// Note ExtAttr is the inlined export set of attribs +// CHECK: Function Attrs: alwaysinline +// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr:\#[0-9]+]] +// CHECK: ret i32 +// Sort Buf and remove any duplicate values +// returns the number of values left +export +unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) { + BubbleSort(Buf, size); + unsigned insertPt = 0; + for (unsigned i = 1; i < size; i++) { + if (Buf[i] == Buf[i-1]) + insertPt++; + else + Buf[insertPt] = Buf[i]; + } + return insertPt; +} + + +RWBuffer<unsigned> Indices; + +// The mangled version of main only remains without inlining +// because it has internal linkage from the start +// Note main functions get the norecurse attrib, which IntAttr reflects +// NOINLINE: Function Attrs: alwaysinline +// NOINLINE: define internal void @"?main@@YAXI@Z"(i32 noundef %GI) [[IntAttr]] +// NOINLINE: ret void + +// The unmangled version is not inlined, EntryAttr reflects that +// CHECK: 
Function Attrs: convergent norecurse +// CHECK: define void @main() [[EntryAttr:\#[0-9]+]] +// Make sure function calls are inlined when AlwaysInline is run +// This only leaves calls to llvm. intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// CHECK: ret void + +[numthreads(1,1,1)] +[shader("compute")] +void main(unsigned int GI : SV_GroupIndex) { + unsigned tmpIndices[MAX]; + if (GI > MAX) return; + for (unsigned i = 1; i < GI; i++) + tmpIndices[i] = Indices[i]; + RemoveDupes(tmpIndices, GI); + for (unsigned i = 1; i < GI; i++) + tmpIndices[i] = Indices[i]; +} + +// The mangled version of main only remains without inlining +// because it has internal linkage from the start +// Note main functions get the norecurse attrib, which IntAttr reflects +// NOINLINE: Function Attrs: alwaysinline +// NOINLINE: define internal void @"?main10@@YAXXZ"() [[IntAttr]] +// NOINLINE: ret void + +// The unmangled version is not inlined, EntryAttr reflects that +// CHECK: Function Attrs: convergent norecurse +// CHECK: define void @main10() [[EntryAttr]] +// Make sure function calls are inlined when AlwaysInline is run +// This only leaves calls to llvm. intrinsics +// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}} +// CHECK: ret void + +[numthreads(1,1,1)] +[shader("compute")] +void main10() { + main(10); +} + +// CHECK: attributes [[IntAttr]] = {{.*}} alwaysinline +// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline +// CHECK-NOT: attributes [[EntryAttr]] = {{.*}} alwaysinline >From b7e367a51f61c511bca45eed28e2ea0701c3e4f3 Mon Sep 17 00:00:00 2001 From: Greg Roth <grr...@microsoft.com> Date: Thu, 29 Aug 2024 12:59:03 -0600 Subject: [PATCH 2/2] update RWBuffer-AST test for later alwaysinline marking Previously, the alwaysinline attribute was set for the RWBuffer subscript operator as soon as it was created. 
Since they are all in a common place now, that setting was redundant, but it does mean that it won't show up in the AST --- clang/test/AST/HLSL/RWBuffer-AST.hlsl | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/test/AST/HLSL/RWBuffer-AST.hlsl b/clang/test/AST/HLSL/RWBuffer-AST.hlsl index 1f6ef60e121ea5..dd0208d50a05b1 100644 --- a/clang/test/AST/HLSL/RWBuffer-AST.hlsl +++ b/clang/test/AST/HLSL/RWBuffer-AST.hlsl @@ -42,7 +42,6 @@ RWBuffer<float> Buffer; // CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}} // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'const RWBuffer<element_type>' lvalue implicit this // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' -// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline // CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> operator[] 'element_type &(unsigned int)' // CHECK-NEXT: ParmVarDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> Idx 'unsigned int' @@ -52,7 +51,6 @@ RWBuffer<float> Buffer; // CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}} // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWBuffer<element_type>' lvalue implicit this // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' -// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class RWBuffer definition _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits