kzhuravl-AMD updated this revision to Diff 49652.
kzhuravl-AMD added a comment.
Review Feedback - Updated diff
http://reviews.llvm.org/D17764
Files:
include/clang/Basic/Attr.td
include/clang/Basic/AttrDocs.td
include/clang/Driver/Options.td
include/clang/Frontend/CodeGenOptions.def
lib/CodeGen/CGCall.cpp
lib/CodeGen/TargetInfo.cpp
lib/Driver/Tools.cpp
lib/Frontend/CompilerInvocation.cpp
lib/Sema/SemaDeclAttr.cpp
test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl
test/CodeGenOpenCL/amdgpu-tools-attrs.cl
test/CodeGenOpenCL/amdgpu-tools-opts.cl
test/SemaOpenCL/amdgpu-tools-attrs.cl
Index: test/SemaOpenCL/amdgpu-tools-attrs.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/amdgpu-tools-attrs.cl
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple amdgcn--amdhsa -verify -fsyntax-only %s
+
+// Check attributes on non-kernel functions and on non-function declarations.
+__attribute__((amdgpu_tools_insert_nops)) void not_kernel0() {} // expected-error {{'amdgpu_tools_insert_nops' attribute only applies to kernel functions}}
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) void not_kernel1() {} // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) void not_kernel2() {} // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute only applies to kernel functions}}
+typedef __attribute__((amdgpu_tools_insert_nops)) struct foo0_s { // expected-error {{'amdgpu_tools_insert_nops' attribute only applies to kernel functions}}
+ int x;
+ int y;
+} foo0_t;
+typedef __attribute__((amdgpu_tools_num_reserved_vgpr(8))) struct foo1_s { // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute only applies to kernel functions}}
+ int x;
+ int y;
+} foo1_t;
+typedef __attribute__((amdgpu_tools_num_reserved_sgpr(4))) struct foo2_s { // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute only applies to kernel functions}}
+ int x;
+ int y;
+} foo2_t;
+
+// Check non-integer attribute values.
+__attribute__((amdgpu_tools_num_reserved_vgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_tools_num_reserved_sgpr("DEF"))) kernel void foo4() {} // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute requires an integer constant}}
+
+// Check large attribute values.
+__attribute__((amdgpu_tools_num_reserved_vgpr(4294967296))) kernel void foo5() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_tools_num_reserved_sgpr(4294967296))) kernel void foo6() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_tools_num_reserved_vgpr(4294967296), amdgpu_tools_num_reserved_sgpr(4294967296))) kernel void foo7() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+
+// Check valid attributes.
+__attribute__((amdgpu_tools_insert_nops)) kernel void foo8() {}
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) kernel void foo9() {}
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) kernel void foo10() {}
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) kernel void foo11() {}
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) kernel void foo12() {}
+__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) kernel void foo13() {}
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) kernel void foo14() {}
+// Make sure 0 VGPRs is accepted.
+__attribute__((amdgpu_tools_num_reserved_vgpr(0))) kernel void foo15() {}
+// Make sure 0 SGPRs is accepted.
+__attribute__((amdgpu_tools_num_reserved_sgpr(0))) kernel void foo16() {}
+// Make sure 0 VGPRs and 0 SGPRs is accepted.
+__attribute__((amdgpu_tools_num_reserved_vgpr(0), amdgpu_tools_num_reserved_sgpr(0))) kernel void foo17() {}
+// Make sure kernel keyword can go before __attribute__ keyword.
+kernel __attribute__((amdgpu_tools_insert_nops)) void foo18() {}
+kernel __attribute__((amdgpu_tools_num_reserved_vgpr(8))) void foo19() {}
+kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) void foo20() {}
+kernel __attribute__((amdgpu_tools_num_reserved_sgpr(4))) void foo21() {}
+kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) void foo22() {}
+kernel __attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) void foo23() {}
+kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) void foo24() {}
Index: test/CodeGenOpenCL/amdgpu-tools-opts.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-tools-opts.cl
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR0_SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=0 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR0_SGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_VGPR8 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_VGPR8_SGPR4 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR0 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=0 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_SGPR0 %s
+
+kernel void foo() { // expected-no-diagnostics
+// NOP: define void @foo() [[ATTR_NOP:#[0-9]+]]
+// VGPR8: define void @foo() [[ATTR_VGPR8:#[0-9]+]]
+// NOP_VGPR8: define void @foo() [[ATTR_NOP_VGPR8:#[0-9]+]]
+// SGPR4: define void @foo() [[ATTR_SGPR4:#[0-9]+]]
+// NOP_SGPR4: define void @foo() [[ATTR_NOP_SGPR4:#[0-9]+]]
+// VGPR8_SGPR4: define void @foo() [[ATTR_VGPR8_SGPR4:#[0-9]+]]
+// NOP_VGPR8_SGPR4: define void @foo() [[ATTR_NOP_VGPR8_SGPR4:#[0-9]+]]
+}
+
+kernel void another_foo() { // expected-no-diagnostics
+// NOP: define void @another_foo() [[ATTR_SECOND_NOP:#[0-9]+]]
+}
+
+// NOP-DAG: attributes [[ATTR_NOP]] = { nounwind "amdgpu_tools_insert_nops"
+// NOP-DAG: attributes [[ATTR_SECOND_NOP]] = { nounwind "amdgpu_tools_insert_nops"
+// VGPR8-DAG: attributes [[ATTR_VGPR8]] = { nounwind "amdgpu_tools_num_reserved_vgpr"="8"
+// NOP_VGPR8-DAG: attributes [[ATTR_NOP_VGPR8]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="8"
+// SGPR4-DAG: attributes [[ATTR_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4"
+// NOP_SGPR4-DAG: attributes [[ATTR_NOP_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4"
+// VGPR8_SGPR4-DAG: attributes [[ATTR_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// NOP_VGPR8_SGPR4-DAG: attributes [[ATTR_NOP_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// VGPR0-NOT: "amdgpu_tools_num_reserved_vgpr"="0"
+// NOP_VGPR0-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="0"
+// SGPR0-NOT: "amdgpu_tools_num_reserved_sgpr"="0"
+// NOP_SGPR0-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="0"
+// VGPR0_SGPR0-NOT: "amdgpu_tools_num_reserved_vgpr"="0" "amdgpu_tools_num_reserved_sgpr"="0"
+// NOP_VGPR0_SGPR0-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="0" "amdgpu_tools_num_reserved_vgpr"="0"
+// X86_NOP-NOT: "amdgpu_tools_insert_nops"
+// X86_VGPR8-NOT: "amdgpu_tools_num_reserved_vgpr"
+// X86_NOP_VGPR8-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="8"
+// X86_SGPR4-NOT: "amdgpu_tools_num_reserved_sgpr"
+// X86_NOP_SGPR4-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4"
+// X86_VGPR8_SGPR4-NOT: "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// X86_NOP_VGPR8_SGPR4-NOT: "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// X86_VGPR0-NOT: "amdgpu_tools_num_reserved_vgpr"
+// X86_SGPR0-NOT: "amdgpu_tools_num_reserved_sgpr"
Index: test/CodeGenOpenCL/amdgpu-tools-attrs.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-tools-attrs.cl
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
+
+__attribute__((amdgpu_tools_insert_nops)) // expected-no-diagnostics
+kernel void test_tools_insert_nops() {
+// CHECK: define void @test_tools_insert_nops() [[ATTR_NOP:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr8() {
+// CHECK: define void @test_tools_num_reserved_vgpr8() [[ATTR_VGPR8:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics
+kernel void test_tools_insert_nops_num_reserved_vgpr8() {
+// CHECK: define void @test_tools_insert_nops_num_reserved_vgpr8() [[ATTR_NOP_VGPR8:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_sgpr4() {
+// CHECK: define void @test_tools_num_reserved_sgpr4() [[ATTR_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_insert_nops_num_reserved_sgpr4() {
+// CHECK: define void @test_tools_insert_nops_num_reserved_sgpr4() [[ATTR_NOP_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr8_sgpr4() {
+// CHECK: define void @test_tools_num_reserved_vgpr8_sgpr4() [[ATTR_VGPR8_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_tools_insert_nops_num_reserved_vgpr8_sgpr4() {
+// CHECK: define void @test_tools_insert_nops_num_reserved_vgpr8_sgpr4() [[ATTR_NOP_VGPR8_SGPR4:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(0))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr0() {}
+
+__attribute__((amdgpu_tools_num_reserved_sgpr(0))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_sgpr0() {}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(0), amdgpu_tools_num_reserved_sgpr(0))) // expected-no-diagnostics
+kernel void test_tools_num_reserved_vgpr0_sgpr0() {}
+
+// CHECK-DAG: attributes [[ATTR_NOP]] = { nounwind "amdgpu_tools_insert_nops"
+// CHECK-DAG: attributes [[ATTR_VGPR8]] = { nounwind "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-DAG: attributes [[ATTR_NOP_VGPR8]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-DAG: attributes [[ATTR_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4"
+// CHECK-DAG: attributes [[ATTR_NOP_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4"
+// CHECK-DAG: attributes [[ATTR_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-DAG: attributes [[ATTR_NOP_VGPR8_SGPR4]] = { nounwind "amdgpu_tools_insert_nops" "amdgpu_tools_num_reserved_sgpr"="4" "amdgpu_tools_num_reserved_vgpr"="8"
+// CHECK-NOT: "amdgpu_tools_num_reserved_vgpr"="0"
+// CHECK-NOT: "amdgpu_tools_num_reserved_sgpr"="0"
+// X86-NOT: "amdgpu_tools_insert_nops"
+// X86-NOT: "amdgpu_tools_num_reserved_vgpr"
+// X86-NOT: "amdgpu_tools_num_reserved_sgpr"
Index: test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC1 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC2 %s
+// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=4 --amdgpu-tools-num-reserved-sgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC3 %s
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics
+kernel void test_prec1() {
+// PREC1: define void @test_prec1() [[ATTR_PREC1:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_prec2() {
+// PREC2: define void @test_prec2() [[ATTR_PREC2:#[0-9]+]]
+}
+
+__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics
+kernel void test_prec3() {
+// PREC3: define void @test_prec3() [[ATTR_PREC3:#[0-9]+]]
+}
+
+// PREC1-DAG: attributes [[ATTR_PREC1]] = { nounwind "amdgpu_tools_num_reserved_vgpr"="4"
+// PREC2-DAG: attributes [[ATTR_PREC2]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="8"
+// PREC3-DAG: attributes [[ATTR_PREC3]] = { nounwind "amdgpu_tools_num_reserved_sgpr"="8" "amdgpu_tools_num_reserved_vgpr"="4"
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -4703,30 +4703,18 @@
}
}
-static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
- const AttributeList &Attr) {
- uint32_t NumRegs;
- Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
- if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
- return;
-
- D->addAttr(::new (S.Context)
- AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
- NumRegs,
- Attr.getAttributeSpellingListIndex()));
-}
-
-static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
- const AttributeList &Attr) {
- uint32_t NumRegs;
- Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
- if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+template <typename AMDGPUAttrType>
+static void handleAMDGPUUInt32Attr(Sema &S, Decl *D,
+ const AttributeList &Attr) {
+ uint32_t UInt32Arg;
+ Expr *UInt32ArgExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+ if (!checkUInt32Argument(S, Attr, UInt32ArgExpr, UInt32Arg))
return;
D->addAttr(::new (S.Context)
- AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
- NumRegs,
- Attr.getAttributeSpellingListIndex()));
+ AMDGPUAttrType(Attr.getLoc(), S.Context,
+ UInt32Arg,
+ Attr.getAttributeSpellingListIndex()));
}
static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
@@ -5133,10 +5121,19 @@
handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
break;
case AttributeList::AT_AMDGPUNumVGPR:
- handleAMDGPUNumVGPRAttr(S, D, Attr);
+ handleAMDGPUUInt32Attr<AMDGPUNumVGPRAttr>(S, D, Attr);
break;
case AttributeList::AT_AMDGPUNumSGPR:
- handleAMDGPUNumSGPRAttr(S, D, Attr);
+ handleAMDGPUUInt32Attr<AMDGPUNumSGPRAttr>(S, D, Attr);
+ break;
+ case AttributeList::AT_AMDGPUToolsInsertNops:
+ handleSimpleAttribute<AMDGPUToolsInsertNopsAttr>(S, D, Attr);
+ break;
+ case AttributeList::AT_AMDGPUToolsNumReservedVGPR:
+ handleAMDGPUUInt32Attr<AMDGPUToolsNumReservedVGPRAttr>(S, D, Attr);
+ break;
+ case AttributeList::AT_AMDGPUToolsNumReservedSGPR:
+ handleAMDGPUUInt32Attr<AMDGPUToolsNumReservedSGPRAttr>(S, D, Attr);
break;
case AttributeList::AT_IBAction:
handleSimpleAttribute<IBActionAttr>(S, D, Attr);
@@ -5653,6 +5650,15 @@
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
<< A << ExpectedKernelFunction;
D->setInvalidDecl();
+ } else if (Attr *A = D->getAttr<AMDGPUToolsInsertNopsAttr>()) {
+ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+ << A << ExpectedKernelFunction;
+ } else if (Attr *A = D->getAttr<AMDGPUToolsNumReservedVGPRAttr>()) {
+ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+ << A << ExpectedKernelFunction;
+ } else if (Attr *A = D->getAttr<AMDGPUToolsNumReservedSGPRAttr>()) {
+ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+ << A << ExpectedKernelFunction;
}
}
}
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -664,6 +664,21 @@
Opts.StackProbeSize = StackProbeSize;
}
+ // Set up AMDGPU Tools arguments.
+ Opts.AMDGPUToolsInsertNopsOpt = Args.hasArg(OPT_amdgpu_tools_insert_nops);
+ if (Arg *A = Args.getLastArg(OPT_amdgpu_tools_num_reserved_vgpr)) {
+ StringRef Val = A->getValue();
+ unsigned AMDGPUToolsNumReservedVGPROpt = Opts.AMDGPUToolsNumReservedVGPROpt;
+ Val.getAsInteger(0, AMDGPUToolsNumReservedVGPROpt);
+ Opts.AMDGPUToolsNumReservedVGPROpt = AMDGPUToolsNumReservedVGPROpt;
+ }
+ if (Arg *A = Args.getLastArg(OPT_amdgpu_tools_num_reserved_sgpr)) {
+ StringRef Val = A->getValue();
+ unsigned AMDGPUToolsNumReservedSGPROpt = Opts.AMDGPUToolsNumReservedSGPROpt;
+ Val.getAsInteger(0, AMDGPUToolsNumReservedSGPROpt);
+ Opts.AMDGPUToolsNumReservedSGPROpt = AMDGPUToolsNumReservedSGPROpt;
+ }
+
if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) {
StringRef Name = A->getValue();
unsigned Method = llvm::StringSwitch<unsigned>(Name)
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -4813,6 +4813,26 @@
CmdArgs.push_back("-mstack-probe-size=0");
}
+ // Translate AMDGPU Tools arguments.
+ if (Args.hasArg(options::OPT_amdgpu_tools_insert_nops))
+ CmdArgs.push_back("--amdgpu-tools-insert-nops");
+ if (Args.hasArg(options::OPT_amdgpu_tools_num_reserved_vgpr)) {
+ StringRef Size =
+ Args.getLastArgValue(options::OPT_amdgpu_tools_num_reserved_vgpr);
+
+ if (!Size.empty())
+ CmdArgs.push_back(
+ Args.MakeArgString("--amdgpu-tools-num-reserved-vgpr=" + Size));
+ }
+ if (Args.hasArg(options::OPT_amdgpu_tools_num_reserved_sgpr)) {
+ StringRef Size =
+ Args.getLastArgValue(options::OPT_amdgpu_tools_num_reserved_sgpr);
+
+ if (!Size.empty())
+ CmdArgs.push_back(
+ Args.MakeArgString("--amdgpu-tools-num-reserved-sgpr=" + Size));
+ }
+
switch (getToolChain().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -6589,14 +6589,34 @@
llvm::Function *F = cast<llvm::Function>(GV);
uint32_t NumVGPR = Attr->getNumVGPR();
if (NumVGPR != 0)
- F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+ F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumVGPR));
}
if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
llvm::Function *F = cast<llvm::Function>(GV);
unsigned NumSGPR = Attr->getNumSGPR();
if (NumSGPR != 0)
- F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+ F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumSGPR));
+ }
+
+ if (const auto Attr = FD->getAttr<AMDGPUToolsInsertNopsAttr>()) {
+ llvm::Function *F = cast<llvm::Function>(GV);
+ if (!F->hasFnAttribute(Attr->getSpelling()))
+ F->addFnAttr(Attr->getSpelling());
+ }
+
+ if (const auto Attr = FD->getAttr<AMDGPUToolsNumReservedVGPRAttr>()) {
+ llvm::Function *F = cast<llvm::Function>(GV);
+ unsigned NumReservedVGPR = Attr->getNumReservedVGPR();
+ if (!F->hasFnAttribute(Attr->getSpelling()) && NumReservedVGPR != 0)
+ F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumReservedVGPR));
+ }
+
+ if (const auto Attr = FD->getAttr<AMDGPUToolsNumReservedSGPRAttr>()) {
+ llvm::Function *F = cast<llvm::Function>(GV);
+ unsigned NumReservedSGPR = Attr->getNumReservedSGPR();
+ if (!F->hasFnAttribute(Attr->getSpelling()) && NumReservedSGPR != 0)
+ F->addFnAttr(Attr->getSpelling(), llvm::utostr(NumReservedSGPR));
}
}
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1595,6 +1595,20 @@
}
}
+ if (getTarget().getTriple().getArch() == llvm::Triple::amdgcn) {
+ // Add AMDGPU Tools attributes if needed.
+ if (CodeGenOpts.AMDGPUToolsInsertNopsOpt)
+ FuncAttrs.addAttribute("amdgpu_tools_insert_nops");
+ if (CodeGenOpts.AMDGPUToolsNumReservedVGPROpt)
+ FuncAttrs.addAttribute(
+ "amdgpu_tools_num_reserved_vgpr",
+ llvm::utostr(CodeGenOpts.AMDGPUToolsNumReservedVGPROpt));
+ if (CodeGenOpts.AMDGPUToolsNumReservedSGPROpt)
+ FuncAttrs.addAttribute(
+ "amdgpu_tools_num_reserved_sgpr",
+ llvm::utostr(CodeGenOpts.AMDGPUToolsNumReservedSGPROpt));
+ }
+
if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
// Conservatively, mark all functions and calls in CUDA as convergent
// (meaning, they may call an intrinsically convergent op, such as
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -167,6 +167,16 @@
///< alignment, if not 0.
VALUE_CODEGENOPT(StackProbeSize , 32, 4096) ///< Overrides default stack
///< probe size, even if 0.
+
+/// \brief Control setting of ``amdgpu_tools_insert_nops`` attribute
+CODEGENOPT(AMDGPUToolsInsertNopsOpt, 1, 0)
+/// \brief Control setting of ``amdgpu_tools_num_reserved_vgpr(<num>)``
+/// attribute
+VALUE_CODEGENOPT(AMDGPUToolsNumReservedVGPROpt, 32, 0)
+/// \brief Control setting of ``amdgpu_tools_num_reserved_sgpr(<num>)``
+/// attribute
+VALUE_CODEGENOPT(AMDGPUToolsNumReservedSGPROpt, 32, 0)
+
CODEGENOPT(DebugColumnInfo, 1, 0) ///< Whether or not to use column information
///< in debug info.
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -352,6 +352,17 @@
def Z_Joined : Joined<["-"], "Z">;
def all__load : Flag<["-"], "all_load">;
def allowable__client : Separate<["-"], "allowable_client">;
+def amdgpu_tools_insert_nops :
+ Flag<["--"], "amdgpu-tools-insert-nops">, Flags<[CC1Option, HelpHidden]>,
+ HelpText<"Insert two nop instructions for each high level source statement">;
+def amdgpu_tools_num_reserved_vgpr :
+ Joined<["--"], "amdgpu-tools-num-reserved-vgpr=">,
+ Flags<[CC1Option, HelpHidden]>,
+ HelpText<"Reserve <num> vector registers">, MetaVarName<"<num>">;
+def amdgpu_tools_num_reserved_sgpr :
+ Joined<["--"], "amdgpu-tools-num-reserved-sgpr=">,
+ Flags<[CC1Option, HelpHidden]>,
+ HelpText<"Reserve <num> scalar registers">, MetaVarName<"<num>">;
def ansi : Flag<["-", "--"], "ansi">;
def arch__errors__fatal : Flag<["-"], "arch_errors_fatal">;
def arch : Separate<["-"], "arch">, Flags<[DriverOption]>;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -936,6 +936,66 @@
}];
}
+def DocCatAMDGPUToolsAttributes :
+ DocumentationCategory<"AMD GPU Tools Attributes"> {
+ let Content = [{
+
+Clang supports the following AMD GPU attributes for tools, such as debuggers
+and profilers:
+ }];
+}
+
+def AMDGPUToolsInsertNopsDocs : Documentation {
+ let Category = DocCatAMDGPUToolsAttributes;
+ let Content = [{
+
+Clang supports the ``__attribute__((amdgpu_tools_insert_nops))`` attribute on
+AMD Southern Islands GPUs and later. If specified, it causes the AMD GPU
+backend to insert two nop instructions for each high-level source statement:
+one nop instruction is inserted before the first ISA instruction of the
+high-level source statement, and one nop instruction is inserted after the last
+ISA instruction of the high-level source statement.
+
+In addition to specifying this attribute manually, clang can add this attribute
+for each kernel function in the translation unit if the
+``--amdgpu-tools-insert-nops`` clang command line option is specified.
+ }];
+}
+
+def AMDGPUToolsNumReservedVGPRDocs : Documentation {
+ let Category = DocCatAMDGPUToolsAttributes;
+ let Content = [{
+
+Clang supports the ``__attribute__((amdgpu_tools_num_reserved_vgpr(<num>)))``
+attribute on AMD Southern Islands GPUs and later. If specified, it causes the
+AMD GPU backend to reserve ``<num>`` vector registers and to not use those
+registers throughout kernel function execution. The index of the first reserved
+vector register is recorded in ``amd_kernel_code_t``.
+
+In addition to specifying this attribute manually, clang can add this attribute
+for each kernel function in the translation unit if the
+``--amdgpu-tools-num-reserved-vgpr=<num>`` clang command line option is
+specified. In this case, the option takes precedence over the attribute.
+ }];
+}
+
+def AMDGPUToolsNumReservedSGPRDocs : Documentation {
+ let Category = DocCatAMDGPUToolsAttributes;
+ let Content = [{
+
+Clang supports the ``__attribute__((amdgpu_tools_num_reserved_sgpr(<num>)))``
+attribute on AMD Southern Islands GPUs and later. If specified, it causes the
+AMD GPU backend to reserve ``<num>`` scalar registers and to not use those
+registers throughout kernel function execution. The index of the first reserved
+scalar register is recorded in ``amd_kernel_code_t``.
+
+In addition to specifying this attribute manually, clang can add this attribute
+for each kernel function in the translation unit if the
+``--amdgpu-tools-num-reserved-sgpr=<num>`` clang command line option is
+specified. In this case, the option takes precedence over the attribute.
+ }];
+}
+
def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
let Content = [{
Clang supports several different calling conventions, depending on the target
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -987,7 +987,30 @@
let Args = [UnsignedArgument<"NumSGPR">];
let Documentation = [AMDGPUNumSGPRDocs];
let Subjects = SubjectList<[Function], ErrorDiag,
- "ExpectedKernelFunction">;
+ "ExpectedKernelFunction">;
+}
+
+def AMDGPUToolsInsertNops : InheritableAttr {
+ let Spellings = [GNU<"amdgpu_tools_insert_nops">];
+ let Documentation = [AMDGPUToolsInsertNopsDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag,
+ "ExpectedKernelFunction">;
+}
+
+def AMDGPUToolsNumReservedVGPR : InheritableAttr {
+ let Spellings = [GNU<"amdgpu_tools_num_reserved_vgpr">];
+ let Args = [UnsignedArgument<"NumReservedVGPR">];
+ let Documentation = [AMDGPUToolsNumReservedVGPRDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag,
+ "ExpectedKernelFunction">;
+}
+
+def AMDGPUToolsNumReservedSGPR : InheritableAttr {
+ let Spellings = [GNU<"amdgpu_tools_num_reserved_sgpr">];
+ let Args = [UnsignedArgument<"NumReservedSGPR">];
+ let Documentation = [AMDGPUToolsNumReservedSGPRDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag,
+ "ExpectedKernelFunction">;
}
def NoSplitStack : InheritableAttr {
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits