[PATCH] D72841: Add support for pragma float_control, to control precision and exception behavior at the source level

2020-06-23 Thread Changpeng Fang via Phabricator via cfe-commits
cfang added a comment.

The -ffast-math flag gets lost in the Builder after this change.

FMF.isFast() is true before the call to updateFastMathFlags(FMF, FPFeatures),
but becomes false afterwards.
It seems Builder.FMF was set correctly beforehand, but it is not clear to me
what FPFeatures should be at this point:

+static void setBuilderFlagsFromFPFeatures(CGBuilderTy &Builder,
+  CodeGenFunction &CGF,
+  FPOptions FPFeatures) {
+  auto NewRoundingBehavior = FPFeatures.getRoundingMode();
+  Builder.setDefaultConstrainedRounding(NewRoundingBehavior);
+  auto NewExceptionBehavior =
+  ToConstrainedExceptMD(FPFeatures.getExceptionMode());
+  Builder.setDefaultConstrainedExcept(NewExceptionBehavior);
+  auto FMF = Builder.getFastMathFlags();
+  updateFastMathFlags(FMF, FPFeatures);
+  Builder.setFastMathFlags(FMF);
+  assert((CGF.CurFuncDecl == nullptr || Builder.getIsFPConstrained() ||
+  isa<CXXConstructorDecl>(CGF.CurFuncDecl) ||
+  isa<CXXDestructorDecl>(CGF.CurFuncDecl) ||
+  (NewExceptionBehavior == llvm::fp::ebIgnore &&
+   NewRoundingBehavior == llvm::RoundingMode::NearestTiesToEven)) &&
+ "FPConstrained should be enabled on entire function");
+}


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72841/new/

https://reviews.llvm.org/D72841



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120265: AMDGPU: Use the implicit kernargs for code object version 5

2022-03-17 Thread Changpeng Fang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdd5895cc3986: AMDGPU: Use the implicit kernargs for code 
object version 5 (authored by cfang).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D120265?vs=416080&id=416315#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120265/new/

https://reviews.llvm.org/D120265

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
  llvm/lib/Target/AMDGPU/SIDefines.h
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/lib/Target/AMDGPU/SIISelLowering.h
  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
  llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
  llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll

Index: llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -0,0 +1,550 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=GFX8V3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefix=GFX8V4 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=GFX8V5 %s
+
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefixes=GFX9V3 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s
+
+define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
+; GFX8V3-LABEL: addrspacecast:
+; GFX8V3:   ; %bb.0:
+; GFX8V3-NEXT:s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX8V3-NEXT:s_load_dword s2, s[4:5], 0x44
+; GFX8V3-NEXT:s_load_dword s3, s[4:5], 0x40
+; GFX8V3-NEXT:v_mov_b32_e32 v4, 1
+; GFX8V3-NEXT:s_waitcnt lgkmcnt(0)
+; GFX8V3-NEXT:s_cmp_lg_u32 s0, -1
+; GFX8V3-NEXT:v_mov_b32_e32 v0, s2
+; GFX8V3-NEXT:s_cselect_b64 vcc, -1, 0
+; GFX8V3-NEXT:v_cndmask_b32_e32 v1, 0, v0, vcc
+; GFX8V3-NEXT:v_mov_b32_e32 v0, s0
+; GFX8V3-NEXT:s_cmp_lg_u32 s1, -1
+; GFX8V3-NEXT:v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX8V3-NEXT:v_mov_b32_e32 v2, s3
+; GFX8V3-NEXT:s_cselect_b64 vcc, -1, 0
+; GFX8V3-NEXT:v_cndmask_b32_e32 v3, 0, v2, vcc
+; GFX8V3-NEXT:v_mov_b32_e32 v2, s1
+; GFX8V3-NEXT:v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8V3-NEXT:flat_store_dword v[0:1], v4
+; GFX8V3-NEXT:s_waitcnt vmcnt(0)
+; GFX8V3-NEXT:v_mov_b32_e32 v0, 2
+; GFX8V3-NEXT:flat_store_dword v[2:3], v0
+; GFX8V3-NEXT:s_waitcnt vmcnt(0)
+; GFX8V3-NEXT:s_endpgm
+;
+; GFX8V4-LABEL: addrspacecast:
+; GFX8V4:   ; %bb.0:
+; GFX8V4-NEXT:s_load_dwordx2 s[0:1], s[6:7], 0x0
+; GFX8V4-NEXT:s_load_dword s2, s[4:5], 0x44
+; GFX8V4-NEXT:s_load_dword s3, s[4:5], 0x40
+; GFX8V4-NEXT:v_mov_b32_e32 v4, 1
+; GFX8V4-NEXT:s_waitcnt lgkmcnt(0)
+; GFX8V4-NEXT:s_cmp_lg_u32 s0, -1
+; GFX8V4-NEXT:v_mov_b32_e32 v0, s2
+; GFX8V4-NEXT:s_cselect_b64 vcc, -1, 0
+; GFX8V4-NEXT:v_cndmask_b32_e32 v1, 0, v0, vcc
+; GFX8V4-NEXT:v_mov_b32_e32 v0, s0
+; GFX8V4-NEXT:s_cmp_lg_u32 s1, -1
+; GFX8V4-NEXT:v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX8V4-NEXT:v_mov_b32_e32 v2, s3
+; GFX8V4-NEXT:s_cselect_b64 vcc, -1, 0
+; GFX8V4-NEXT:v_cndmask_b32_e32 v3, 0, v2, vcc
+; GFX8V4-NEXT:v_mov_b32_e32 v2, s1
+; GFX8V4-NEXT:v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8V4-NEXT:flat_store_dword v[0:1], v4
+; GFX8V4-NEXT:s_waitcnt vmcnt(0)
+; GFX8V4-NEXT:v_mov_b32_e32 v0, 2
+; GFX8V4-NEXT:flat_store_dword v[2:3], v0
+; GFX8V4-NEXT:s_waitcnt vmcnt(0)
+; GFX8V4-NEXT:s_endpgm
+;
+; GFX8V5-LABEL: addrspacecast:
+; GFX8V5:   ; %bb.0:
+; GFX8V5-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX8V5-NEXT:s_load_dword s2, s[4:5], 0xc8
+; GFX8V5-NEXT:s_load_dword s3, s[4:5], 0xcc
+; GFX8V5-NEXT:v_mov_b32_e32 v4, 1
+; GFX8V5-NEXT:s_waitcnt lgkmcnt(0)
+; GFX8V5-NEXT:s_cmp_lg_u32 s0, -1
+; GFX8V5-NEXT:v_mov_b32_e32 v0, s2
+; GFX8V5-NEXT:s_cselect_b64 vcc, -1, 0
+; GFX8V5-NEXT:v_cndmask_b32_e32 v1, 0, v0, vcc
+; GFX8V5-NEXT:v_mov_b32_e32 v0, s0
+; GF

[PATCH] D134355: [AMDGPU] Emit module flag for all code object versions

2022-09-21 Thread Changpeng Fang via Phabricator via cfe-commits
cfang accepted this revision.
cfang added a comment.
This revision is now accepted and ready to land.

LGTM

Should the module flag name be amdgpu_code_object_version or 
amdhsa_code_object_version?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D134355/new/

https://reviews.llvm.org/D134355

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155986: [clang][AMDGPU]: Don't use byval for struct arguments in function ABI

2023-08-11 Thread Changpeng Fang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd77c62053c94: [clang][AMDGPU]: Don't use byval for 
struct arguments in function ABI (authored by cfang).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D155986?vs=549545&id=549546#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155986/new/

https://reviews.llvm.org/D155986

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenCXX/amdgcn-func-arg.cpp
  clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  clang/test/CodeGenOpenCL/byval.cl
  llvm/docs/AMDGPUUsage.rst

Index: llvm/docs/AMDGPUUsage.rst
===
--- llvm/docs/AMDGPUUsage.rst
+++ llvm/docs/AMDGPUUsage.rst
@@ -13812,6 +13812,10 @@
 9.  All other registers are unspecified.
 10. Any necessary ``s_waitcnt`` has been performed to ensure memory is available
 to the function.
+11: Use pass-by-reference (byref) in stead of pass-by-value (byval) for struct
+arguments in C ABI. Callee is responsible for allocating stack memory and
+copying the value of the struct if modified. Note that the backend still
+supports byval for struct arguments.
 
 On exit from a function:
 
Index: clang/test/CodeGenOpenCL/byval.cl
===
--- clang/test/CodeGenOpenCL/byval.cl
+++ clang/test/CodeGenOpenCL/byval.cl
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
-
+// RUN: %clang_cc1 -emit-llvm -o - -triple i686-pc-darwin %s | FileCheck -check-prefix=X86 %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck -check-prefix=AMDGCN %s
 struct A {
   int x[100];
 };
@@ -8,8 +8,10 @@
 
 int g() {
   struct A a;
-  // CHECK: call i32 @f(ptr addrspace(5) noundef byval{{.*}}%a)
+  // X86:call i32 @f(ptr noundef nonnull byval(%struct.A) align 4 %a)
+  // AMDGCN: call i32 @f(ptr addrspace(5) noundef byref{{.*}}%a)
   return f(a);
 }
 
-// CHECK: declare i32 @f(ptr addrspace(5) noundef byval{{.*}})
+// X86:   declare i32 @f(ptr noundef byval(%struct.A) align 4)
+// AMDGCN: declare i32 @f(ptr addrspace(5) noundef byref{{.*}})
Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===
--- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -448,11 +448,11 @@
 // CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) nocapture noundef readnone byval(%struct.struct_arg) align 4 %s)
+// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) nocapture noundef readnone byref(%struct.struct_arg) align 4 %{{.*}})
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) nocapture noundef readnone byval(%struct.num_regs_nested_struct) align 8 %arg4)
+// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) nocapture noundef readnone byref(%struct.num_regs_nested_struct) align 8 %{{.*}})
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -467,7 +467,7 @@
 // CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.co

[PATCH] D146023: [AMDGPU] Remove Code Object V2

2023-08-03 Thread Changpeng Fang via Phabricator via cfe-commits
cfang added a comment.






Comment at: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp:5598
   return ParseDirectiveHSAMetadata();
   } else {
-if (IDVal == ".hsa_code_object_version")

Are you sure that non-HSA targets do not use the four directives you deleted?



Comment at: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp:122
 std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
   if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
 return std::nullopt;

It is fine now. But I think STI could never be null. 



Comment at: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h:46
 enum {
-  AMDHSA_COV2 = 2,
   AMDHSA_COV3 = 3,

Should we keep this field, and just mention "unsupported"?



Comment at: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h:59
 /// false otherwise.
 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
 /// \returns True if HSA OS ABI Version identification is 4,

Are all these "isHsaAbiVersionX" no longer needed? 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146023/new/

https://reviews.llvm.org/D146023

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits