JonChesterfield created this revision.
JonChesterfield added reviewers: arsenm, jdoerfert.
Herald added subscribers: guansong, yaxunl.
JonChesterfield requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

Fixes miscompile of calls into ocml. Bug 51445.

A stack variable such as `double __tmp` is moved to dynamically allocated
shared memory by CGOpenMPRuntimeGPU. This is usually fine, but when the
variable is passed to a function that is explicitly annotated
`address_space(5)`, allocating the variable off-stack leads to a miscompile
in the back end, which cannot decide to move the variable from shared
memory back to the stack.

This could be fixed either by removing the AS(5) annotation from the math
library or by explicitly allocating the variables with
`omp_thread_mem_alloc` (via `#pragma omp allocate`); this patch does the
latter. The cast to AS(5) is still a no-op once IR is reached.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107971

Files:
  clang/lib/Headers/__clang_hip_math.h

Index: clang/lib/Headers/__clang_hip_math.h
===================================================================
--- clang/lib/Headers/__clang_hip_math.h
+++ clang/lib/Headers/__clang_hip_math.h
@@ -19,6 +19,9 @@
 #endif
 #include <limits.h>
 #include <stdint.h>
+#ifdef __OPENMP_AMDGCN__
+#include <omp.h>
+#endif
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@
 __DEVICE__
 float frexpf(float __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
       __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -343,6 +349,9 @@
 __DEVICE__
 float modff(float __x, float *__iptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
       __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__iptr = __tmp;
@@ -423,6 +432,9 @@
 __DEVICE__
 float remquof(float __x, float __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r = __ocml_remquo_f32(
       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -479,6 +491,9 @@
 __DEVICE__
 void sincosf(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr =
       __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -487,6 +502,9 @@
 __DEVICE__
 void sincospif(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f32(
       __x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -799,6 +817,9 @@
 __DEVICE__
 double frexp(double __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
       __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -883,6 +904,9 @@
 __DEVICE__
 double modf(double __x, double *__iptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
       __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
   *__iptr = __tmp;
@@ -971,6 +995,9 @@
 __DEVICE__
 double remquo(double __x, double __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r = __ocml_remquo_f64(
       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -1029,6 +1056,9 @@
 __DEVICE__
 void sincos(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincos_f64(
       __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
@@ -1037,6 +1067,9 @@
 __DEVICE__
 void sincospi(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f64(
       __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to