Author: Joseph Huber Date: 2025-03-03T14:26:43-06:00 New Revision: 4ca8ea8c972ae05a891687eda6704ec607184fae
URL: https://github.com/llvm/llvm-project/commit/4ca8ea8c972ae05a891687eda6704ec607184fae DIFF: https://github.com/llvm/llvm-project/commit/4ca8ea8c972ae05a891687eda6704ec607184fae.diff LOG: [Clang] Fix GPU intrinsic helpers incorrectly sign extending (#129560) Summary: The underlying builtins return signed 32-bit values, so casting the result for the low half to uint64_t sign-extends it and sets all of the upper 32 bits to one whenever bit 31 is set. Mask the low half with 0xFFFFFFFF before combining it with the high half. Added: Modified: clang/lib/Headers/amdgpuintrin.h clang/lib/Headers/nvptxintrin.h Removed: ################################################################################ diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index 355e75d0b2d42..6ad8e54f4aadd 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -121,7 +121,7 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { uint32_t __hi = (uint32_t)(__x >> 32ull); uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) | - ((uint64_t)__builtin_amdgcn_readfirstlane(__lo)); + ((uint64_t)__builtin_amdgcn_readfirstlane(__lo) & 0xFFFFFFFF); } // Returns a bitmask of threads in the current lane for which \p x is true. diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index 29d0adcabc82f..03594dd9bd6cb 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -131,7 +131,8 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { __gpu_num_lanes() - 1) << 32ull) | ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __id, - __gpu_num_lanes() - 1)); + __gpu_num_lanes() - 1) & + 0xFFFFFFFF); } // Returns a bitmask of threads in the current lane for which \p x is true. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits