Author: Chaitanya Koparkar Date: 2025-08-12T20:57:55+09:00 New Revision: c3bf73bc4ade26b8b9dd5080ce7bccd88037cfd0
URL: https://github.com/llvm/llvm-project/commit/c3bf73bc4ade26b8b9dd5080ce7bccd88037cfd0 DIFF: https://github.com/llvm/llvm-project/commit/c3bf73bc4ade26b8b9dd5080ce7bccd88037cfd0.diff LOG: [clang] Add elementwise fshl/fshr builtins (#153113) This patch implements `__builtin_elementwise_fshl` and `__builtin_elementwise_fshr` builtins. These map to the fshl/fshr intrinsics described here: - https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic - https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic Fixes https://github.com/llvm/llvm-project/issues/152555. Added: Modified: clang/docs/LanguageExtensions.rst clang/docs/ReleaseNotes.rst clang/include/clang/Basic/Builtins.td clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtins-elementwise-math.c clang/test/Sema/builtins-elementwise-math.c Removed: ################################################################################ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 5f2e57b5db5df..eef3d0c4ccb9d 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -860,6 +860,15 @@ of different sizes and signs is forbidden in binary and ternary builtins. semantics, see `LangRef <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_ for the comparison. +T __builtin_elementwise_fshl(T x, T y, T z) perform a funnel shift left. Concatenate x and y (x is the most integer types + significant bits of the wide value), the combined value is shifted + left by z, and the most significant bits are extracted to produce + a result that is the same size as the original arguments. + +T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Concatenate x and y (x is the most integer types + significant bits of the wide value), the combined value is shifted + right by z, and the least significant bits are extracted to produce + a result that is the same size as the original arguments. 
============================================== ====================================================================== ========================================= diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 29eb18bcf85fd..0e7aa3a56d0ba 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -113,6 +113,8 @@ C23 Feature Support Non-comprehensive list of changes in this release ------------------------------------------------- +- Added ``__builtin_elementwise_fshl`` and ``__builtin_elementwise_fshr``. + - Added ``__builtin_elementwise_minnumnum`` and ``__builtin_elementwise_maxnumnum``. - Trapping UBSan (e.g. ``-fsanitize-trap=undefined``) now emits a string describing the reason for diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 0181674da89ac..84206cf8b368b 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1514,6 +1514,18 @@ def ElementwiseSubSat : Builtin { let Prototype = "void(...)"; } +def ElementwiseFshl : Builtin { + let Spellings = ["__builtin_elementwise_fshl"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def ElementwiseFshr : Builtin { + let Spellings = ["__builtin_elementwise_fshr"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def ReduceMax : Builtin { let Spellings = ["__builtin_reduce_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 071667ac772e8..a1f2a874f010d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4030,6 +4030,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_fma: return RValue::get( emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma)); + case 
Builtin::BI__builtin_elementwise_fshl: + return RValue::get( + emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl)); + case Builtin::BI__builtin_elementwise_fshr: + return RValue::get( + emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr)); + case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { Value *Op0 = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 9ecee18661340..907740374dbfe 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3031,6 +3031,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, EltwiseBuiltinArgTyRestriction::IntegerTy)) return ExprError(); break; + case Builtin::BI__builtin_elementwise_fshl: + case Builtin::BI__builtin_elementwise_fshr: + if (BuiltinElementwiseTernaryMath( + TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) + return ExprError(); + break; case Builtin::BI__builtin_elementwise_min: case Builtin::BI__builtin_elementwise_max: if (BuiltinElementwiseMath(TheCall)) diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 9fd12f53da333..bb5d0351db1a2 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -1179,3 +1179,89 @@ void test_builtin_elementwise_fma(float f32, double f64, half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0); } + +void test_builtin_elementwise_fshl(long long int i1, long long int i2, + long long int i3, unsigned short us1, + unsigned short us2, unsigned short us3, + char c1, char c2, char c3, + unsigned char uc1, unsigned char uc2, + unsigned char uc3, si8 vi1, si8 vi2, + si8 vi3, u4 vu1, u4 vu2, u4 vu3) { + // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr + // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr + // CHECK-NEXT: [[I3:%.+]] = load i64, ptr %i3.addr + // CHECK-NEXT: [[I4:%.+]] 
= call i64 @llvm.fshl.i64(i64 [[I1]], i64 [[I2]], i64 [[I3]]) + // CHECK-NEXT: store i64 [[I4]], ptr %tmp_lli_l + // CHECK-NEXT: [[I5:%.+]] = load i64, ptr %i1.addr + // CHECK-NEXT: [[I6:%.+]] = load i64, ptr %i2.addr + // CHECK-NEXT: [[I7:%.+]] = load i64, ptr %i3.addr + // CHECK-NEXT: [[I8:%.+]] = call i64 @llvm.fshr.i64(i64 [[I5]], i64 [[I6]], i64 [[I7]]) + // CHECK-NEXT: store i64 [[I8]], ptr %tmp_lli_r + long long int tmp_lli_l = __builtin_elementwise_fshl(i1, i2, i3); + long long int tmp_lli_r = __builtin_elementwise_fshr(i1, i2, i3); + + // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr + // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr + // CHECK-NEXT: [[US3:%.+]] = load i16, ptr %us3.addr + // CHECK-NEXT: [[US4:%.+]] = call i16 @llvm.fshl.i16(i16 [[US1]], i16 [[US2]], i16 [[US3]]) + // CHECK-NEXT: store i16 [[US4]], ptr %tmp_usi_l + // CHECK-NEXT: [[US5:%.+]] = load i16, ptr %us1.addr + // CHECK-NEXT: [[US6:%.+]] = load i16, ptr %us2.addr + // CHECK-NEXT: [[US7:%.+]] = load i16, ptr %us3.addr + // CHECK-NEXT: [[US8:%.+]] = call i16 @llvm.fshr.i16(i16 [[US5]], i16 [[US6]], i16 [[US7]]) + // CHECK-NEXT: store i16 [[US8]], ptr %tmp_usi_r + unsigned short tmp_usi_l = __builtin_elementwise_fshl(us1, us2, us3); + unsigned short tmp_usi_r = __builtin_elementwise_fshr(us1, us2, us3); + + // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr + // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr + // CHECK-NEXT: [[C3:%.+]] = load i8, ptr %c3.addr + // CHECK-NEXT: [[C4:%.+]] = call i8 @llvm.fshl.i8(i8 [[C1]], i8 [[C2]], i8 [[C3]]) + // CHECK-NEXT: store i8 [[C4]], ptr %tmp_c_l + // CHECK-NEXT: [[C5:%.+]] = load i8, ptr %c1.addr + // CHECK-NEXT: [[C6:%.+]] = load i8, ptr %c2.addr + // CHECK-NEXT: [[C7:%.+]] = load i8, ptr %c3.addr + // CHECK-NEXT: [[C8:%.+]] = call i8 @llvm.fshr.i8(i8 [[C5]], i8 [[C6]], i8 [[C7]]) + // CHECK-NEXT: store i8 [[C8]], ptr %tmp_c_r + char tmp_c_l = __builtin_elementwise_fshl(c1, c2, c3); + char tmp_c_r = __builtin_elementwise_fshr(c1, c2, c3); + + 
// CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr + // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr + // CHECK-NEXT: [[UC3:%.+]] = load i8, ptr %uc3.addr + // CHECK-NEXT: [[UC4:%.+]] = call i8 @llvm.fshl.i8(i8 [[UC1]], i8 [[UC2]], i8 [[UC3]]) + // CHECK-NEXT: store i8 [[UC4]], ptr %tmp_uc_l + // CHECK-NEXT: [[UC5:%.+]] = load i8, ptr %uc1.addr + // CHECK-NEXT: [[UC6:%.+]] = load i8, ptr %uc2.addr + // CHECK-NEXT: [[UC7:%.+]] = load i8, ptr %uc3.addr + // CHECK-NEXT: [[UC8:%.+]] = call i8 @llvm.fshr.i8(i8 [[UC5]], i8 [[UC6]], i8 [[UC7]]) + // CHECK-NEXT: store i8 [[UC8]], ptr %tmp_uc_r + unsigned char tmp_uc_l = __builtin_elementwise_fshl(uc1, uc2, uc3); + unsigned char tmp_uc_r = __builtin_elementwise_fshr(uc1, uc2, uc3); + + // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr + // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr + // CHECK-NEXT: [[VI3:%.+]] = load <8 x i16>, ptr %vi3.addr + // CHECK-NEXT: [[VI4:%.+]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]], <8 x i16> [[VI3]]) + // CHECK-NEXT: store <8 x i16> [[VI4]], ptr %tmp_vi_l + // CHECK-NEXT: [[VI5:%.+]] = load <8 x i16>, ptr %vi1.addr + // CHECK-NEXT: [[VI6:%.+]] = load <8 x i16>, ptr %vi2.addr + // CHECK-NEXT: [[VI7:%.+]] = load <8 x i16>, ptr %vi3.addr + // CHECK-NEXT: [[VI8:%.+]] = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> [[VI5]], <8 x i16> [[VI6]], <8 x i16> [[VI7]]) + // CHECK-NEXT: store <8 x i16> [[VI8]], ptr %tmp_vi_r + si8 tmp_vi_l = __builtin_elementwise_fshl(vi1, vi2, vi3); + si8 tmp_vi_r = __builtin_elementwise_fshr(vi1, vi2, vi3); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr + // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr + // CHECK-NEXT: [[VU3:%.+]] = load <4 x i32>, ptr %vu3.addr + // CHECK-NEXT: [[VU4:%.+]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]], <4 x i32> [[VU3]]) + // CHECK-NEXT: store <4 x i32> [[VU4]], ptr %tmp_vu_l + // CHECK-NEXT: [[VU5:%.+]] = load <4 x i32>, ptr %vu1.addr + // 
CHECK-NEXT: [[VU6:%.+]] = load <4 x i32>, ptr %vu2.addr + // CHECK-NEXT: [[VU7:%.+]] = load <4 x i32>, ptr %vu3.addr + // CHECK-NEXT: [[VU8:%.+]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[VU5]], <4 x i32> [[VU6]], <4 x i32> [[VU7]]) + // CHECK-NEXT: store <4 x i32> [[VU8]], ptr %tmp_vu_r + u4 tmp_vu_l = __builtin_elementwise_fshl(vu1, vu2, vu3); + u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3); +} diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 8548d3be8c44a..a80ff4bed4faf 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -1294,6 +1294,42 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16, // expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}} } +void test_builtin_elementwise_fsh(int i32, int2 v2i32, short i16, int3 v3i32, + double f64, float f32, float2 v2f32) { + i32 = __builtin_elementwise_fshl(); + // expected-error@-1 {{too few arguments to function call, expected 3, have 0}} + + i32 = __builtin_elementwise_fshr(); + // expected-error@-1 {{too few arguments to function call, expected 3, have 0}} + + i32 = __builtin_elementwise_fshl(i32, i32); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} + + i32 = __builtin_elementwise_fshr(i32, i32); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} + + i32 = __builtin_elementwise_fshl(i32, i32, i16); + // expected-error@-1 {{arguments are of different types ('int' vs 'short')}} + + i16 = __builtin_elementwise_fshr(i16, i32, i16); + // expected-error@-1 {{arguments are of different types ('short' vs 'int')}} + + f32 = __builtin_elementwise_fshl(f32, f32, f32); + // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float')}} + + f64 = __builtin_elementwise_fshr(f64, f64, f64); + // expected-error@-1 {{argument must be a 
scalar or vector of integer types (was 'double')}} + + v2i32 = __builtin_elementwise_fshl(v2i32, v2i32, v2f32); + // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float2' (vector of 2 'float' values))}} + + v2i32 = __builtin_elementwise_fshr(v2i32, v2i32, v3i32); + // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int3' (vector of 3 'int' values))}} + + v3i32 = __builtin_elementwise_fshl(v3i32, v3i32, v2i32); + // expected-error@-1 {{arguments are of different types ('int3' (vector of 3 'int' values) vs 'int2' (vector of 2 'int' values))}} +} + typedef struct { float3 b; } struct_float3; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits