RKSimon created this revision. RKSimon added reviewers: fhahn, scanon, aaron.ballman, craig.topper. Herald added a subscriber: StephenFan. Herald added a project: All. RKSimon requested review of this revision. Herald added a project: clang.
Similar to the existing bitwise reduction builtins, this lowers to an llvm.vector.reduce.add intrinsic call. For other reductions, we've tried to share builtins for float/integer vectors, but the fadd reduction builtins also take a starting value argument. Technically I could support float by using default values, but we're probably better off with dedicated fadd reduction builtins that take both arguments. (Split off from D117829 <https://reviews.llvm.org/D117829>) Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D124741 Files: clang/include/clang/Basic/Builtins.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtins-reduction-math.c clang/test/Sema/builtins-reduction-math.c Index: clang/test/Sema/builtins-reduction-math.c =================================================================== --- clang/test/Sema/builtins-reduction-math.c +++ clang/test/Sema/builtins-reduction-math.c @@ -36,6 +36,23 @@ // expected-error@-1 {{1st argument must be a vector type (was 'int')}} } +void test_builtin_reduce_add(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_add(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_add(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_add(iv, iv); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_add(i); + // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}} + + i = __builtin_reduce_add(v); + // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} +} + void test_builtin_reduce_xor(int i, float4 v, int3 iv) { struct Foo s = __builtin_reduce_xor(iv); // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} Index: clang/test/CodeGen/builtins-reduction-math.c 
=================================================================== --- clang/test/CodeGen/builtins-reduction-math.c +++ clang/test/CodeGen/builtins-reduction-math.c @@ -58,6 +58,28 @@ unsigned long long r5 = __builtin_reduce_min(cvi1); } +void test_builtin_reduce_add(si8 vi1, u4 vu1) { + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_add(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_add(vu1); + + // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16 + // CHECK-NEXT: [[RDX1:%.+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[CVI1]]) + // CHECK-NEXT: sext i16 [[RDX1]] to i32 + const si8 cvi1 = vi1; + int r4 = __builtin_reduce_add(cvi1); + + // CHECK: [[CVU1:%.+]] = load <4 x i32>, <4 x i32>* %cvu1, align 16 + // CHECK-NEXT: [[RDX2:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[CVU1]]) + // CHECK-NEXT: zext i32 [[RDX2]] to i64 + const u4 cvu1 = vu1; + unsigned long long r5 = __builtin_reduce_add(cvu1); +} + void test_builtin_reduce_xor(si8 vi1, u4 vu1) { // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -2331,6 +2331,8 @@ } // These builtins support vectors of integers only. + // TODO: ADD should support floating-point types. 
+ case Builtin::BI__builtin_reduce_add: case Builtin::BI__builtin_reduce_xor: case Builtin::BI__builtin_reduce_or: case Builtin::BI__builtin_reduce_and: { Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -3273,6 +3273,9 @@ *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } + case Builtin::BI__builtin_reduce_add: + return RValue::get(emitUnaryBuiltin( + *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); case Builtin::BI__builtin_reduce_xor: return RValue::get(emitUnaryBuiltin( *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); Index: clang/include/clang/Basic/Builtins.def =================================================================== --- clang/include/clang/Basic/Builtins.def +++ clang/include/clang/Basic/Builtins.def @@ -663,6 +663,7 @@ BUILTIN(__builtin_reduce_xor, "v.", "nct") BUILTIN(__builtin_reduce_or, "v.", "nct") BUILTIN(__builtin_reduce_and, "v.", "nct") +BUILTIN(__builtin_reduce_add, "v.", "nct") BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
Index: clang/test/Sema/builtins-reduction-math.c =================================================================== --- clang/test/Sema/builtins-reduction-math.c +++ clang/test/Sema/builtins-reduction-math.c @@ -36,6 +36,23 @@ // expected-error@-1 {{1st argument must be a vector type (was 'int')}} } +void test_builtin_reduce_add(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_add(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_add(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_add(iv, iv); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_add(i); + // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}} + + i = __builtin_reduce_add(v); + // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} +} + void test_builtin_reduce_xor(int i, float4 v, int3 iv) { struct Foo s = __builtin_reduce_xor(iv); // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} Index: clang/test/CodeGen/builtins-reduction-math.c =================================================================== --- clang/test/CodeGen/builtins-reduction-math.c +++ clang/test/CodeGen/builtins-reduction-math.c @@ -58,6 +58,28 @@ unsigned long long r5 = __builtin_reduce_min(cvi1); } +void test_builtin_reduce_add(si8 vi1, u4 vu1) { + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_add(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_add(vu1); + + // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16 + // 
CHECK-NEXT: [[RDX1:%.+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[CVI1]]) + // CHECK-NEXT: sext i16 [[RDX1]] to i32 + const si8 cvi1 = vi1; + int r4 = __builtin_reduce_add(cvi1); + + // CHECK: [[CVU1:%.+]] = load <4 x i32>, <4 x i32>* %cvu1, align 16 + // CHECK-NEXT: [[RDX2:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[CVU1]]) + // CHECK-NEXT: zext i32 [[RDX2]] to i64 + const u4 cvu1 = vu1; + unsigned long long r5 = __builtin_reduce_add(cvu1); +} + void test_builtin_reduce_xor(si8 vi1, u4 vu1) { // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -2331,6 +2331,8 @@ } // These builtins support vectors of integers only. + // TODO: ADD should support floating-point types. + case Builtin::BI__builtin_reduce_add: case Builtin::BI__builtin_reduce_xor: case Builtin::BI__builtin_reduce_or: case Builtin::BI__builtin_reduce_and: { Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -3273,6 +3273,9 @@ *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } + case Builtin::BI__builtin_reduce_add: + return RValue::get(emitUnaryBuiltin( + *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); case Builtin::BI__builtin_reduce_xor: return RValue::get(emitUnaryBuiltin( *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); Index: clang/include/clang/Basic/Builtins.def =================================================================== --- clang/include/clang/Basic/Builtins.def +++ clang/include/clang/Basic/Builtins.def @@ -663,6 +663,7 @@ BUILTIN(__builtin_reduce_xor, "v.", "nct") BUILTIN(__builtin_reduce_or, "v.", "nct") BUILTIN(__builtin_reduce_and, "v.", "nct") +BUILTIN(__builtin_reduce_add, "v.", "nct") 
BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits