sepavloff created this revision. sepavloff added reviewers: kpn, rjmccall, aaron.ballman. Herald added subscribers: jsji, pengfei. Herald added a project: All. sepavloff requested review of this revision. Herald added a project: clang.
Previously compilation of a few tests produced incorrect code. In them FP pragmas were ignored and the resulting code contained constrained intrinsics with rounding mode and/or exception behavior determined by command line options only. Compiler creates correct AST for this tests, but FP options were ignored in code generator because builder object was not set up properly. To fix code generation builder object now is updated according to FP options stored in CompoundStmt. This change elucidated an issue in template instantiations, - it occured with FP options taken from the point of instantiation. The issue is also fixed. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D129464 Files: clang/include/clang/AST/Stmt.h clang/lib/CodeGen/CGStmt.cpp clang/lib/Sema/SemaTemplateInstantiateDecl.cpp clang/test/CodeGen/X86/avx512dq-builtins-constrained.c clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c clang/test/CodeGen/complex-strictfp.c clang/test/CodeGen/pragma-fenv_access.cpp
Index: clang/test/CodeGen/pragma-fenv_access.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/pragma-fenv_access.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -S -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +template <typename T> +T templ_01(T a, T b) { + return a * b; +} + +#pragma STDC FENV_ACCESS ON + +float func_02(float a, float b) { + return 1.0f + templ_01<float>(a, b); +} + +// CHECK-LABEL: define {{.*}} @_Z7func_02ff +// CHECK: call noundef float @_Z8templ_01IfET_S0_S0_ +// CHECK: call float @llvm.experimental.constrained.fadd.f32({{.*}}, metadata !"round.dynamic", metadata !"fpexcept.strict") + +// CHECK-LABEL: define {{.*}} @_Z8templ_01IfET_S0_S0_ +// CHECK: fmul float + + +#pragma STDC FENV_ACCESS OFF +#pragma STDC FENV_ROUND FE_UPWARD + +template <typename T> +T templ_03(T a, T b) { + return a * b; +} + +#pragma STDC FENV_ACCESS ON + +float func_04(float a, float b) { + return 1.0f + templ_03<float>(a, b); +} + + +// CHECK-LABEL: define {{.*}} @_Z7func_04ff +// CHECK: call noundef float @_Z8templ_03IfET_S0_S0_ +// CHECK: call float @llvm.experimental.constrained.fadd.f32({{.*}}, metadata !"round.upward", metadata !"fpexcept.strict") + +// CHECK-LABEL: define {{.*}} @_Z8templ_03IfET_S0_S0_ +// CHECK: call float @llvm.experimental.constrained.fmul.f32({{.*}}, metadata !"round.upward", metadata !"fpexcept.ignore") Index: clang/test/CodeGen/complex-strictfp.c =================================================================== --- clang/test/CodeGen/complex-strictfp.c +++ clang/test/CodeGen/complex-strictfp.c @@ -5,8 +5,6 @@ // Test that the constrained intrinsics are picking up the exception // metadata from the AST instead of the global default from the command line. // Include rounding metadata in the testing. -// FIXME: All cases of "fpexcept.maytrap" in this test are wrong. -// FIXME: All cases of "round.tonearest" in this test are wrong. #pragma float_control(except, on) #pragma STDC FENV_ROUND FE_UPWARD @@ -74,7 +72,7 @@ // CHECK-NEXT: [[G1_REAL:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8 // CHECK-NEXT: [[G1_IMAG:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, double* @D, align 8 -// CHECK-NEXT: [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[G1_REAL]], double [[TMP0]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]] +// CHECK-NEXT: [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[G1_REAL]], double [[TMP0]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]] // CHECK-NEXT: store double [[ADD_R]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8 // CHECK-NEXT: store double [[G1_IMAG]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8 // CHECK-NEXT: ret void @@ -88,7 +86,7 @@ // CHECK-NEXT: [[TMP0:%.*]] = load double, double* @D, align 8 // CHECK-NEXT: [[G1_REAL:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8 // CHECK-NEXT: [[G1_IMAG:%.*]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8 -// CHECK-NEXT: [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP0]], double [[G1_REAL]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR2]] +// CHECK-NEXT: [[ADD_R:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP0]], double [[G1_REAL]], metadata !"round.upward", metadata !"fpexcept.strict") #[[ATTR2]] // CHECK-NEXT: store double [[ADD_R]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 0), align 8 // CHECK-NEXT: store double [[G1_IMAG]], double* getelementptr inbounds ({ double, double }, { double, double }* @g1, i32 0, i32 1), align 8 // CHECK-NEXT: ret void Index: clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c =================================================================== --- clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c +++ clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c @@ -23,7 +23,6 @@ // Test that the constrained intrinsics are picking up the exception // metadata from the AST instead of the global default from the command line. -// FIXME: All cases of "fpexcept.maytrap" in this test are wrong. #if EXCEPT #pragma float_control(except, on) @@ -33,7 +32,7 @@ // COMMON-LABEL: test_vsqrt_f16 // UNCONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a) -// CONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h // COMMONIR: ret <4 x half> [[SQR]] float16x4_t test_vsqrt_f16(float16x4_t a) { @@ -42,7 +41,7 @@ // COMMON-LABEL: test_vsqrtq_f16 // UNCONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) -// CONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h // COMMONIR: ret <8 x half> [[SQR]] float16x8_t test_vsqrtq_f16(float16x8_t a) { @@ -51,7 +50,7 @@ // COMMON-LABEL: test_vfma_f16 // UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) -// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h // COMMONIR: ret <4 x half> [[ADD]] float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) { @@ -60,7 +59,7 @@ // COMMON-LABEL: test_vfmaq_f16 // UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a) -// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h // COMMONIR: ret <8 x half> [[ADD]] float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { @@ -70,7 +69,7 @@ // COMMON-LABEL: test_vfms_f16 // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b // UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a) -// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h // COMMONIR: ret <4 x half> [[ADD]] float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) { @@ -80,7 +79,7 @@ // COMMON-LABEL: test_vfmsq_f16 // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b // UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a) -// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h // COMMONIR: ret <8 x half> [[ADD]] float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { @@ -96,7 +95,7 @@ // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> // COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]]) -// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMLA]] float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) { @@ -112,7 +111,7 @@ // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]]) -// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <8 x half> [[FMLA]] float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { @@ -128,7 +127,7 @@ // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]]) -// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMLA]] float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { @@ -144,7 +143,7 @@ // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]]) -// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <8 x half> [[FMLA]] float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { @@ -157,7 +156,7 @@ // COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2 // COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a) -// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMA]] float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) { @@ -174,7 +173,7 @@ // COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6 // COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7 // UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a) -// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <8 x half> [[FMA]] float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { @@ -184,7 +183,7 @@ // COMMON-LABEL: test_vfmah_lane_f16 // COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a) -// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret half [[FMA]] float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) { @@ -194,7 +193,7 @@ // COMMON-LABEL: test_vfmah_laneq_f16 // COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a) -// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret half [[FMA]] float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) { @@ -211,7 +210,7 @@ // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> // COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]]) -// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMA]] float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) { @@ -228,7 +227,7 @@ // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]]) -// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <8 x half> [[FMLA]] float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { @@ -246,7 +245,7 @@ // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]]) -// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMLA]] float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { @@ -264,7 +263,7 @@ // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]]) -// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <8 x half> [[FMLA]] float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { @@ -278,7 +277,7 @@ // COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2 // COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3 // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a) -// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <4 x half> [[FMA]] float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) { @@ -296,7 +295,7 @@ // COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6 // COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7 // UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a) -// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret <8 x half> [[FMA]] float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { @@ -314,7 +313,7 @@ // CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a) -// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret half [[FMA]] float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) { @@ -332,7 +331,7 @@ // CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}} // COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7 // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a) -// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") +// CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] // COMMONIR: ret half [[FMA]] float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) { Index: clang/test/CodeGen/X86/avx512dq-builtins-constrained.c =================================================================== --- clang/test/CodeGen/X86/avx512dq-builtins-constrained.c +++ clang/test/CodeGen/X86/avx512dq-builtins-constrained.c @@ -4,7 +4,6 @@ // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -ffp-exception-behavior=maytrap -DSTRICT=1 -S -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK-ASM --check-prefix=COMMON -// FIXME: Every instance of "fpexcept.maytrap" is wrong. #ifdef STRICT // Test that the constrained intrinsics are picking up the exception // metadata from the AST instead of the global default from the command line. @@ -18,7 +17,7 @@ __m512d test_mm512_cvtepi64_pd(__m512i __A) { // COMMON-LABEL: test_mm512_cvtepi64_pd // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double> - // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: vcvtqq2pd return _mm512_cvtepi64_pd(__A); } @@ -26,7 +25,7 @@ __m512d test_mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) { // COMMON-LABEL: test_mm512_mask_cvtepi64_pd // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double> - // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} // CHECK-ASM: vcvtqq2pd return _mm512_mask_cvtepi64_pd(__W, __U, __A); @@ -35,7 +34,7 @@ __m512d test_mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A) { // COMMON-LABEL: test_mm512_maskz_cvtepi64_pd // UNCONSTRAINED: sitofp <8 x i64> %{{.*}} to <8 x double> - // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} // CHECK-ASM: vcvtqq2pd return _mm512_maskz_cvtepi64_pd(__U, __A); @@ -148,7 +147,7 @@ __m512d test_mm512_cvtepu64_pd(__m512i __A) { // COMMON-LABEL: test_mm512_cvtepu64_pd // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double> - // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-ASM: vcvtuqq2pd return _mm512_cvtepu64_pd(__A); } @@ -156,7 +155,7 @@ __m512d test_mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) { // COMMON-LABEL: test_mm512_mask_cvtepu64_pd // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double> - // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} // CHECK-ASM: vcvtuqq2pd return _mm512_mask_cvtepu64_pd(__W, __U, __A); @@ -165,7 +164,7 @@ __m512d test_mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A) { // COMMON-LABEL: test_mm512_maskz_cvtepu64_pd // UNCONSTRAINED: uitofp <8 x i64> %{{.*}} to <8 x double> - // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.maytrap") + // CONSTRAINED: call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict") // COMMONIR: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} // CHECK-ASM: vcvtuqq2pd return _mm512_maskz_cvtepu64_pd(__U, __A); Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp =================================================================== --- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5042,6 +5042,9 @@ // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Function); + FPFeaturesStateRAII SavedFPFeatures(*this); + CurFPFeatures = FPOptions(getLangOpts()); + if (addInstantiatedParametersToScope(Function, PatternDecl, Scope, TemplateArgs)) return; Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -491,6 +491,8 @@ assert((!GetLast || (GetLast && ExprResult)) && "If GetLast is true then the CompoundStmt must have a StmtExprResult"); + CGFPOptionsRAII SavedFPFeatues(*this, S.getNewFPOptions(CurFPFeatures)); + Address RetAlloca = Address::invalid(); for (auto *CurStmt : S.body()) { Index: clang/include/clang/AST/Stmt.h =================================================================== --- clang/include/clang/AST/Stmt.h +++ clang/include/clang/AST/Stmt.h @@ -1458,6 +1458,15 @@ return *getTrailingObjects<FPOptionsOverride>(); } + /// Get FPOptions inside this statement. They may differ from outer options + /// due to pragmas. + /// \param CurFPOptions FPOptions outside this statement. + FPOptions getNewFPOptions(FPOptions CurFPOptions) const { + return hasStoredFPFeatures() + ? getStoredFPFeatures().applyOverrides(CurFPOptions) + : CurFPOptions; + } + using body_iterator = Stmt **; using body_range = llvm::iterator_range<body_iterator>;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits