kbsmith1 created this revision.
kbsmith1 added reviewers: craig.topper, aaron.ballman, erichkeane, 
andrew.w.kaylor.
kbsmith1 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
This change uses #pragma float_control(push/pop) together with
#pragma clang fp reassociate(off) to turn off the floating point
fast-math reassociation flag for IA intrinsics that happen to be
implemented using LLVM IR's regular FP operations.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107004

Files:
  clang/lib/Headers/emmintrin.h
  clang/lib/Headers/immintrin.h
  clang/lib/Headers/x86intrin.h
  clang/lib/Headers/xmmintrin.h
  clang/test/Headers/emmintrin.c
  clang/test/Headers/immintrin.c
  clang/test/Headers/x86intrin-3.c
  clang/test/Headers/xmmintrin.c

Index: clang/test/Headers/xmmintrin.c
===================================================================
--- clang/test/Headers/xmmintrin.c
+++ clang/test/Headers/xmmintrin.c
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 %s -ffreestanding -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - | FileCheck %s
 //
+// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - \
+// RUN:     | FileCheck -check-prefix=CKFMATH %s
+//
 // RUN: rm -rf %t
 // RUN: %clang_cc1 %s -ffreestanding -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - \
 // RUN:     -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -isystem %S/Inputs/include \
@@ -26,6 +29,21 @@
   return _mm_add_sd(__a, __b);
 }
 
+// Make sure that the llvm IR for _mm_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// CKFMATH: define{{.*}} <4 x float> @test_xmmintrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <4 x float>
+__m128 test_xmmintrin_no_reassoc(__m128 __a, __m128 __b) {
+  return _mm_add_ps(__a, __b);
+}
+
+// Make sure that all fast flags were restored outside of the include file.
+// CKFMATH: define{{.*}} double @test_fast
+// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double
+double test_fast(double __a, double __b) {
+  return __a + __b;
+}
+
 #if __STDC_HOSTED__
 // Make sure stdlib.h symbols are accessible.
 void *p = NULL;
Index: clang/test/Headers/x86intrin-3.c
===================================================================
--- /dev/null
+++ clang/test/Headers/x86intrin-3.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple i386-unknown-unknown -emit-llvm -o - \
+// RUN:     | FileCheck -check-prefix=CKFMATH %s
+//
+// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-unknown-unknown -emit-llvm -o - \
+// RUN:     | FileCheck -check-prefix=CKFMATH %s
+
+// Include the metaheader that includes all x86 intrinsic headers.
+#include <x86intrin.h>
+
+// Make sure that the llvm IR for _mm_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// CKFMATH: define{{.*}} <4 x float> @test_xmmintrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <4 x float>
+__m128 __attribute__((__target__("sse"))) test_xmmintrin_no_reassoc(__m128 __a, __m128 __b) {
+  return _mm_add_ps(__a, __b);
+}
+
+// Make sure that the llvm IR for _mm_add_pd doesn't have fast attribute and
+// doesn't have reassoc set.
+// CKFMATH: define{{.*}} <2 x double> @test_emmintrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <2 x double>
+__m128d __attribute__((__target__("sse2"))) test_emmintrin_no_reassoc(__m128d __a, __m128d __b) {
+  return _mm_add_pd(__a, __b);
+}
+
+// Make sure that the llvm IR for _mm256_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// This intrinsic comes from avxintrin.h, and so is checking that
+// changes in immintrin.h affect the files it includes as well.
+// CKFMATH: define{{.*}} <8 x float> @test_mm256intrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <8 x float>
+__m256 __attribute__((__target__(("avx")))) test_mm256intrin_no_reassoc(__m256 __a, __m256 __b) {
+  return _mm256_add_ps(__a, __b);
+}
+
+// Make sure that the llvm IR for _mm512_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// This intrinsic comes from avx512fintrin.h
+// CKFMATH: define{{.*}} <16 x float> @test_mm512intrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <16 x float>
+__m512 __attribute__((__target__(("avx512f")))) test_mm512intrin_no_reassoc(__m512 __a, __m512 __b) {
+  return _mm512_add_ps(__a, __b);
+}
+
+// Make sure that all fast flags were restored outside of the include file.
+// CKFMATH: define{{.*}} double @test_fast
+// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double
+double test_fast(double __a, double __b) {
+  return __a + __b;
+}
Index: clang/test/Headers/immintrin.c
===================================================================
--- /dev/null
+++ clang/test/Headers/immintrin.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple i386-unknown-unknown -emit-llvm -o - \
+// RUN:     | FileCheck -check-prefix=CKFMATH %s
+//
+// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-unknown-unknown -emit-llvm -o - \
+// RUN:     | FileCheck -check-prefix=CKFMATH %s
+
+// Include the metaheader that includes all intel intrinsic headers.
+#include <immintrin.h>
+
+// Make sure that the llvm IR for _mm_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// CKFMATH: define{{.*}} <4 x float> @test_xmmintrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <4 x float>
+__m128 __attribute__((__target__("sse"))) test_xmmintrin_no_reassoc(__m128 __a, __m128 __b) {
+  return _mm_add_ps(__a, __b);
+}
+
+// Make sure that the llvm IR for _mm_add_pd doesn't have fast attribute and
+// doesn't have reassoc set.
+// CKFMATH: define{{.*}} <2 x double> @test_emmintrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <2 x double>
+__m128d __attribute__((__target__("sse2"))) test_emmintrin_no_reassoc(__m128d __a, __m128d __b) {
+  return _mm_add_pd(__a, __b);
+}
+
+// Make sure that the llvm IR for _mm256_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// This intrinsic comes from avxintrin.h, and so is checking that
+// changes in immintrin.h affect the files it includes as well.
+// CKFMATH: define{{.*}} <8 x float> @test_mm256intrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <8 x float>
+__m256 __attribute__((__target__(("avx")))) test_mm256intrin_no_reassoc(__m256 __a, __m256 __b) {
+  return _mm256_add_ps(__a, __b);
+}
+
+// Make sure that the llvm IR for _mm512_add_ps doesn't have fast attribute and
+// doesn't have reassoc set.
+// This intrinsic comes from avx512fintrin.h
+// CKFMATH: define{{.*}} <16 x float> @test_mm512intrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn <16 x float>
+__m512 __attribute__((__target__(("avx512f")))) test_mm512intrin_no_reassoc(__m512 __a, __m512 __b) {
+  return _mm512_add_ps(__a, __b);
+}
+
+// Make sure that all fast flags were restored outside of the include file.
+// CKFMATH: define{{.*}} double @test_fast
+// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double
+double test_fast(double __a, double __b) {
+  return __a + __b;
+}
Index: clang/test/Headers/emmintrin.c
===================================================================
--- /dev/null
+++ clang/test/Headers/emmintrin.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -ffast-math -triple x86_64-apple-macosx10.9.0 -emit-llvm -o - \
+// RUN:     | FileCheck -check-prefix=CKFMATH %s
+//
+#include <emmintrin.h>
+
+// Make sure that the llvm IR for _mm_add_pd doesn't have fast attribute and
+// doesn't have reassoc set.
+// CKFMATH: define{{.*}} <2 x double> @test_emmintrin_no_reassoc
+// CKFMATH: fadd nnan ninf nsz arcp afn
+__m128d test_emmintrin_no_reassoc(__m128d __a, __m128d __b) {
+  return _mm_add_pd(__a, __b);
+}
+
+// Make sure that all fast flags were restored outside of the include file.
+// CKFMATH: define{{.*}} double @test_fast
+// CKFMATH: fadd reassoc nnan ninf nsz arcp afn double
+double test_fast(double __a, double __b) {
+  return __a + __b;
+}
Index: clang/lib/Headers/xmmintrin.h
===================================================================
--- clang/lib/Headers/xmmintrin.h
+++ clang/lib/Headers/xmmintrin.h
@@ -10,6 +10,10 @@
 #ifndef __XMMINTRIN_H
 #define __XMMINTRIN_H
 
+/* Turn off reassociation for intrinsics, push state to restore at end. */
+#pragma float_control(push)
+#pragma clang fp reassociate(off)
+
 #include <mmintrin.h>
 
 typedef int __v4si __attribute__((__vector_size__(16)));
@@ -3005,4 +3009,6 @@
 #include <emmintrin.h>
 #endif
 
+#pragma float_control(pop)
+
 #endif /* __XMMINTRIN_H */
Index: clang/lib/Headers/x86intrin.h
===================================================================
--- clang/lib/Headers/x86intrin.h
+++ clang/lib/Headers/x86intrin.h
@@ -10,6 +10,10 @@
 #ifndef __X86INTRIN_H
 #define __X86INTRIN_H
 
+/* Turn off reassociation for intrinsics, push state to restore at end. */
+#pragma float_control(push)
+#pragma clang fp reassociate(off)
+
 #include <ia32intrin.h>
 
 #include <immintrin.h>
@@ -59,5 +63,6 @@
 #include <clzerointrin.h>
 #endif
 
+#pragma float_control(pop)
 
 #endif /* __X86INTRIN_H */
Index: clang/lib/Headers/immintrin.h
===================================================================
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -10,6 +10,10 @@
 #ifndef __IMMINTRIN_H
 #define __IMMINTRIN_H
 
+/* Turn off reassociation for intrinsics, push state to restore at end. */
+#pragma float_control(push)
+#pragma clang fp reassociate(off)
+
 #include <x86gprintrin.h>
 
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
@@ -597,4 +601,6 @@
 
 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
 
+#pragma float_control(pop)
+
 #endif /* __IMMINTRIN_H */
Index: clang/lib/Headers/emmintrin.h
===================================================================
--- clang/lib/Headers/emmintrin.h
+++ clang/lib/Headers/emmintrin.h
@@ -10,6 +10,10 @@
 #ifndef __EMMINTRIN_H
 #define __EMMINTRIN_H
 
+/* Turn off reassociation for intrinsics, push state to restore at end. */
+#pragma float_control(push)
+#pragma clang fp reassociate(off)
+
 #include <xmmintrin.h>
 
 typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
@@ -4978,4 +4982,6 @@
 #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
 #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
 
+#pragma float_control(pop)
+
 #endif /* __EMMINTRIN_H */
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to