config_host.mk.in                      |    4 +++
 configure.ac                           |    6 ++++
 sc/inc/arraysumfunctor.hxx             |    5 ++--
 sc/inc/arraysumfunctorinternal.hxx     |    6 +++-
 sc/source/core/tool/arraysumAVX.cxx    |   21 ++++++++--------
 sc/source/core/tool/arraysumAVX512.cxx |   41 ++++++---------------------------
 sc/source/core/tool/arraysumSSE2.cxx   |   21 ++++++++--------
 7 files changed, 47 insertions(+), 57 deletions(-)

New commits:
commit ef42ce579f0e4e4c436f70615f3adeb9f0f68217
Author:     Luboš Luňák <l.lu...@collabora.com>
AuthorDate: Tue Oct 26 23:40:47 2021 +0200
Commit:     Luboš Luňák <l.lu...@collabora.com>
CommitDate: Wed Oct 27 15:02:11 2021 +0200

    fix AVX512 detection
    
    The value wasn't in config_host.mk.in, so it's never been used.
    And also fix Calc Kahan CPU-specific code yet again :( .
    
    Change-Id: Iacfd500e5a662b2b4b96a009d129a012d278a3ad
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/124248
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lu...@collabora.com>

diff --git a/config_host.mk.in b/config_host.mk.in
index f9d4e2f0d05d..3970cb6a61c9 100644
--- a/config_host.mk.in
+++ b/config_host.mk.in
@@ -110,6 +110,8 @@ export CXXFLAGS_INTRINSICS_SSE41=@CXXFLAGS_INTRINSICS_SSE41@
 export CXXFLAGS_INTRINSICS_SSE42=@CXXFLAGS_INTRINSICS_SSE42@
 export CXXFLAGS_INTRINSICS_AVX=@CXXFLAGS_INTRINSICS_AVX@
 export CXXFLAGS_INTRINSICS_AVX2=@CXXFLAGS_INTRINSICS_AVX2@
+export CXXFLAGS_INTRINSICS_AVX512=@CXXFLAGS_INTRINSICS_AVX512@
+export CXXFLAGS_INTRINSICS_AVX512F=@CXXFLAGS_INTRINSICS_AVX512F@
 export CXXFLAGS_INTRINSICS_F16C=@CXXFLAGS_INTRINSICS_F16C@
 export CXXFLAGS_INTRINSICS_FMA=@CXXFLAGS_INTRINSICS_FMA@
 export DATADIR=@DATADIR@
@@ -407,6 +409,8 @@ export LO_CLANG_CXXFLAGS_INTRINSICS_SSE41=@LO_CLANG_CXXFLAGS_INTRINSICS_SSE41@
 export LO_CLANG_CXXFLAGS_INTRINSICS_SSE42=@LO_CLANG_CXXFLAGS_INTRINSICS_SSE42@
 export LO_CLANG_CXXFLAGS_INTRINSICS_AVX=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX@
 export LO_CLANG_CXXFLAGS_INTRINSICS_AVX2=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX2@
+export LO_CLANG_CXXFLAGS_INTRINSICS_AVX512=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX512@
+export LO_CLANG_CXXFLAGS_INTRINSICS_AVX512F=@LO_CLANG_CXXFLAGS_INTRINSICS_AVX512F@
 export LO_CLANG_CXXFLAGS_INTRINSICS_F16C=@LO_CLANG_CXXFLAGS_INTRINSICS_F16C@
 export LO_CLANG_CXXFLAGS_INTRINSICS_FMA=@LO_CLANG_CXXFLAGS_INTRINSICS_FMA@
 @x_LO_ELFCHECK_ALLOWLIST@ export LO_ELFCHECK_ALLOWLIST=@LO_ELFCHECK_ALLOWLIST@
diff --git a/configure.ac b/configure.ac
index adf6da19b1f9..a810ba0b88af 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7808,6 +7808,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
     #include <immintrin.h>
     int main () {
         __m512i a = _mm512_loadu_si512(0);
+        __m512d v1 = _mm512_load_pd(0);
+        // https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/config/i386/avx512fintrin.h;h=23bce99cbe7016a04e14c2163ed3fe6a5a64f4e2
+        __m512d v2 = _mm512_abs_pd(v1);
         return 0;
     }
     ])],
@@ -12281,6 +12284,9 @@ if test "$ENABLE_SKIA" = TRUE -a "$COM_IS_CLANG" != TRUE; then
             #include <immintrin.h>
             int main () {
                 __m512i a = _mm512_loadu_si512(0);
+                __m512d v1 = _mm512_load_pd(0);
+                // https://gcc.gnu.org/git/?p=gcc.git;a=commit;f=gcc/config/i386/avx512fintrin.h;h=23bce99cbe7016a04e14c2163ed3fe6a5a64f4e2
+                __m512d v2 = _mm512_abs_pd(v1);
                 return 0;
             }
             ])],
diff --git a/sc/inc/arraysumfunctor.hxx b/sc/inc/arraysumfunctor.hxx
index d251b4a6f9fb..b727f5893a8c 100644
--- a/sc/inc/arraysumfunctor.hxx
+++ b/sc/inc/arraysumfunctor.hxx
@@ -19,8 +19,9 @@
 namespace sc::op
 {
 /* Checkout available optimization options */
-const bool hasAVX = cpuid::hasAVX();
-const bool hasSSE2 = cpuid::hasSSE2();
+const bool hasAVX512F = hasAVX512FCode() && cpuid::hasAVX512F();
+const bool hasAVX = hasAVXCode() && cpuid::hasAVX();
+const bool hasSSE2 = hasSSE2Code() && cpuid::hasSSE2();
 
 /**
   * If no boosts available, Unrolled KahanSum.
diff --git a/sc/inc/arraysumfunctorinternal.hxx b/sc/inc/arraysumfunctorinternal.hxx
index a06e3fc17439..e939dbd3037d 100644
--- a/sc/inc/arraysumfunctorinternal.hxx
+++ b/sc/inc/arraysumfunctorinternal.hxx
@@ -13,8 +13,6 @@
 
 namespace sc::op
 {
-SC_DLLPUBLIC extern const bool hasAVX512F;
-
 // Plain old data structure, to be used by code compiled with CPU intrinsics without generating any
 // code for it (so that code requiring intrinsics doesn't get accidentally selected as the one copy
 // when merging duplicates).
@@ -29,6 +27,10 @@ SC_DLLPUBLIC KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double
 SC_DLLPUBLIC KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent);
 SC_DLLPUBLIC KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent);
 
+SC_DLLPUBLIC bool hasAVX512FCode();
+SC_DLLPUBLIC bool hasAVXCode();
+SC_DLLPUBLIC bool hasSSE2Code();
+
 } // namespace
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/sc/source/core/tool/arraysumAVX.cxx b/sc/source/core/tool/arraysumAVX.cxx
index c55d71f22983..e256248047d0 100644
--- a/sc/source/core/tool/arraysumAVX.cxx
+++ b/sc/source/core/tool/arraysumAVX.cxx
@@ -20,7 +20,9 @@
 
 namespace sc::op
 {
-#ifdef LO_AVX_AVAILABLE // Old processors
+#ifdef LO_AVX_AVAILABLE
+
+bool hasAVXCode() { return true; }
 
 using namespace AVX;
 
@@ -48,13 +50,10 @@ static inline void sumAVX(__m256d& sum, __m256d& err, const __m256d& value)
     sum = t;
 }
 
-#endif
-
 /** Execute Kahan sum with AVX.
   */
 KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
 {
-#ifdef LO_AVX_AVAILABLE
     // Make sure we don't fall out of bounds.
     // This works by sums of 8 terms.
     // So the 8'th term is i+7
@@ -107,14 +106,16 @@ KahanSumSimple executeAVX(size_t& i, size_t nSize, const double* pCurrent)
         return { sums[0], errs[0] };
     }
     return { 0.0, 0.0 };
-#else
-    (void)i;
-    (void)nSize;
-    (void)pCurrent;
-    abort();
-#endif
 }
 
+#else // LO_AVX_AVAILABLE
+
+bool hasAVXCode() { return false; }
+
+KahanSumSimple executeAVX(size_t&, size_t, const double*) { abort(); }
+
+#endif
+
 } // end namespace sc::op
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumAVX512.cxx b/sc/source/core/tool/arraysumAVX512.cxx
index 987e5a3e6ff6..6a3235a58e2e 100644
--- a/sc/source/core/tool/arraysumAVX512.cxx
+++ b/sc/source/core/tool/arraysumAVX512.cxx
@@ -18,25 +18,11 @@
 
 #include <stdlib.h>
 
-/* TODO Remove this once GCC updated and AVX512 can work. */
-#ifdef __GNUC__
-#if __GNUC__ < 9
-#ifdef LO_AVX512F_AVAILABLE
-#define HAS_LO_AVX512F_AVAILABLE
-#undef LO_AVX512F_AVAILABLE
-#endif
-#endif
-#endif
-
 namespace sc::op
 {
 #ifdef LO_AVX512F_AVAILABLE
-const bool hasAVX512F = cpuid::hasAVX512F();
-#else
-const bool hasAVX512F = false;
-#endif
 
-#ifdef LO_AVX512F_AVAILABLE // New processors
+bool hasAVX512FCode() { return true; }
 
 using namespace AVX512;
 
@@ -62,13 +48,10 @@ static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
     sum = t;
 }
 
-#endif
-
 /** Execute Kahan sum with AVX512.
   */
 KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
 {
-#ifdef LO_AVX512F_AVAILABLE // New processors
     // Make sure we don't fall out of bounds.
     // This works by sums of 8 terms.
     // So the 8'th term is i+7
@@ -122,24 +105,16 @@ KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
         return { sums[0], errs[0] };
     }
     return { 0.0, 0.0 };
-#else
-    (void)i;
-    (void)nSize;
-    (void)pCurrent;
-    abort();
-#endif
 }
 
-} // end namespace sc::op
+#else // LO_AVX512F_AVAILABLE
+
+bool hasAVX512FCode() { return false; }
+
+KahanSumSimple executeAVX512F(size_t&, size_t, const double*) { abort(); }
 
-/* TODO Remove this once GCC updated and AVX512 can work. */
-#ifdef __GNUC__
-#if __GNUC__ < 9
-#ifdef HAS_LO_AVX512F_AVAILABLE
-#define LO_AVX512F_AVAILABLE
-#undef HAS_LO_AVX512F_AVAILABLE
-#endif
-#endif
 #endif
 
+} // end namespace sc::op
+
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index b4edb98286f9..1a5cc2f00dfe 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -20,7 +20,9 @@
 
 namespace sc::op
 {
-#ifdef LO_SSE2_AVAILABLE // Old processors
+#ifdef LO_SSE2_AVAILABLE
+
+bool hasSSE2Code() { return true; }
 
 using namespace SSE2;
 
@@ -47,13 +49,10 @@ static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
     sum = t;
 }
 
-#endif
-
 /** Execute Kahan sum with SSE2.
   */
 KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
 {
-#ifdef LO_SSE2_AVAILABLE
     // Make sure we don't fall out of bounds.
     // This works by sums of 8 terms.
     // So the 8'th term is i+7
@@ -120,13 +119,15 @@ KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
         return { sums[0], errs[0] };
     }
     return { 0.0, 0.0 };
-#else
-    (void)i;
-    (void)nSize;
-    (void)pCurrent;
-    abort();
-#endif
 }
+
+#else // LO_SSE2_AVAILABLE
+
+bool hasSSE2Code() { return false; }
+
+KahanSumSimple executeSSE2(size_t&, size_t, const double*) { abort(); }
+
+#endif
 }
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Reply via email to