https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111306
Bug ID: 111306
Summary: macro-fusion makes error on conjugate complex
multiplication
Product: gcc
Version: unknown
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: joony.wie at samsung dot com
Target Milestone: ---
It seems that the operands src1 and src2 of "_mm512_fcmul_pch" are swapped by
macro-fusion when optimization is enabled.
If the operands are swapped, the imaginary part of the result has the wrong sign
bit.
So the operands must not be swapped for these conjugate complex
multiplication intrinsics.
Let me show the example and the output.
output:
3.000000 -4.000000 // w/o optimize.
3.000000 4.000000 // w/ optimize.
https://godbolt.org/z/df9Gz18hc // but may not be executable
```
#include <immintrin.h>
#include <cstdio>
// Reference variant of the reproducer: the optimize("O0") attribute requests
// that this one function be compiled without optimization. According to the
// report, this is the variant that prints "3.000000 -4.000000".
//
// a: source of the 32 _Float16 values loaded once into rA (only a[0..31] is read).
// b: input read in chunks of 32 _Float16 values.
// n: number of _Float16 elements to process; the loop advances 32 at a time.
// c: output, written in chunks of 32 _Float16 values.
//
// The deduced return type is void (there is no return statement).
__attribute__((optimize("O0")))
auto func0(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
// rA is loop-invariant: it is loaded once and reused for every chunk of b.
__m512h rA = _mm512_loadu_ph(a);
for (int i = 0; i < n; i += 32) {
__m512h rB = _mm512_loadu_ph(b + i);
// Operand order matters here: rB is the first operand and rA the second.
// The report states that swapping them flips the sign of the imaginary part.
_mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
}
}
// Optimized variant of the reproducer: the body is identical to func0, but the
// optimize("O") attribute requests that this function be compiled with
// optimization. According to the report, this variant prints
// "3.000000 4.000000", i.e. the imaginary part has the wrong sign, as if the
// two operands of _mm512_fcmul_pch had been swapped.
//
// a: source of the 32 _Float16 values loaded once into rA (only a[0..31] is read).
// b: input read in chunks of 32 _Float16 values.
// n: number of _Float16 elements to process; the loop advances 32 at a time.
// c: output, written in chunks of 32 _Float16 values.
//
// The deduced return type is void (there is no return statement).
__attribute__((optimize("O")))
auto func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
// rA is loop-invariant: it is loaded once and reused for every chunk of b.
__m512h rA = _mm512_loadu_ph(a);
for (int i = 0; i < n; i += 32) {
__m512h rB = _mm512_loadu_ph(b + i);
// Must stay textually identical to the call in func0 so that the only
// difference between the two functions is the optimization level.
_mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
}
}
// Driver: fills the inputs, runs the unoptimized (func0) and optimized (func1)
// variants on the same data, and prints the first two output values of each so
// the two lines can be compared. The report lists the observed output as
// "3.000000 -4.000000" (func0) versus "3.000000 4.000000" (func1).
int main() {
// 32 _Float16 values fill exactly one 512-bit vector, so each function runs
// its loop body once.
int n = 32;
// NOTE(review): n is a non-const int, so these are variable-length arrays,
// which are a GNU extension in C++ rather than standard C++.
_Float16 a[n], b[n], c[n];
for (int i = 1; i <= n; i++) {
// a = -1, 2, -3, 4, ... (odd i negated); b = 1, 2, 3, 4, ...
a[i - 1] = i & 1 ? -i : i;
b[i - 1] = i;
}
func0(a, b, n, c);
// n / 32 * 2 == 2 for n == 32: print only c[0] and c[1], presumably the
// real and imaginary parts of the first complex product.
for (int i = 0; i < n / 32 * 2; i++) {
printf("%f ", (float)c[i]);
}
printf("\n");
// c is overwritten by func1; the same two elements are printed again.
func1(a, b, n, c);
for (int i = 0; i < n / 32 * 2; i++) {
printf("%f ", (float)c[i]);
}
printf("\n");
return 0;
}
```