================
@@ -1009,6 +1009,23 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
// CHECK: call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %{{.*}}, <32 x
i8> %{{.*}}, i8 3)
return _mm256_mpsadbw_epu8(x, y, 3);
}
+// imm=0 both lanes. Lane0 A=4,B=1 -> 12 each ; Lane1 A=8,B=1 -> |8-1|*4=28
each
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}),
((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}),
0), 353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310));
----------------
adream307 wrote:
OK, I added more tests and checked them on actual AVX-512 hardware:
```cpp
#include <stdio.h>
#include <immintrin.h>
/* Print the eight 16-bit lanes of v in index order, each followed by a
 * comma, then terminate the line. */
void printv8hu(__v8hu v) {
  enum { LANE_COUNT = 8 };
  int lane = 0;

  while (lane < LANE_COUNT) {
    /* Each lane is promoted to int when passed to printf, so %d is the
     * matching conversion. */
    printf("%d,", v[lane]);
    lane++;
  }
  printf("\n");
}
/* Print the sixteen 16-bit lanes of v in index order, each followed by a
 * comma, then terminate the line. */
void printv16hu(__v16hu v) {
  enum { LANE_COUNT = 16 };
  int lane = 0;

  while (lane < LANE_COUNT) {
    /* Each lane is promoted to int when passed to printf, so %d is the
     * matching conversion. */
    printf("%d,", v[lane]);
    lane++;
  }
  printf("\n");
}
/*
 * Run _mm_mpsadbw_epu8 and _mm256_mpsadbw_epu8 on uniform inputs (every byte
 * 255 or every byte 0) and print each result vector on its own line:
 * three 128-bit cases first, then three 256-bit cases.
 */
int main() {
  /* Uniform operands shared by the cases below. */
  const __m128i ones128 = (__m128i)(__v16qu){255, 255, 255, 255, 255, 255, 255, 255,
                                             255, 255, 255, 255, 255, 255, 255, 255};
  const __m128i zero128 = (__m128i)(__v16qu){0, 0, 0, 0, 0, 0, 0, 0,
                                             0, 0, 0, 0, 0, 0, 0, 0};
  const __m256i ones256 = (__m256i)(__v32qu){255, 255, 255, 255, 255, 255, 255, 255,
                                             255, 255, 255, 255, 255, 255, 255, 255,
                                             255, 255, 255, 255, 255, 255, 255, 255,
                                             255, 255, 255, 255, 255, 255, 255, 255};
  const __m256i zero256 = (__m256i)(__v32qu){0, 0, 0, 0, 0, 0, 0, 0,
                                             0, 0, 0, 0, 0, 0, 0, 0,
                                             0, 0, 0, 0, 0, 0, 0, 0,
                                             0, 0, 0, 0, 0, 0, 0, 0};

  /* 128-bit form, immediate 7 throughout: identical operands, then 255 vs 0
   * in both operand orders. */
  const __v8hu same128 = (__v8hu)_mm_mpsadbw_epu8(ones128, ones128, 7);
  const __v8hu high_low128 = (__v8hu)_mm_mpsadbw_epu8(ones128, zero128, 7);
  const __v8hu low_high128 = (__v8hu)_mm_mpsadbw_epu8(zero128, ones128, 7);

  printv8hu(same128);
  printv8hu(high_low128);
  printv8hu(low_high128);

  /* 256-bit form, same operand patterns, with immediates 0, 1 and 2. */
  const __v16hu same256 = (__v16hu)_mm256_mpsadbw_epu8(ones256, ones256, 0);
  const __v16hu high_low256 = (__v16hu)_mm256_mpsadbw_epu8(ones256, zero256, 1);
  const __v16hu low_high256 = (__v16hu)_mm256_mpsadbw_epu8(zero256, ones256, 2);

  printv16hu(same256);
  printv16hu(high_low256);
  printv16hu(low_high256);

  return 0;
}
```
and the output is
```txt
0,0,0,0,0,0,0,0,
1020,1020,1020,1020,1020,1020,1020,1020,
1020,1020,1020,1020,1020,1020,1020,1020,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,
1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,
```
https://github.com/llvm/llvm-project/pull/202257
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits