https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110583
Bug ID: 110583
Summary: [x86] missed optimizations in vector concatenation patterns
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: mkretz at gcc dot gnu.org
Target Milestone: ---
Target: x86_64-*-*, i?86-*-*
Test case (https://godbolt.org/z/f8GdzfjbW):
Compile with e.g. `-O2 -std=gnu++20 -march=skylake`
// GNU vector-extension aliases used by the test functions below. The
// vector_size argument is the total size in bytes, spelled here as sizeof(T[N])
// for a vector of N elements of type T.
using short4 [[gnu::vector_size(sizeof(short[4]))]] = short;
using short8 [[gnu::vector_size(sizeof(short[8]))]] = short;
using int4 [[gnu::vector_size(sizeof(int[4]))]] = int;
using int8 [[gnu::vector_size(sizeof(int[8]))]] = int;
using float4 [[gnu::vector_size(sizeof(float[4]))]] = float;
using float8 [[gnu::vector_size(sizeof(float[8]))]] = float;
using double1 [[gnu::vector_size(sizeof(double[1]))]] = double;
using double4 [[gnu::vector_size(sizeof(double[4]))]] = double;
// ------------------------------------------------
// Expected codegen for both vpunpcklqdq_* functions:
// vpunpcklqdq xmm0, xmm0, xmm1
// ret
// Concatenates two 4-element short vectors into one 8-element vector by
// listing every element explicitly: a[0..3] followed by b[0..3].
short8 vpunpcklqdq_1(short4 a, short4 b)
{ return short8{a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; }
// Same concatenation as vpunpcklqdq_1, expressed as a single shuffle of (a, b)
// with the identity index list 0..7 (indices 0-3 pick from a, 4-7 from b).
short8 vpunpcklqdq_2(short4 a, short4 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7); }
// ------------------------------------------------
// Expected codegen for both vinserti128_* functions:
// vinserti128 ymm0, ymm0, xmm1, 1
// ret
// Concatenates two 4-element int vectors into one 8-element vector by listing
// every element explicitly: a[0..3] followed by b[0..3].
int8 vinserti128_1(int4 a, int4 b)
{ return int8{a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; }
// Same concatenation as vinserti128_1, expressed as a single shuffle of (a, b)
// with the identity index list 0..7 (indices 0-3 pick from a, 4-7 from b).
int8 vinserti128_2(int4 a, int4 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7); }
// ------------------------------------------------
// Expected codegen for both vinsertf128_* functions:
// vinsertf128 ymm0, ymm0, xmm1, 1
// ret
// Concatenates two 4-element float vectors into one 8-element vector by
// listing every element explicitly. Per the report, this is the only function
// in the test case that already compiles to the expected asm.
float8 vinsertf128_good(float4 a, float4 b)
{ return float8{a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]}; }
// Same concatenation as vinsertf128_good, expressed as a single shuffle of
// (a, b) with the identity index list 0..7. Per the report, this form does not
// yet produce the same asm as vinsertf128_good, although it should.
float8 vinsertf128_bad(float4 a, float4 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 3, 4, 5, 6, 7); }
// ------------------------------------------------
// Expected codegen:
// vbroadcastsd ymm1, xmm1
// vblendps ymm0, ymm0, ymm1, 192
// ret
// Returns a copy of a whose last element is replaced by the scalar b:
// {a[0], a[1], a[2], b}.
double4 broadcast_blend_0(double4 a, double b)
{ return double4{a[0], a[1], a[2], b}; }
// ------------------------------------------------
// Expected codegen for both functions taking a double1 argument:
// vbroadcastsd ymm1, QWORD PTR [rsp+8]
// vblendps ymm0, ymm0, ymm1, 192
// ret
// Returns a copy of a whose last element is replaced by the single element of
// the one-element vector b: {a[0], a[1], a[2], b[0]}.
double4 broadcast_blend_1(double4 a, double1 b)
{ return double4{a[0], a[1], a[2], b[0]}; }
// Shuffle-based counterpart of broadcast_blend_1: takes elements 0, 1, 2 of a
// and then index 4 of the (a, b) pair.
// NOTE(review): index 4 presumably selects b[0] (the first element after a's
// four) even though a and b have different lengths; confirm against the GCC
// documentation for __builtin_shufflevector.
double4 broadcast_blend_2(double4 a, double1 b)
{ return __builtin_shufflevector(a, b, 0, 1, 2, 4); }
Each of these functions should compile to the asm shown in the comment above
it. Only vinsertf128_good currently does; I included it anyway because it should
be equivalent to vinsertf128_bad (with the latter being the one that has to
change ;) ).