https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635
>From b2b23bfbb221870a308501d5f5839b59b3fb2370 Mon Sep 17 00:00:00 2001 From: jofrn <jofer...@amd.com> Date: Sun, 1 Jun 2025 16:23:05 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td | 7 + llvm/test/CodeGen/X86/atomic-load-store.ll | 192 +++------------------ 2 files changed, 35 insertions(+), 164 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 927b2c8b22f05..26b76dd1ca83a 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)))))), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))), + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index ff5391f44bbe3..4b818b6cfa57e 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -319,159 +319,60 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { define <2 x i16> @atomic_vec2_i16(ptr %x) { ; CHECK-O3-LABEL: atomic_vec2_i16: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movl (%rdi), %eax -; CHECK-O3-NEXT: movd %eax, %xmm0 +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-O3-NEXT: retq ; ; CHECK-SSE-O3-LABEL: atomic_vec2_i16: ; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movl (%rdi), %eax -; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-O3-NEXT: retq ; ; CHECK-AVX-O3-LABEL: atomic_vec2_i16: ; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movl (%rdi), %eax -; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-AVX-O3-NEXT: retq ; ; CHECK-O0-LABEL: atomic_vec2_i16: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movl (%rdi), %eax -; CHECK-O0-NEXT: movd %eax, %xmm0 +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-O0-NEXT: retq ; ; CHECK-SSE-O0-LABEL: atomic_vec2_i16: ; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movl (%rdi), %eax -; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-O0-NEXT: retq ; ; CHECK-AVX-O0-LABEL: atomic_vec2_i16: ; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movl (%rdi), %eax -; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-AVX-O0-NEXT: retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { -; CHECK-O3-LABEL: atomic_vec2_ptr270: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rax -; CHECK-O3-NEXT: movq %rax, %xmm0 -; CHECK-O3-NEXT: retq -; -; CHECK-SSE-O3-LABEL: atomic_vec2_ptr270: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movq (%rdi), %rax -; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec2_ptr270: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movq (%rdi), %rax -; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O3-NEXT: retq -; -; CHECK-O0-LABEL: atomic_vec2_ptr270: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movq %rax, %xmm0 -; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec2_ptr270: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movq (%rdi), %rax -; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec2_ptr270: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movq (%rdi), %rax -; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O0-NEXT: retq +; CHECK-LABEL: atomic_vec2_ptr270: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret } define <2 x i32> @atomic_vec2_i32_align(ptr %x) { -; CHECK-O3-LABEL: atomic_vec2_i32_align: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rax -; CHECK-O3-NEXT: movq %rax, %xmm0 -; CHECK-O3-NEXT: retq -; -; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movq (%rdi), %rax -; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec2_i32_align: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movq (%rdi), %rax -; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O3-NEXT: retq -; -; CHECK-O0-LABEL: atomic_vec2_i32_align: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movq %rax, %xmm0 -; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec2_i32_align: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movq (%rdi), %rax -; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec2_i32_align: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movq (%rdi), %rax -; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O0-NEXT: retq +; CHECK-LABEL: atomic_vec2_i32_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret } define <2 x float> @atomic_vec2_float_align(ptr %x) { -; CHECK-O3-LABEL: atomic_vec2_float_align: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rax -; CHECK-O3-NEXT: movq %rax, %xmm0 -; CHECK-O3-NEXT: retq -; -; CHECK-SSE-O3-LABEL: atomic_vec2_float_align: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movq (%rdi), %rax -; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec2_float_align: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movq (%rdi), %rax -; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O3-NEXT: retq -; -; CHECK-O0-LABEL: atomic_vec2_float_align: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movq %rax, %xmm0 -; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec2_float_align: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movq (%rdi), %rax -; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec2_float_align: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movq (%rdi), %rax -; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O0-NEXT: retq +; CHECK-LABEL: atomic_vec2_float_align: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret } @@ -900,79 +801,42 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK-O3-LABEL: atomic_vec4_i8: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movl (%rdi), %eax -; CHECK-O3-NEXT: movd %eax, %xmm0 +; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-O3-NEXT: retq ; ; CHECK-SSE-O3-LABEL: atomic_vec4_i8: ; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movl (%rdi), %eax -; CHECK-SSE-O3-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-O3-NEXT: retq ; ; CHECK-AVX-O3-LABEL: atomic_vec4_i8: ; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movl (%rdi), %eax -; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-AVX-O3-NEXT: retq ; ; CHECK-O0-LABEL: atomic_vec4_i8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movl (%rdi), %eax -; CHECK-O0-NEXT: movd %eax, %xmm0 +; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-O0-NEXT: retq ; ; CHECK-SSE-O0-LABEL: atomic_vec4_i8: ; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movl (%rdi), %eax -; CHECK-SSE-O0-NEXT: movd %eax, %xmm0 +; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-SSE-O0-NEXT: retq ; ; CHECK-AVX-O0-LABEL: atomic_vec4_i8: ; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movl (%rdi), %eax -; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0 +; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-AVX-O0-NEXT: retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { -; CHECK-O3-LABEL: atomic_vec4_i16: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rax -; CHECK-O3-NEXT: movq %rax, %xmm0 -; CHECK-O3-NEXT: retq -; -; CHECK-SSE-O3-LABEL: atomic_vec4_i16: -; CHECK-SSE-O3: # %bb.0: -; CHECK-SSE-O3-NEXT: movq (%rdi), %rax -; CHECK-SSE-O3-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O3-NEXT: retq -; -; CHECK-AVX-O3-LABEL: atomic_vec4_i16: -; CHECK-AVX-O3: # %bb.0: -; CHECK-AVX-O3-NEXT: movq (%rdi), %rax -; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O3-NEXT: retq -; -; CHECK-O0-LABEL: atomic_vec4_i16: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movq %rax, %xmm0 -; CHECK-O0-NEXT: retq -; -; CHECK-SSE-O0-LABEL: atomic_vec4_i16: -; CHECK-SSE-O0: # %bb.0: -; CHECK-SSE-O0-NEXT: movq (%rdi), %rax -; CHECK-SSE-O0-NEXT: movq %rax, %xmm0 -; CHECK-SSE-O0-NEXT: retq -; -; CHECK-AVX-O0-LABEL: atomic_vec4_i16: -; CHECK-AVX-O0: # %bb.0: -; CHECK-AVX-O0-NEXT: movq (%rdi), %rax -; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0 -; CHECK-AVX-O0-NEXT: retq +; CHECK-LABEL: atomic_vec4_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rdi), %xmm0 +; CHECK-NEXT: retq %ret = load atomic <4 x i16>, ptr %x acquire, align 8 ret <4 x i16> %ret } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits