[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #148898)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 30 22:12:55 PDT 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/148898
>From 55ec8581d4ba5b1f3b0e213834233a594f5b05b3 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Tue, 15 Jul 2025 13:01:24 -0400
Subject: [PATCH] [X86] Remove extra MOV after widening atomic load
This change adds instruction-selection patterns that fold a widened atomic
load directly into a vector register load, removing the extra GPR-to-XMM MOV.
---
llvm/lib/Target/X86/X86InstrCompiler.td | 16 +++++
llvm/test/CodeGen/X86/atomic-load-store.ll | 72 ++++++++--------------
2 files changed, 40 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ec31675731b79..ce429b5916280 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1204,6 +1204,22 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
+// load atomic <2 x i16>, <4 x i8>
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>, Requires<[UseSSE2]>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
+ (VMOVSSrm addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
+ (VMOVSSZrm addr:$src)>, Requires<[HasAVX512]>;
+
+// load atomic <2 x i32>, <2 x float>, <4 x i16>, <2 x ptr addrspace(270)>
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
+ (MOV64toPQIrm addr:$src)>, Requires<[UseSSE2]>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
+ (VMOV64toPQIrm addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
+ (VMOV64toPQIZrm addr:$src)>, Requires<[HasAVX512]>;
+
// Floating point loads/stores.
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index fc32c3668d1dd..7e15b9303887f 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -202,26 +202,22 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) {
define <2 x i16> @atomic_vec2_i16(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_i16:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_i16:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec2_i16:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_i16:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
-; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
ret <2 x i16> %ret
@@ -230,26 +226,22 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) {
define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_ptr270:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_ptr270:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec2_ptr270:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_ptr270:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
ret <2 x ptr addrspace(270)> %ret
@@ -258,26 +250,22 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_i32_align:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec2_i32_align:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_i32_align:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
ret <2 x i32> %ret
@@ -286,26 +274,22 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
define <2 x float> @atomic_vec2_float_align(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_float_align:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec2_float_align:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec2_float_align:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec2_float_align:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <2 x float>, ptr %x acquire, align 8
ret <2 x float> %ret
@@ -556,26 +540,22 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec4_i8:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec4_i8:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec4_i8:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
-; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec4_i8:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
-; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
ret <4 x i8> %ret
@@ -584,26 +564,22 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
; CHECK-SSE-O3-LABEL: atomic_vec4_i16:
; CHECK-SSE-O3: # %bb.0:
-; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec4_i16:
; CHECK-AVX-O3: # %bb.0:
-; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O3-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec4_i16:
; CHECK-SSE-O0: # %bb.0:
-; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
-; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: movq (%rdi), %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec4_i16:
; CHECK-AVX-O0: # %bb.0:
-; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
-; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
+; CHECK-AVX-O0-NEXT: vmovq (%rdi), %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
ret <4 x i16> %ret
More information about the llvm-branch-commits
mailing list