[llvm-branch-commits] [llvm] [X86] Cast atomic vectors in IR to support floats (PR #142320)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jun 1 13:47:06 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: None (jofrn)
<details>
<summary>Changes</summary>
This commit casts floats to ints in an atomic load during AtomicExpand to support
floating-point types. It is also required to support 128-bit vectors in SSE/AVX.
---
**Stack**:
- #<!-- -->120716
- #<!-- -->142320 ⬅
- #<!-- -->138635
- #<!-- -->120598
- #<!-- -->120387
- #<!-- -->120386
- #<!-- -->120385
- #<!-- -->120384
⚠️ *Part of a stack created by [spr](https://github.com/ejoffe/spr). Do not merge manually using the UI - doing so may have unexpected results.*
---
Full diff: https://github.com/llvm/llvm-project/pull/142320.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+7)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+2)
- (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+163-18)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 776d3c0a42e2f..3debf30da0a29 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
}
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ if (LI->getType()->getScalarType()->isFloatingPointTy())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..43cddb2b53bd6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1839,6 +1839,8 @@ namespace llvm {
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 4b818b6cfa57e..039edcbf83544 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -207,19 +207,19 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_bfloat:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: movd %eax, %xmm0
; CHECK-O3-NEXT: retq
;
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_bfloat:
@@ -227,8 +227,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: movd %eax, %xmm0
; CHECK-O0-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
@@ -236,8 +235,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
@@ -245,8 +243,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
ret <1 x bfloat> %ret
@@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
ret <2 x float> %ret
}
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_half:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_half:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_half:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_half:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_half:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_half:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
+ ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+ ret <2 x bfloat> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec1_ptr:
; CHECK-O3: # %bb.0:
@@ -457,19 +522,19 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_half:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: movd %eax, %xmm0
; CHECK-O3-NEXT: retq
;
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_half:
@@ -477,8 +542,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: movd %eax, %xmm0
; CHECK-O0-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
@@ -486,8 +550,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
@@ -495,8 +558,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x half>, ptr %x acquire, align 2
ret <1 x half> %ret
@@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
ret <4 x i16> %ret
}
+define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x half>, ptr %x acquire, align 8
+ ret <4 x half> %ret
+}
+
+define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_bfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
+ ret <4 x bfloat> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_float_align:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movl $2, %esi
+; CHECK-O3-NEXT: callq __atomic_load_16@PLT
+; CHECK-O3-NEXT: movq %rdx, %xmm1
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_float_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rbx
+; CHECK-SSE-O3-NEXT: xorl %eax, %eax
+; CHECK-SSE-O3-NEXT: xorl %edx, %edx
+; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx
+; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx
+; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE-O3-NEXT: popq %rbx
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_float_align:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movl $2, %esi
+; CHECK-O0-NEXT: callq __atomic_load_16@PLT
+; CHECK-O0-NEXT: movq %rdx, %xmm1
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_float_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rbx
+; CHECK-SSE-O0-NEXT: xorl %eax, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, %ebx
+; CHECK-SSE-O0-NEXT: movq %rbx, %rax
+; CHECK-SSE-O0-NEXT: movq %rbx, %rdx
+; CHECK-SSE-O0-NEXT: movq %rbx, %rcx
+; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE-O0-NEXT: popq %rbx
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 16
+ ret <4 x float> %ret
+}
+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec4_float:
; CHECK-O3: # %bb.0:
``````````
</details>
https://github.com/llvm/llvm-project/pull/142320
More information about the llvm-branch-commits
mailing list