[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding store atomic vector (PR #197862)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 19 06:28:51 PDT 2026
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/197862
From fb63f23f69f78958dd61fb0a2bbe61f2e43b8215 Mon Sep 17 00:00:00 2001
From: jofrn <jo7frn1 at gmail.com>
Date: Thu, 14 May 2026 20:42:42 -0700
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding store atomic vector
AtomicExpand fails for aligned `store atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
ptrtoint + bitcast so that the call can be lowered, mirroring the
load-side handling from #148900.
---
llvm/lib/CodeGen/AtomicExpandPass.cpp | 6 +-
llvm/test/CodeGen/ARM/atomic-load-store.ll | 49 ++++++++
llvm/test/CodeGen/X86/atomic-load-store.ll | 105 +++++++++++++++++-
.../X86/expand-atomic-non-integer.ll | 98 ++++++++++++++++
4 files changed, 250 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..a41dc77b0b17f 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -699,7 +699,9 @@ StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
auto *M = SI->getModule();
Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
M->getDataLayout());
- Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+ Value *NewVal = SI->getValueOperand()->getType()->isPtrOrPtrVectorTy()
+ ? Builder.CreatePtrToInt(SI->getValueOperand(), NewTy)
+ : Builder.CreateBitCast(SI->getValueOperand(), NewTy);
Value *Addr = SI->getPointerOperand();
@@ -2175,7 +2177,7 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
if (ValueOperand) {
if (UseSizedLibcall) {
Value *IntValue =
- Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
+ Builder.CreateBitPreservingCastChain(DL, ValueOperand, SizedIntTy);
Args.push_back(IntValue);
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 1af2832702296..0c787a4ca05c3 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -1038,3 +1038,52 @@ define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
ret <1 x ptr> %ret
}
+
+define void @store_atomic_vec1_ptr(ptr %x, <1 x ptr> %v) #0 {
+; ARM-LABEL: store_atomic_vec1_ptr:
+; ARM: @ %bb.0:
+; ARM-NEXT: dmb ish
+; ARM-NEXT: str r1, [r0]
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: store_atomic_vec1_ptr:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: str r1, [r0]
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: store_atomic_vec1_ptr:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: str r1, [r0]
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: store_atomic_vec1_ptr:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: bl __sync_lock_test_and_set_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: store_atomic_vec1_ptr:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #3
+; ARMV4-NEXT: bl __atomic_store_4
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: store_atomic_vec1_ptr:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: mov r2, #0
+; ARMV6-NEXT: mcr p15, #0, r2, c7, c10, #5
+; ARMV6-NEXT: str r1, [r0]
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: store_atomic_vec1_ptr:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: str r1, [r0]
+; THUMBM-NEXT: bx lr
+ store atomic <1 x ptr> %v, ptr %x release, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5be8d4d47fa9e..f90c856129b93 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -657,6 +657,53 @@ define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
ret <2 x ptr> %ret
}
+
+define void @store_atomic_vec2_ptr_align(ptr %x, <2 x ptr> %v) nounwind {
+; CHECK-SSE2-O3-LABEL: store_atomic_vec2_ptr_align:
+; CHECK-SSE2-O3: # %bb.0:
+; CHECK-SSE2-O3-NEXT: pushq %rax
+; CHECK-SSE2-O3-NEXT: movq %xmm0, %rsi
+; CHECK-SSE2-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-O3-NEXT: movq %xmm0, %rdx
+; CHECK-SSE2-O3-NEXT: movl $3, %ecx
+; CHECK-SSE2-O3-NEXT: callq __atomic_store_16@PLT
+; CHECK-SSE2-O3-NEXT: popq %rax
+; CHECK-SSE2-O3-NEXT: retq
+;
+; CHECK-SSE4-O3-LABEL: store_atomic_vec2_ptr_align:
+; CHECK-SSE4-O3: # %bb.0:
+; CHECK-SSE4-O3-NEXT: movaps %xmm0, (%rdi)
+; CHECK-SSE4-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_ptr_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps %xmm0, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE2-O0-LABEL: store_atomic_vec2_ptr_align:
+; CHECK-SSE2-O0: # %bb.0:
+; CHECK-SSE2-O0-NEXT: pushq %rax
+; CHECK-SSE2-O0-NEXT: movq %xmm0, %rsi
+; CHECK-SSE2-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-O0-NEXT: movq %xmm0, %rdx
+; CHECK-SSE2-O0-NEXT: movl $3, %ecx
+; CHECK-SSE2-O0-NEXT: callq __atomic_store_16@PLT
+; CHECK-SSE2-O0-NEXT: popq %rax
+; CHECK-SSE2-O0-NEXT: retq
+;
+; CHECK-SSE4-O0-LABEL: store_atomic_vec2_ptr_align:
+; CHECK-SSE4-O0: # %bb.0:
+; CHECK-SSE4-O0-NEXT: movaps %xmm0, (%rdi)
+; CHECK-SSE4-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_ptr_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovdqa %xmm0, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <2 x ptr> %v, ptr %x release, align 16
+ ret void
+}
+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
; CHECK-SSE2-O3-LABEL: atomic_vec4_ptr270:
; CHECK-SSE2-O3: # %bb.0:
@@ -703,6 +750,52 @@ define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
ret <4 x ptr addrspace(270)> %ret
}
+define void @store_atomic_vec4_ptr270_align(ptr %x, <4 x ptr addrspace(270)> %v) nounwind {
+; CHECK-SSE2-O3-LABEL: store_atomic_vec4_ptr270_align:
+; CHECK-SSE2-O3: # %bb.0:
+; CHECK-SSE2-O3-NEXT: pushq %rax
+; CHECK-SSE2-O3-NEXT: movq %xmm0, %rsi
+; CHECK-SSE2-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-O3-NEXT: movq %xmm0, %rdx
+; CHECK-SSE2-O3-NEXT: movl $3, %ecx
+; CHECK-SSE2-O3-NEXT: callq __atomic_store_16@PLT
+; CHECK-SSE2-O3-NEXT: popq %rax
+; CHECK-SSE2-O3-NEXT: retq
+;
+; CHECK-SSE4-O3-LABEL: store_atomic_vec4_ptr270_align:
+; CHECK-SSE4-O3: # %bb.0:
+; CHECK-SSE4-O3-NEXT: movaps %xmm0, (%rdi)
+; CHECK-SSE4-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec4_ptr270_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps %xmm0, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE2-O0-LABEL: store_atomic_vec4_ptr270_align:
+; CHECK-SSE2-O0: # %bb.0:
+; CHECK-SSE2-O0-NEXT: pushq %rax
+; CHECK-SSE2-O0-NEXT: movq %xmm0, %rsi
+; CHECK-SSE2-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE2-O0-NEXT: movq %xmm0, %rdx
+; CHECK-SSE2-O0-NEXT: movl $3, %ecx
+; CHECK-SSE2-O0-NEXT: callq __atomic_store_16@PLT
+; CHECK-SSE2-O0-NEXT: popq %rax
+; CHECK-SSE2-O0-NEXT: retq
+;
+; CHECK-SSE4-O0-LABEL: store_atomic_vec4_ptr270_align:
+; CHECK-SSE4-O0: # %bb.0:
+; CHECK-SSE4-O0-NEXT: movaps %xmm0, (%rdi)
+; CHECK-SSE4-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec4_ptr270_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovdqa %xmm0, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <4 x ptr addrspace(270)> %v, ptr %x release, align 16
+ ret void
+}
+
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
; CHECK-SSE-O3: # %bb.0:
@@ -1210,7 +1303,7 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind {
; CHECK-SSE4-O3-NEXT: pextrq $1, %xmm0, %rcx
; CHECK-SSE4-O3-NEXT: movq %xmm0, %rbx
; CHECK-SSE4-O3-NEXT: .p2align 4
-; CHECK-SSE4-O3-NEXT: .LBB39_1: # %atomicrmw.start
+; CHECK-SSE4-O3-NEXT: .LBB41_1: # %atomicrmw.start
; CHECK-SSE4-O3-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-SSE4-O3-NEXT: movq %xmm1, %rax
; CHECK-SSE4-O3-NEXT: pextrq $1, %xmm1, %rdx
@@ -1218,7 +1311,7 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind {
; CHECK-SSE4-O3-NEXT: movq %rdx, %xmm0
; CHECK-SSE4-O3-NEXT: movq %rax, %xmm1
; CHECK-SSE4-O3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-SSE4-O3-NEXT: jne .LBB39_1
+; CHECK-SSE4-O3-NEXT: jne .LBB41_1
; CHECK-SSE4-O3-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-SSE4-O3-NEXT: popq %rbx
; CHECK-SSE4-O3-NEXT: retq
@@ -1246,7 +1339,7 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind {
; CHECK-SSE4-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-SSE4-O0-NEXT: movaps (%rdi), %xmm0
; CHECK-SSE4-O0-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-SSE4-O0-NEXT: .LBB39_1: # %atomicrmw.start
+; CHECK-SSE4-O0-NEXT: .LBB41_1: # %atomicrmw.start
; CHECK-SSE4-O0-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-SSE4-O0-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-SSE4-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
@@ -1263,9 +1356,9 @@ define void @store_atomic_vec4_float_align(ptr %x, <4 x float> %v) nounwind {
; CHECK-SSE4-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-SSE4-O0-NEXT: testb $1, %al
; CHECK-SSE4-O0-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-SSE4-O0-NEXT: jne .LBB39_2
-; CHECK-SSE4-O0-NEXT: jmp .LBB39_1
-; CHECK-SSE4-O0-NEXT: .LBB39_2: # %atomicrmw.end
+; CHECK-SSE4-O0-NEXT: jne .LBB41_2
+; CHECK-SSE4-O0-NEXT: jmp .LBB41_1
+; CHECK-SSE4-O0-NEXT: .LBB41_2: # %atomicrmw.end
; CHECK-SSE4-O0-NEXT: popq %rbx
; CHECK-SSE4-O0-NEXT: retq
;
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 9f973ac5531d1..763b91aafa383 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -354,3 +354,101 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
%ret = load atomic <4 x float>, ptr %x acquire, align 16
ret <4 x float> %ret
}
+
+define void @store_atomic_vec2_ptr_align(ptr %x, <2 x ptr> %v) nounwind {
+; CHECK64-LABEL: define void @store_atomic_vec2_ptr_align(
+; CHECK64-SAME: ptr [[X:%.*]], <2 x ptr> [[V:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[V]] to <2 x i64>
+; CHECK64-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP2]], i32 3)
+; CHECK64-NEXT: ret void
+;
+; CHECK32-LABEL: define void @store_atomic_vec2_ptr_align(
+; CHECK32-SAME: ptr [[X:%.*]], <2 x ptr> [[V:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT: store atomic <2 x ptr> [[V]], ptr [[X]] release, align 16
+; CHECK32-NEXT: ret void
+;
+ store atomic <2 x ptr> %v, ptr %x release, align 16
+ ret void
+}
+
+define void @store_atomic_vec4_ptr270_align(ptr %x, <4 x ptr addrspace(270)> %v) nounwind {
+; CHECK64-LABEL: define void @store_atomic_vec4_ptr270_align(
+; CHECK64-SAME: ptr [[X:%.*]], <4 x ptr addrspace(270)> [[V:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr addrspace(270)> [[V]] to <4 x i32>
+; CHECK64-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP2]], i32 3)
+; CHECK64-NEXT: ret void
+;
+; CHECK32-LABEL: define void @store_atomic_vec4_ptr270_align(
+; CHECK32-SAME: ptr [[X:%.*]], <4 x ptr addrspace(270)> [[V:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x ptr addrspace(270)>, align 16
+; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK32-NEXT: store <4 x ptr addrspace(270)> [[V]], ptr [[TMP1]], align 16
+; CHECK32-NEXT: call void @__atomic_store(i32 16, ptr [[X]], ptr [[TMP1]], i32 3)
+; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]])
+; CHECK32-NEXT: ret void
+;
+ store atomic <4 x ptr addrspace(270)> %v, ptr %x release, align 16
+ ret void
+}
+
+define void @store_atomic_vec2_i16(ptr %x, <2 x i16> %v) nounwind {
+; CHECK-LABEL: define void @store_atomic_vec2_i16(
+; CHECK-SAME: ptr [[X:%.*]], <2 x i16> [[V:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: store atomic <2 x i16> [[V]], ptr [[X]] release, align 8
+; CHECK-NEXT: ret void
+;
+ store atomic <2 x i16> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec2_half(ptr %x, <2 x half> %v) nounwind {
+; CHECK-LABEL: define void @store_atomic_vec2_half(
+; CHECK-SAME: ptr [[X:%.*]], <2 x half> [[V:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: store atomic <2 x half> [[V]], ptr [[X]] release, align 8
+; CHECK-NEXT: ret void
+;
+ store atomic <2 x half> %v, ptr %x release, align 8
+ ret void
+}
+
+define void @store_atomic_vec4_i32(ptr %x, <4 x i32> %v) nounwind {
+; CHECK64-LABEL: define void @store_atomic_vec4_i32(
+; CHECK64-SAME: ptr [[X:%.*]], <4 x i32> [[V:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V]] to i128
+; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP1]], i32 3)
+; CHECK64-NEXT: ret void
+;
+; CHECK32-LABEL: define void @store_atomic_vec4_i32(
+; CHECK32-SAME: ptr [[X:%.*]], <4 x i32> [[V:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x i32>, align 16
+; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK32-NEXT: store <4 x i32> [[V]], ptr [[TMP1]], align 16
+; CHECK32-NEXT: call void @__atomic_store(i32 16, ptr [[X]], ptr [[TMP1]], i32 3)
+; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]])
+; CHECK32-NEXT: ret void
+;
+ store atomic <4 x i32> %v, ptr %x release, align 16
+ ret void
+}
+
+define void @store_atomic_vec4_float(ptr %x, <4 x float> %v) nounwind {
+; CHECK64-LABEL: define void @store_atomic_vec4_float(
+; CHECK64-SAME: ptr [[X:%.*]], <4 x float> [[V:%.*]]) #[[ATTR0]] {
+; CHECK64-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V]] to i128
+; CHECK64-NEXT: call void @__atomic_store_16(ptr [[X]], i128 [[TMP1]], i32 3)
+; CHECK64-NEXT: ret void
+;
+; CHECK32-LABEL: define void @store_atomic_vec4_float(
+; CHECK32-SAME: ptr [[X:%.*]], <4 x float> [[V:%.*]]) #[[ATTR0]] {
+; CHECK32-NEXT: [[TMP1:%.*]] = alloca <4 x float>, align 16
+; CHECK32-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK32-NEXT: store <4 x float> [[V]], ptr [[TMP1]], align 16
+; CHECK32-NEXT: call void @__atomic_store(i32 16, ptr [[X]], ptr [[TMP1]], i32 3)
+; CHECK32-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]])
+; CHECK32-NEXT: ret void
+;
+ store atomic <4 x float> %v, ptr %x release, align 16
+ ret void
+}
More information about the llvm-branch-commits
mailing list