[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 22 10:20:02 PST 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716
>From f20870132a790b72b0d63ca18ce53508d1ec4f6a Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector
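AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. When the result type is a vector
of pointers, this change bitcasts the scalar result of the sized libcall to
a vector of pointer-sized integers and then applies inttoptr to produce the
`<n x ptr>` value, so that the call can be lowered.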
commit-id:f430c1af
---
llvm/lib/CodeGen/AtomicExpandPass.cpp | 18 ++++-
llvm/test/CodeGen/ARM/atomic-load-store.ll | 51 +++++++++++++++
llvm/test/CodeGen/X86/atomic-load-store.ll | 30 +++++++++
.../X86/expand-atomic-non-integer.ll | 65 +++++++++++++++++++
4 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8d..644977f225d834 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2060,9 +2060,21 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
I->replaceAllUsesWith(V);
} else if (HasResult) {
Value *V;
- if (UseSizedLibcall)
- V = Builder.CreateBitOrPointerCast(Result, I->getType());
- else {
+ if (UseSizedLibcall) {
+ // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+ if (I->getType()->getScalarType()->isPointerTy() &&
+ I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
+ unsigned AS = cast<PointerType>(
+ I->getType()->getScalarType())->getAddressSpace();
+ ElementCount EC = cast<VectorType>(I->getType())->getElementCount();
+ Value *BC = Builder.CreateBitCast(Result, VectorType::get(
+ IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)), EC));
+ Value *IntToPtr = Builder.CreateIntToPtr(BC, VectorType::get(
+ PointerType::get(Ctx, AS), EC));
+ V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType());
+ } else
+ V = Builder.CreateBitOrPointerCast(Result, I->getType());
+ } else {
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29d..36c1305a7c5df3 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
store atomic double %val1, ptr %ptr seq_cst, align 8
ret void
}
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldr r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldr r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r1, #2
+; ARMV4-NEXT: bl __atomic_load_4
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: ldr r0, [r0]
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldr r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+ ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 08d0405345f573..4293df8c13571f 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
ret <2 x i32> %ret
}
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movl $2, %esi
+; CHECK-NEXT: callq ___atomic_load_16
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+ ret <2 x ptr> %ret
+}
+
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec4_i8:
; CHECK3: ## %bb.0:
@@ -394,6 +409,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
ret <4 x i16> %ret
}
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_ptr270:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movl $2, %esi
+; CHECK-NEXT: callq ___atomic_load_16
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+ ret <4 x ptr addrspace(270)> %ret
+}
+
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
; CHECK-LABEL: atomic_vec4_half:
; CHECK: ## %bb.0:
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 5929c153d5961d..322f74f539ca36 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -151,3 +151,68 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
ret void
}
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
+; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
+;
+ %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+ ret <2 x ptr> %ret
+}
+
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
+; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
+;
+ %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+ ret <4 x ptr addrspace(270)> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
+; CHECK-NEXT: ret <2 x i16> [[RET]]
+;
+ %ret = load atomic <2 x i16>, ptr %x acquire, align 8
+ ret <2 x i16> %ret
+}
+
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
+; CHECK-NEXT: ret <2 x half> [[RET]]
+;
+ %ret = load atomic <2 x half>, ptr %x acquire, align 8
+ ret <2 x half> %ret
+}
+
+define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %ret = load atomic <4 x i32>, ptr %x acquire, align 16
+ ret <4 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+;
+ %ret = load atomic <4 x float>, ptr %x acquire, align 16
+ ret <4 x float> %ret
+}
More information about the llvm-branch-commits
mailing list