[llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 1 13:46:39 PDT 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716
>From 9e3f3b11f393ebcaed34f8486f885f9a8aed419a Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered. It also adds support for
128 bit lowering in tablegen to support SSE/AVX.
commit-id:f430c1af
---
.../include/llvm/Target/TargetSelectionDAG.td | 14 +
llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 +-
llvm/lib/Target/X86/X86InstrCompiler.td | 5 +
llvm/test/CodeGen/ARM/atomic-load-store.ll | 51 ++++
llvm/test/CodeGen/X86/atomic-load-store.ll | 93 +++++++
.../X86/expand-atomic-non-integer.ll | 263 ++++++++++++------
6 files changed, 359 insertions(+), 82 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index d56216e8b637d..43da39bdc44cd 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1904,6 +1904,20 @@ def atomic_load_64 :
let MemoryVT = i64;
}
+def atomic_load_128_v2i64 :
+ PatFrag<(ops node:$ptr),
+ (atomic_load node:$ptr)> {
+ let IsAtomic = true;
+ let MemoryVT = v2i64;
+}
+
+def atomic_load_128_v4i32 :
+ PatFrag<(ops node:$ptr),
+ (atomic_load node:$ptr)> {
+ let IsAtomic = true;
+ let MemoryVT = v4i32;
+}
+
def atomic_load_nonext_8 :
PatFrag<(ops node:$ptr), (atomic_load_nonext node:$ptr)> {
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..70f59eafc6ecb 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
I->replaceAllUsesWith(V);
} else if (HasResult) {
Value *V;
- if (UseSizedLibcall)
- V = Builder.CreateBitOrPointerCast(Result, I->getType());
- else {
+ if (UseSizedLibcall) {
+ // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+ auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
+ auto *VTy = dyn_cast<VectorType>(I->getType());
+ if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+ unsigned AS = PtrTy->getAddressSpace();
+ Value *BC = Builder.CreateBitCast(
+ Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+ V = Builder.CreateIntToPtr(BC, I->getType());
+ } else
+ V = Builder.CreateBitOrPointerCast(Result, I->getType());
+ } else {
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 26b76dd1ca83a..3143015b7ec66 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1211,6 +1211,11 @@ def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
+def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
+ (VMOVAPDrm addr:$src)>; // load atomic <2 x i64>
+def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
+ (VMOVAPDrm addr:$src)>; // load atomic <4 x i32>
+
// Floating point loads/stores.
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..eaa2ffd9b2731 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
store atomic double %val1, ptr %ptr seq_cst, align 8
ret void
}
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldr r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldr r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r1, #2
+; ARMV4-NEXT: bl __atomic_load_4
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldr r0, [r0]
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldr r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+ ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 039edcbf83544..9665d4173e23d 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -860,6 +860,53 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
ret <2 x i32> %ret
}
+; Move td records to AtomicExpand
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec2_ptr_align:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movl $2, %esi
+; CHECK-O3-NEXT: callq __atomic_load_16 at PLT
+; CHECK-O3-NEXT: movq %rdx, %xmm1
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_ptr_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_ptr_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_ptr_align:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movl $2, %esi
+; CHECK-O0-NEXT: callq __atomic_load_16 at PLT
+; CHECK-O0-NEXT: movq %rdx, %xmm1
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_ptr_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_ptr_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+ ret <2 x ptr> %ret
+}
+
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec4_i8:
; CHECK-O3: # %bb.0:
@@ -903,6 +950,52 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
ret <4 x i16> %ret
}
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_ptr270:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movl $2, %esi
+; CHECK-O3-NEXT: callq __atomic_load_16 at PLT
+; CHECK-O3-NEXT: movq %rdx, %xmm1
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_ptr270:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_ptr270:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_ptr270:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movl $2, %esi
+; CHECK-O0-NEXT: callq __atomic_load_16 at PLT
+; CHECK-O0-NEXT: movq %rdx, %xmm1
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_ptr270:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_ptr270:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+ ret <4 x ptr addrspace(270)> %ret
+}
+
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
; CHECK-LABEL: atomic_vec4_half:
; CHECK: # %bb.0:
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 5929c153d5961..f5c8baa3e931e 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,153 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
-; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
+; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
; functionality, please move this test to a target which still is.
define float @float_load_expand(ptr %ptr) {
-; CHECK-LABEL: @float_load_expand
-; CHECK: %1 = load atomic i32, ptr %ptr unordered, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
%res = load atomic float, ptr %ptr unordered, align 4
ret float %res
}
define float @float_load_expand_seq_cst(ptr %ptr) {
-; CHECK-LABEL: @float_load_expand_seq_cst
-; CHECK: %1 = load atomic i32, ptr %ptr seq_cst, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand_seq_cst(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[PTR]] seq_cst, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
%res = load atomic float, ptr %ptr seq_cst, align 4
ret float %res
}
define float @float_load_expand_vol(ptr %ptr) {
-; CHECK-LABEL: @float_load_expand_vol
-; CHECK: %1 = load atomic volatile i32, ptr %ptr unordered, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand_vol(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic volatile i32, ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
%res = load atomic volatile float, ptr %ptr unordered, align 4
ret float %res
}
define float @float_load_expand_addr1(ptr addrspace(1) %ptr) {
-; CHECK-LABEL: @float_load_expand_addr1
-; CHECK: %1 = load atomic i32, ptr addrspace(1) %ptr unordered, align 4
-; CHECK: %2 = bitcast i32 %1 to float
-; CHECK: ret float %2
+; CHECK-LABEL: define float @float_load_expand_addr1(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT: ret float [[TMP2]]
+;
%res = load atomic float, ptr addrspace(1) %ptr unordered, align 4
ret float %res
}
define void @float_store_expand(ptr %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic i32 %1, ptr %ptr unordered, align 4
+; CHECK-LABEL: define void @float_store_expand(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: ret void
+;
store atomic float %v, ptr %ptr unordered, align 4
ret void
}
define void @float_store_expand_seq_cst(ptr %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand_seq_cst
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic i32 %1, ptr %ptr seq_cst, align 4
+; CHECK-LABEL: define void @float_store_expand_seq_cst(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[PTR]] seq_cst, align 4
+; CHECK-NEXT: ret void
+;
store atomic float %v, ptr %ptr seq_cst, align 4
ret void
}
define void @float_store_expand_vol(ptr %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand_vol
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic volatile i32 %1, ptr %ptr unordered, align 4
+; CHECK-LABEL: define void @float_store_expand_vol(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic volatile i32 [[TMP1]], ptr [[PTR]] unordered, align 4
+; CHECK-NEXT: ret void
+;
store atomic volatile float %v, ptr %ptr unordered, align 4
ret void
}
define void @float_store_expand_addr1(ptr addrspace(1) %ptr, float %v) {
-; CHECK-LABEL: @float_store_expand_addr1
-; CHECK: %1 = bitcast float %v to i32
-; CHECK: store atomic i32 %1, ptr addrspace(1) %ptr unordered, align 4
+; CHECK-LABEL: define void @float_store_expand_addr1(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], float [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; CHECK-NEXT: store atomic i32 [[TMP1]], ptr addrspace(1) [[PTR]] unordered, align 4
+; CHECK-NEXT: ret void
+;
store atomic float %v, ptr addrspace(1) %ptr unordered, align 4
ret void
}
define void @pointer_cmpxchg_expand(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 seq_cst monotonic
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst monotonic, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
cmpxchg ptr %ptr, ptr null, ptr %v seq_cst monotonic
ret void
}
define void @pointer_cmpxchg_expand2(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand2
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 release monotonic
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand2(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] release monotonic, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
cmpxchg ptr %ptr, ptr null, ptr %v release monotonic
ret void
}
define void @pointer_cmpxchg_expand3(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand3
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg ptr %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand3(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
cmpxchg ptr %ptr, ptr null, ptr %v seq_cst seq_cst
ret void
}
define void @pointer_cmpxchg_expand4(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand4
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg weak ptr %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand4(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
cmpxchg weak ptr %ptr, ptr null, ptr %v seq_cst seq_cst
ret void
}
define void @pointer_cmpxchg_expand5(ptr %ptr, ptr %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand5
-; CHECK: %1 = ptrtoint ptr %v to i64
-; CHECK: %2 = cmpxchg volatile ptr %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr
-; CHECK: %6 = insertvalue { ptr, i1 } poison, ptr %5, 0
-; CHECK: %7 = insertvalue { ptr, i1 } %6, i1 %4, 1
+; CHECK-LABEL: define void @pointer_cmpxchg_expand5(
+; CHECK-SAME: ptr [[PTR:%.*]], ptr [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg volatile ptr [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr, i1 } poison, ptr [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr, i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
cmpxchg volatile ptr %ptr, ptr null, ptr %v seq_cst seq_cst
ret void
}
-define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
- ptr addrspace(2) %v) {
-; CHECK-LABEL: @pointer_cmpxchg_expand6
-; CHECK: %1 = ptrtoint ptr addrspace(2) %v to i64
-; CHECK: %2 = cmpxchg ptr addrspace(1) %ptr, i64 0, i64 %1 seq_cst seq_cst
-; CHECK: %3 = extractvalue { i64, i1 } %2, 0
-; CHECK: %4 = extractvalue { i64, i1 } %2, 1
-; CHECK: %5 = inttoptr i64 %3 to ptr addrspace(2)
-; CHECK: %6 = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) %5, 0
-; CHECK: %7 = insertvalue { ptr addrspace(2), i1 } %6, i1 %4, 1
+define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
+; CHECK-LABEL: define void @pointer_cmpxchg_expand6(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[V:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[V]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 0, i64 [[TMP1]] seq_cst seq_cst, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(2)
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(2), i1 } poison, ptr addrspace(2) [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { ptr addrspace(2), i1 } [[TMP6]], i1 [[TMP4]], 1
+; CHECK-NEXT: ret void
+;
+ ptr addrspace(2) %v) {
cmpxchg ptr addrspace(1) %ptr, ptr addrspace(2) null, ptr addrspace(2) %v seq_cst seq_cst
ret void
}
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
+; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
+;
+ %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+ ret <2 x ptr> %ret
+}
+
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
+; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
+;
+ %ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
+ ret <4 x ptr addrspace(270)> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
+; CHECK-NEXT: ret <2 x i16> [[RET]]
+;
+ %ret = load atomic <2 x i16>, ptr %x acquire, align 8
+ ret <2 x i16> %ret
+}
+
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
+; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT: ret <2 x half> [[RET]]
+;
+ %ret = load atomic <2 x half>, ptr %x acquire, align 8
+ ret <2 x half> %ret
+}
+
+define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %ret = load atomic <4 x i32>, ptr %x acquire, align 16
+ ret <4 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
+; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
+; CHECK-NEXT: ret <4 x float> [[TMP2]]
+;
+ %ret = load atomic <4 x float>, ptr %x acquire, align 16
+ ret <4 x float> %ret
+}
More information about the llvm-commits
mailing list