[llvm-branch-commits] [llvm] [SelectionDAG][X86] Widen <2 x T> vector types for atomic load (PR #120598)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 6 11:25:21 PST 2025
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598
>From 36161dfc06ed32668b4b2812fd5943f703cfddb6 Mon Sep 17 00:00:00 2001
From: jofrn <jofernau at amd.com>
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG][X86] Widen <2 x T> vector types for atomic
load
Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of size >1. It
also adds Pats to remove an extra MOV.
commit-id:2894ccd1
---
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 ++-
.../SelectionDAG/LegalizeVectorTypes.cpp | 39 +++++++++++++--
llvm/lib/Target/X86/X86InstrCompiler.td | 7 +++
llvm/test/CodeGen/X86/atomic-load-store.ll | 49 +++++++++++++++++++
llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +-
5 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b81c9f87cb27d7..3b3dddc44e3682 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
/// resulting wider type. It takes:
/// LdChain: list of chains for the load to be generated.
/// Ld: load to widen
- SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
- LoadSDNode *LD);
+ template <typename T>
+ SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, T *LD,
+ bool IsAtomic = false);
/// Helper function to generate a set of extension loads to load a vector with
/// a resulting wider type. It takes:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0c21ede7b0dbb4..bc0a3a4589b941 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4517,6 +4517,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
@@ -5903,6 +5906,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ SmallVector<SDValue, 16> LdChain; // Output chains of the generated load(s).
+ SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/);
+
+ if (Result) {
+ // If a single load was generated, its chain can be used directly. Otherwise,
+ // build a TokenFactor node over all the load chains (the loads are
+ // independent of each other) and chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
+
+ // The chain has changed - redirect every user of the original node's chain
+ // result (value 1 of N) to the new chain.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
+ report_fatal_error("Unable to widen atomic vector load");
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7701,8 +7728,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
+template <typename T>
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
- LoadSDNode *LD) {
+ T *LD, bool IsAtomic) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
@@ -7759,8 +7787,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
}
- SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
+ SDValue LdOp;
+ if (IsAtomic)
+ LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+ BasePtr, LD->getMemOperand());
+ else
+ LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 7d4c5c0e10e492..7d8845b91fd1ed 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1198,6 +1198,13 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))),
+ (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
+ (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
+
// Floating point loads/stores.
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 39e9fdfa5e62b0..6c2a7e1d68c382 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,55 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
ret <1 x i64> %ret
}
+define <2 x i8> @atomic_vec2_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_i8:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_i8:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %cx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: movd %eax, %xmm0
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x i8>, ptr %x acquire, align 4
+ ret <2 x i8> %ret
+}
+
+define <2 x i16> @atomic_vec2_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_i16:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_i16:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x i16>, ptr %x acquire, align 4
+ ret <2 x i16> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
+; CHECK-LABEL: atomic_vec2_i32_align:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+ ret <2 x i32> %ret
+}
+
+define <2 x float> @atomic_vec2_float_align(ptr %x) {
+; CHECK-LABEL: atomic_vec2_float_align:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <2 x float>, ptr %x acquire, align 8
+ ret <2 x float> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec1_ptr:
; CHECK3: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 3fb994cdb751a3..e2803d206c6c57 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -2275,8 +2275,7 @@ define i64 @load_i16_anyext_i64(ptr %ptr) {
;
; CHECK-O3-LABEL: load_i16_anyext_i64:
; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-O3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-O3-NEXT: vmovq %xmm0, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i16, ptr %ptr unordered, align 8
More information about the llvm-branch-commits
mailing list