[llvm] [SelectionDAG] Legalize vector types for atomic load (PR #111414)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 7 11:44:18 PDT 2024
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/111414
>From c154ab8f383d43da342a127c7b4ef487636c0d64 Mon Sep 17 00:00:00 2001
From: jofernau <Joe.Fernau at amd.com>
Date: Mon, 7 Oct 2024 13:47:50 -0400
Subject: [PATCH 1/3] [SelectionDAG] Legalize vector types for atomic load
Scalarize the vector result of an atomic load (e.g. `<1 x i32>`) during SelectionDAG type legalization.
---
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 16 ++++++++++++++++
.../CodeGen/Generic/atomic-scalarization.ll | 17 +++++++++++++++++
3 files changed, 34 insertions(+)
create mode 100644 llvm/test/CodeGen/Generic/atomic-scalarization.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d14516ef3e2fbb..5204f20e97f63e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -850,6 +850,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_ExpOp(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0a22f06271984e..1e911da130f8c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -447,6 +450,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ // Build an ATOMIC_LOAD of the element type with N's chain, pointer and MMO.
+ SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD, SDLoc(N),
+ N->getMemoryVT().getVectorElementType(), // element of N's memory VT
+ N->getValueType(0).getVectorElementType(), // element of N's value 0 VT
+ N->getChain(), N->getBasePtr(), N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain
+ // (value 1 of N) to use the chain of the new node (value 1 of Result).
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
diff --git a/llvm/test/CodeGen/Generic/atomic-scalarization.ll b/llvm/test/CodeGen/Generic/atomic-scalarization.ll
new file mode 100644
index 00000000000000..3f611323172884
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/atomic-scalarization.ll
@@ -0,0 +1,17 @@
+; RUN: llc %s --print-after-isel --disable-verify 2>&1 | FileCheck %s
+
+define i32 @atomic_scalar() {
+; CHECK: # After Instruction Selection:
+; CHECK-NEXT: # Machine code for function atomic_scalar: IsSSA, TracksLiveness
+; CHECK-NEXT: Frame Objects:
+; CHECK-NEXT: fi#0: size=4, align=4, at location [SP+8]
+; CHECK: bb.0 (%ir-block.0):
+; CHECK-NEXT: %0:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (dereferenceable load acquire (s32) from %ir.1)
+; CHECK-NEXT: $eax = COPY %0:gr32
+; CHECK-NEXT: RET 0, $eax
+; CHECK: # End machine code for function atomic_scalar.
+ %1 = alloca <1 x i32>
+ %2 = load atomic <1 x i32>, ptr %1 acquire, align 4
+ %3 = extractelement <1 x i32> %2, i32 0
+ ret i32 %3
+}
>From 6716db041ba7ca7b6269f799128cd479e66138f6 Mon Sep 17 00:00:00 2001
From: jofernau <Joe.Fernau at amd.com>
Date: Mon, 7 Oct 2024 14:18:52 -0400
Subject: [PATCH 2/3] Move test to X86 and check MIR after the last X86 pass
---
.../CodeGen/Generic/atomic-scalarization.ll | 17 -----------------
llvm/test/CodeGen/X86/atomic-scalarization.ll | 16 ++++++++++++++++
2 files changed, 16 insertions(+), 17 deletions(-)
delete mode 100644 llvm/test/CodeGen/Generic/atomic-scalarization.ll
create mode 100644 llvm/test/CodeGen/X86/atomic-scalarization.ll
diff --git a/llvm/test/CodeGen/Generic/atomic-scalarization.ll b/llvm/test/CodeGen/Generic/atomic-scalarization.ll
deleted file mode 100644
index 3f611323172884..00000000000000
--- a/llvm/test/CodeGen/Generic/atomic-scalarization.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc %s --print-after-isel --disable-verify 2>&1 | FileCheck %s
-
-define i32 @atomic_scalar() {
-; CHECK: # After Instruction Selection:
-; CHECK-NEXT: # Machine code for function atomic_scalar: IsSSA, TracksLiveness
-; CHECK-NEXT: Frame Objects:
-; CHECK-NEXT: fi#0: size=4, align=4, at location [SP+8]
-; CHECK: bb.0 (%ir-block.0):
-; CHECK-NEXT: %0:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (dereferenceable load acquire (s32) from %ir.1)
-; CHECK-NEXT: $eax = COPY %0:gr32
-; CHECK-NEXT: RET 0, $eax
-; CHECK: # End machine code for function atomic_scalar.
- %1 = alloca <1 x i32>
- %2 = load atomic <1 x i32>, ptr %1 acquire, align 4
- %3 = extractelement <1 x i32> %2, i32 0
- ret i32 %3
-}
diff --git a/llvm/test/CodeGen/X86/atomic-scalarization.ll b/llvm/test/CodeGen/X86/atomic-scalarization.ll
new file mode 100644
index 00000000000000..22d240bd855180
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-scalarization.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s --mtriple=x86_64 -print-after=unpack-mi-bundles -disable-verify 2>&1 | FileCheck %s
+
+define i32 @atomic_scalar() {
+; CHECK: # *** IR Dump After Unpack machine instruction bundles (unpack-mi-bundles) ***:
+; CHECK-NEXT: # Machine code for function atomic_scalar: NoPHIs, TracksLiveness, NoVRegs, TiedOpsRewritten, TracksDebugUserValues
+; CHECK-NEXT: Frame Objects:
+; CHECK-NEXT: fi#0: size=4, align=4, at location [SP-4]
+; CHECK: bb.0 (%ir-block.0):
+; CHECK-NEXT: renamable $eax = MOV32rm $rsp, 1, $noreg, -4, $noreg :: (dereferenceable load acquire (s32) from %ir.1)
+; CHECK-NEXT: RET64 $eax
+; CHECK: # End machine code for function atomic_scalar.
+ %1 = alloca <1 x i32>
+ %2 = load atomic <1 x i32>, ptr %1 acquire, align 4
+ %3 = extractelement <1 x i32> %2, i32 0
+ ret i32 %3
+}
>From 40263d09a2a607f8a5a880de3e94993681c64243 Mon Sep 17 00:00:00 2001
From: jofernau <Joe.Fernau at amd.com>
Date: Mon, 7 Oct 2024 14:44:00 -0400
Subject: [PATCH 3/3] Checking with finalize-isel
---
llvm/test/CodeGen/X86/atomic-scalarization.ll | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/X86/atomic-scalarization.ll b/llvm/test/CodeGen/X86/atomic-scalarization.ll
index 22d240bd855180..99da5cf8c04d0d 100644
--- a/llvm/test/CodeGen/X86/atomic-scalarization.ll
+++ b/llvm/test/CodeGen/X86/atomic-scalarization.ll
@@ -1,13 +1,14 @@
-; RUN: llc %s --mtriple=x86_64 -print-after=unpack-mi-bundles -disable-verify 2>&1 | FileCheck %s
+; RUN: llc %s --mtriple=x86_64 -print-after=finalize-isel -disable-verify 2>&1 | FileCheck %s
define i32 @atomic_scalar() {
-; CHECK: # *** IR Dump After Unpack machine instruction bundles (unpack-mi-bundles) ***:
-; CHECK-NEXT: # Machine code for function atomic_scalar: NoPHIs, TracksLiveness, NoVRegs, TiedOpsRewritten, TracksDebugUserValues
+; CHECK: # *** IR Dump After Finalize ISel and expand pseudo-instructions (finalize-isel) ***:
+; CHECK-NEXT: # Machine code for function atomic_scalar: IsSSA, TracksLiveness
; CHECK-NEXT: Frame Objects:
-; CHECK-NEXT: fi#0: size=4, align=4, at location [SP-4]
+; CHECK-NEXT: fi#0: size=4, align=4, at location [SP+8]
; CHECK: bb.0 (%ir-block.0):
-; CHECK-NEXT: renamable $eax = MOV32rm $rsp, 1, $noreg, -4, $noreg :: (dereferenceable load acquire (s32) from %ir.1)
-; CHECK-NEXT: RET64 $eax
+; CHECK-NEXT: %0:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (dereferenceable load acquire (s32) from %ir.1)
+; CHECK-NEXT: $eax = COPY %0:gr32
+; CHECK-NEXT: RET 0, $eax
; CHECK: # End machine code for function atomic_scalar.
%1 = alloca <1 x i32>
%2 = load atomic <1 x i32>, ptr %1 acquire, align 4
More information about the llvm-commits
mailing list