[llvm] [NVPTX][NFCI] Use DataLayout to determine short shared/local/const pointers (PR #89404)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 19 08:33:47 PDT 2024
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/89404
Use the DataLayout directly to determine the correct `cvta` instruction for converting shared/local/const pointers. This is cleaner: it eliminates the need to keep a redundant copy of this information in the TargetMachine (TM), and it makes clear which address spaces short pointers apply to.
From 92adacdcc8c3b9b2f38adae7721aa00a5eabe82a Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Thu, 18 Apr 2024 19:25:17 +0000
Subject: [PATCH] [NVPTX][NFCI] Use DataLayout to determine short
shared/local/const pointers
---
llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 51 ++++++++++----------
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 -
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 6 ++-
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 37 +++++++-------
llvm/lib/Target/NVPTX/NVPTXPeephole.cpp | 6 +--
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 3 +-
llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 3 --
8 files changed, 54 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index c34472c21ccbe9..10ae81e0460e3a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -50,7 +50,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
bool Is64Bit =
static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
unsigned CvtaLocalOpcode =
- (Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes);
+ (Is64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
unsigned MovDepotOpcode =
(Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
if (!MR.use_empty(NRI->getFrameRegister(MF))) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 3ff8994602e16b..a362709c98efd6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -74,10 +74,6 @@ bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
return TL->allowUnsafeFPMath(*MF);
}
-bool NVPTXDAGToDAGISel::useShortPointers() const {
- return TM.useShortPointers();
-}
-
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
@@ -768,22 +764,25 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (SrcAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global;
break;
case ADDRESS_SPACE_SHARED:
- Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
- : NVPTX::cvta_shared_yes_64)
- : NVPTX::cvta_shared_yes;
+ Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
+ ? NVPTX::cvta_shared_6432
+ : NVPTX::cvta_shared_64)
+ : NVPTX::cvta_shared;
break;
case ADDRESS_SPACE_CONST:
- Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
- : NVPTX::cvta_const_yes_64)
- : NVPTX::cvta_const_yes;
+ Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
+ ? NVPTX::cvta_const_6432
+ : NVPTX::cvta_const_64)
+ : NVPTX::cvta_const;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
- : NVPTX::cvta_local_yes_64)
- : NVPTX::cvta_local_yes;
+ Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32
+ ? NVPTX::cvta_local_6432
+ : NVPTX::cvta_local_64)
+ : NVPTX::cvta_local;
break;
}
ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
@@ -797,23 +796,25 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (DstAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
- : NVPTX::cvta_to_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global;
break;
case ADDRESS_SPACE_SHARED:
- Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
- : NVPTX::cvta_to_shared_yes_64)
- : NVPTX::cvta_to_shared_yes;
+ Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
+ ? NVPTX::cvta_to_shared_3264
+ : NVPTX::cvta_to_shared_64)
+ : NVPTX::cvta_to_shared;
break;
case ADDRESS_SPACE_CONST:
- Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
- : NVPTX::cvta_to_const_yes_64)
- : NVPTX::cvta_to_const_yes;
+ Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
+ ? NVPTX::cvta_to_const_3264
+ : NVPTX::cvta_to_const_64)
+ : NVPTX::cvta_to_const;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
- : NVPTX::cvta_to_local_yes_64)
- : NVPTX::cvta_to_local_yes;
+ Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32
+ ? NVPTX::cvta_to_local_3264
+ : NVPTX::cvta_to_local_64)
+ : NVPTX::cvta_to_local;
break;
case ADDRESS_SPACE_PARAM:
Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 84c8432047ca31..10822f87cef308 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -36,7 +36,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool useF32FTZ() const;
bool allowFMA() const;
bool allowUnsafeFPMath() const;
- bool useShortPointers() const;
public:
static char ID;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index cd8546005c0289..931292c7fd6042 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -160,6 +160,7 @@ def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def True : Predicate<"true">;
+def False : Predicate<"false">;
class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
@@ -171,7 +172,10 @@ def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
"&& Subtarget->getPTXVersion() >= 64)">;
-def useShortPtr : Predicate<"useShortPointers()">;
+def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">;
+def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">;
+def useShortPtrConst : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">;
+
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c0c53380a13e9b..ec9170b4e41e5c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2407,46 +2407,45 @@ defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
-multiclass NG_TO_G<string Str, Intrinsic Intrin> {
- def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> {
+ def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
!strconcat("cvta.", Str, ".u32 \t$result, $src;"),
[(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
- def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
!strconcat("cvta.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
- def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
+ def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
"{{ .reg .b64 %tmp;\n\t"
#" cvt.u64.u32 \t%tmp, $src;\n\t"
#" cvta." # Str # ".u64 \t$result, %tmp; }}",
[(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
- Requires<[useShortPtr]>;
+ Requires<[ShortPtr]>;
}
-multiclass G_TO_NG<string Str, Intrinsic Intrin> {
- def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> {
+ def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
!strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
[(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
- def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
!strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
- def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
+ def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
"{{ .reg .b64 %tmp;\n\t"
#" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
#" cvt.u32.u64 \t$result, %tmp; }}",
[(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
- Requires<[useShortPtr]>;
+ Requires<[ShortPtr]>;
}
-defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
-defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
-defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
-defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
-
-defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
-defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
-defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
-defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
+defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>;
+defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>;
+defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>;
+defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>;
+defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>;
+defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>;
+defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>;
+defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>;
// nvvm.ptr.gen.to.param
def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index 0968701737e88d..f2f547da88c7cb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -22,7 +22,7 @@
//
// It will transform the following pattern
// %0 = LEA_ADDRi64 %VRFrame64, 4
-// %1 = cvta_to_local_yes_64 %0
+// %1 = cvta_to_local_64 %0
//
// into
// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
@@ -76,8 +76,8 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
auto &MBB = *Root.getParent();
auto &MF = *MBB.getParent();
// Check current instruction is cvta.to.local
- if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
- Root.getOpcode() != NVPTX::cvta_to_local_yes)
+ if (Root.getOpcode() != NVPTX::cvta_to_local_64 &&
+ Root.getOpcode() != NVPTX::cvta_to_local)
return false;
auto &Op = Root.getOperand(1);
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 78f48652c9920f..2a47c16a6bce51 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -132,8 +132,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
CPU, FS, Options, Reloc::PIC_,
getEffectiveCodeModel(CM, CodeModel::Small), OL),
- is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
- TLOF(std::make_unique<NVPTXTargetObjectFile>()),
+ is64bit(is64bit), TLOF(std::make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, std::string(CPU), std::string(FS), *this),
StrPool(StrAlloc) {
if (TT.getOS() == Triple::NVCL)
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 9e6bf929badba2..870ea20c26f3f2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -24,8 +24,6 @@ namespace llvm {
///
class NVPTXTargetMachine : public LLVMTargetMachine {
bool is64bit;
- // Use 32-bit pointers for accessing const/local/short AS.
- bool UseShortPointers;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
@@ -46,7 +44,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine {
}
const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
bool is64Bit() const { return is64bit; }
- bool useShortPointers() const { return UseShortPointers; }
NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
UniqueStringSaver &getStrPool() const {
return const_cast<UniqueStringSaver &>(StrPool);
More information about the llvm-commits
mailing list