[llvm] 4dab152 - [AMDGPU] Introduce RC flags for vector register classes
Christudasan Devadasan via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 1 00:03:52 PDT 2021
Author: Christudasan Devadasan
Date: 2021-09-01T02:55:45-04:00
New Revision: 4dab15288d69c9002c98473588232d6ecbd7d29e
URL: https://github.com/llvm/llvm-project/commit/4dab15288d69c9002c98473588232d6ecbd7d29e
DIFF: https://github.com/llvm/llvm-project/commit/4dab15288d69c9002c98473588232d6ecbd7d29e.diff
LOG: [AMDGPU] Introduce RC flags for vector register classes
Configure and use the TSFlags field in TargetRegisterClass to
give VGPR and AGPR register classes their own flag bits.
Vector register class queries such as `hasVGPRs` now reduce
to a single bitwise test, which makes them more efficient.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D108815
Added:
Modified:
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 08edb7fda0fe..247ebe3fe741 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -14,6 +14,13 @@
namespace llvm {
+// This needs to be kept in sync with the field bits in SIRegisterClass.
+enum SIRCFlags : uint8_t {
+ // For vector registers.
+ HasVGPR = 1 << 0,
+ HasAGPR = 1 << 1
+}; // enum SIRCFlags
+
namespace SIInstrFlags {
// This needs to be kept in sync with the field bits in InstSI.
enum : uint64_t {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ad8f39adbb98..8dff3f0ac058 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2166,32 +2166,12 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
return isSGPRClass(RC);
}
-// TODO: It might be helpful to have some target specific flags in
-// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- unsigned Size = getRegSizeInBits(*RC);
- if (Size == 16) {
- return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
- getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
- }
- const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
- if (!VRC) {
- assert(Size < 32 && "Invalid register class size");
- return false;
- }
- return getCommonSubClass(VRC, RC) != nullptr;
+ return RC->TSFlags & SIRCFlags::HasVGPR;
}
bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
- unsigned Size = getRegSizeInBits(*RC);
- if (Size < 16)
- return false;
- const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
- if (!ARC) {
- assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
- return false;
- }
- return getCommonSubClass(ARC, RC) != nullptr;
+ return RC->TSFlags & SIRCFlags::HasAGPR;
}
const TargetRegisterClass *
@@ -2335,7 +2315,7 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
// Registers without classes are unaddressable, SGPR-like registers.
- return RC && hasVGPRs(RC);
+ return RC && isVGPRClass(RC);
}
bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
@@ -2343,7 +2323,7 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
// Registers without classes are unaddressable, SGPR-like registers.
- return RC && hasAGPRs(RC);
+ return RC && isAGPRClass(RC);
}
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 85055c3b8fcd..ebe1e5cbd95b 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -168,6 +168,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
+ /// \returns true if this class contains only VGPR registers
+ bool isVGPRClass(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) && !hasAGPRs(RC);
+ }
+
/// \returns true if this class contains only AGPR registers
bool isAGPRClass(const TargetRegisterClass *RC) const {
return hasAGPRs(RC) && !hasVGPRs(RC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 6e3c4e8775f3..881b3b1b7775 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -130,6 +130,18 @@ class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
RegisterWithSubRegs<n, subregs> {
}
+// For register classes that use TSFlags.
+class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
+ : RegisterClass <n, rTypes, Align, rList> {
+ // For vector register classes.
+ field bit HasVGPR = 0;
+ field bit HasAGPR = 0;
+
+ // These need to be kept in sync with the enum SIRCFlags.
+ let TSFlags{0} = HasVGPR;
+ let TSFlags{1} = HasAGPR;
+}
+
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
bit HWEncodingHigh = 0> {
// There is no special encoding for 16 bit subregs, these are not real
@@ -490,14 +502,15 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
-def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+let HasVGPR = 1 in {
+def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_LO16", 0, 255))> {
let AllocationPriority = 1;
let Size = 16;
let GeneratePressureSet = 0;
}
-def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_HI16", 0, 255))> {
let AllocationPriority = 1;
let Size = 16;
@@ -506,12 +519,13 @@ def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
// VGPR 32-bit registers
// i16/f16 only on VI+
-def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
+def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}
+} // End HasVGPR = 1
// VGPR 64-bit registers
def VGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, VGPR_32, 255, 1, 2, "v">;
@@ -540,7 +554,8 @@ def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
// VGPR 1024-bit registers
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
-def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+let HasAGPR = 1 in {
+def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "AGPR%u_LO16", 0, 255))> {
let isAllocatable = 0;
let Size = 16;
@@ -548,12 +563,13 @@ def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
}
// AccVGPR 32-bit registers
-def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}
+} // End HasAGPR = 1
// AGPR 64-bit registers
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
@@ -748,14 +764,15 @@ defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256R
defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
-def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
- RegisterClass<"AMDGPU", regTypes, 32, regList> {
+ SIRegisterClass<"AMDGPU", regTypes, 32, regList> {
let Size = !mul(numRegs, 32);
// Requires n v_mov_b32 to copy
@@ -767,11 +784,13 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
// Define a register tuple class, along with one requiring an even
// aligned base register.
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- // Define the regular class.
- def "" : VRegClassBase<numRegs, regTypes, regList>;
+ let HasVGPR = 1 in {
+ // Define the regular class.
+ def "" : VRegClassBase<numRegs, regTypes, regList>;
- // Define 2-aligned variant
- def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ // Define 2-aligned variant
+ def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ }
}
defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
@@ -787,7 +806,7 @@ defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let CopyCost = !add(numRegs, numRegs, 1) in {
+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList>;
@@ -823,44 +842,53 @@ let GeneratePressureSet = 0 in {
// on an empty register set, but also sorts register classes based on
// the number of registers in them. Add only one register so this is
// sorted to the end and not preferred over VGPR_32.
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
+def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
let Size = 1;
+ let HasVGPR = 1;
}
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
+def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
-def AV_32 : RegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
(add AGPR_32, VGPR_32)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasAGPR = 1;
}
-def AV_64 : RegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
+def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
(add AReg_64, VReg_64)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
-def AV_96 : RegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
+let HasVGPR = 1, HasAGPR = 1 in {
+def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
(add AReg_96, VReg_96)> {
let isAllocatable = 0;
}
-def AV_128 : RegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
(add AReg_128, VReg_128)> {
let isAllocatable = 0;
}
-def AV_160 : RegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
+def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
(add AReg_160, VReg_160)> {
let isAllocatable = 0;
}
+} // End HasVGPR = 1, HasAGPR = 1
//===----------------------------------------------------------------------===//
// Register operands
More information about the llvm-commits mailing list