[llvm] 654c89d - [AMDGPU] Make vector superclasses allocatable
Christudasan Devadasan via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 25 21:50:30 PST 2021
Author: Christudasan Devadasan
Date: 2021-11-26T00:42:12-05:00
New Revision: 654c89d85a5108f2f04a8a748c44162e16260c7c
URL: https://github.com/llvm/llvm-project/commit/654c89d85a5108f2f04a8a748c44162e16260c7c
DIFF: https://github.com/llvm/llvm-project/commit/654c89d85a5108f2f04a8a748c44162e16260c7c.diff
LOG: [AMDGPU] Make vector superclasses allocatable
The combined vector register classes holding
both VGPRs and AGPRs are currently
unallocatable. This patch makes them
allocatable, as a prerequisite for enabling
copies between VGPR and AGPR registers
during register allocation.
Also add the missing AV register classes
from 192b to 1024b.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D109300
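
With the AV classes allocatable, call sites must now distinguish
"contains AGPRs" (hasAGPRs, true for both AReg_* and AV_*) from
"is a pure AGPR class" (isAGPRClass, true only for AReg_*); most
of the mechanical changes below switch from the former to the
latter. A minimal sketch of the predicate family, following
SIRegisterInfo.h in this diff (only isAGPRClass and
isVectorSuperClass appear verbatim below; isVGPRClass and the
SIRCFlags::HasAGPR bit are assumed by symmetry):

    // Primitives: TSFlags bits set on each register class in
    // SIRegisterInfo.td.
    static bool hasVGPRs(const TargetRegisterClass *RC) {
      return RC->TSFlags & SIRCFlags::HasVGPR;
    }
    static bool hasAGPRs(const TargetRegisterClass *RC) {
      return RC->TSFlags & SIRCFlags::HasAGPR;
    }
    // Pure AGPR tuple class (AReg_*).
    bool isAGPRClass(const TargetRegisterClass *RC) const {
      return hasAGPRs(RC) && !hasVGPRs(RC);
    }
    // Pure VGPR class (VGPR_32/VReg_*).
    bool isVGPRClass(const TargetRegisterClass *RC) const {
      return hasVGPRs(RC) && !hasAGPRs(RC);
    }
    // Combined VGPR+AGPR superclass (AV_*), now allocatable.
    bool isVectorSuperClass(const TargetRegisterClass *RC) const {
      return hasVGPRs(RC) && hasAGPRs(RC);
    }
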
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3456f9a6156c6..82c09378acac8 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -74,11 +74,11 @@ unsigned GCNRegPressure::getRegKind(Register Reg,
assert(Reg.isVirtual());
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- return STI->isSGPRClass(RC) ?
- (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
- STI->hasAGPRs(RC) ?
- (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
- (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+ return STI->isSGPRClass(RC)
+ ? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE)
+ : STI->isAGPRClass(RC)
+ ? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE)
+ : (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
void GCNRegPressure::inc(unsigned Reg,
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index cf93a63f26a0a..ad4f5868d8729 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -259,7 +259,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
// VGPRz = REG_SEQUENCE VGPRx, sub0
MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
- bool IsAGPR = TRI->hasAGPRs(DstRC);
+ bool IsAGPR = TRI->isAGPRClass(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
Register SrcReg = MI.getOperand(I).getReg();
@@ -853,7 +853,7 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
Register PHIRes = MI.getOperand(0).getReg();
const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
- if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
+ if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) {
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a3a0e9c9b9ac0..200e00ee55212 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1586,17 +1586,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
unsigned OpIdx = Op - &UseMI->getOperand(0);
const MCInstrDesc &InstDesc = UseMI->getDesc();
- const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
- switch (OpInfo.RegClass) {
- case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_160RegClassID:
- break;
- default:
+ if (!TRI->isVectorSuperClass(
+ TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
return false;
- }
const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
auto Dst = MRI->createVirtualRegister(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 02440044d6e2c..0b34f74acfbf5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11483,15 +11483,15 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (I == -1)
break;
MachineOperand &Op = MI.getOperand(I);
- if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
- OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
- !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
+ if (!Op.isReg() || !Op.getReg().isVirtual())
+ continue;
+ auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+ if (!TRI->hasAGPRs(RC))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
continue;
- auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
// All uses of agpr64 and agpr32 can also accept vgpr except for
// v_accvgpr_read, but we do not produce agpr reads during selection,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a928123b68fe..88996f4552275 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -898,10 +898,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
- if (RI.hasAGPRs(RC)) {
+ if (RI.isAGPRClass(RC)) {
Opcode = (RI.hasVGPRs(SrcRC)) ?
AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
- } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
+ } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
} else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
(RI.isProperlyAlignedRC(*RC) &&
@@ -1205,7 +1205,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
- if (RI.hasAGPRs(DstRC))
+ if (RI.isAGPRClass(DstRC))
return AMDGPU::COPY;
if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -1463,8 +1463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
- : getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+ : getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
@@ -1598,8 +1598,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
- : getVGPRSpillRestoreOpcode(SpillSize);
+ unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+ : getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
@@ -2802,12 +2802,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
if (Is16Bit) {
- if (isVGPRCopy)
- return false; // Do not clobber vgpr_hi16
+ if (isVGPRCopy)
+ return false; // Do not clobber vgpr_hi16
- if (DstReg.isVirtual() &&
- UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
- return false;
+ if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+ return false;
UseMI.getOperand(0).setSubReg(0);
if (DstReg.isPhysical()) {
@@ -3896,9 +3895,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// verification is broken anyway
if (ST.needsAlignedVGPRs()) {
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
- const bool IsVGPR = RI.hasVGPRs(RC);
- const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
- if ((IsVGPR || IsAGPR) && MO.getSubReg()) {
+ if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
const TargetRegisterClass *SubRC =
RI.getSubRegClass(RC, MO.getSubReg());
RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
@@ -5522,13 +5519,13 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
VRC = &AMDGPU::VReg_1RegClass;
} else
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(SRC)
: RI.getEquivalentVGPRClass(SRC);
} else {
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
- ? RI.getEquivalentAGPRClass(VRC)
- : RI.getEquivalentVGPRClass(VRC);
+ VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(VRC)
+ : RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
@@ -7065,8 +7062,8 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::STRICT_WWM:
case AMDGPU::STRICT_WQM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
- if (RI.hasAGPRs(SrcRC)) {
- if (RI.hasAGPRs(NewDstRC))
+ if (RI.isAGPRClass(SrcRC)) {
+ if (RI.isAGPRClass(NewDstRC))
return nullptr;
switch (Inst.getOpcode()) {
@@ -7082,7 +7079,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
if (!NewDstRC)
return nullptr;
} else {
- if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+ if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 34cbb49dcd16e..f4d9002e930e0 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1609,7 +1609,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
}
unsigned BitWidth = 32 * (CI.Width + Paired.Width);
- return TRI->hasAGPRs(getDataRegClass(*CI.I))
+ return TRI->isAGPRClass(getDataRegClass(*CI.I))
? TRI->getAGPRClassForBitWidth(BitWidth)
: TRI->getVGPRClassForBitWidth(BitWidth);
}
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 6a698348d3894..94fb23e89539e 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1170,7 +1170,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
unsigned I = MI.getOperandNo(&Op);
if (Desc.OpInfo[I].RegClass == -1 ||
- !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+ !TRI->isVGPRClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
continue;
if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index bfbe84f696f8a..2b72c0bc00a90 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1099,7 +1099,7 @@ void SIRegisterInfo::buildSpillLoadStore(
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
// On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
- const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
+ const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
// Always use 4 byte operations for AGPRs because we need to scavenge
@@ -2163,6 +2163,65 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
: getAnyAGPRClassForBitWidth(BitWidth);
}
+static const TargetRegisterClass *
+getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 64)
+ return &AMDGPU::AV_64RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AV_96RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AV_128RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AV_160RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AV_192RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AV_224RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AV_256RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AV_512RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AV_1024RegClass;
+
+ return nullptr;
+}
+
+static const TargetRegisterClass *
+getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 64)
+ return &AMDGPU::AV_64_Align2RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AV_96_Align2RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AV_128_Align2RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AV_160_Align2RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AV_192_Align2RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AV_224_Align2RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AV_256_Align2RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AV_512_Align2RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AV_1024_Align2RegClass;
+
+ return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
+ if (BitWidth <= 16)
+ return &AMDGPU::VGPR_LO16RegClass;
+ if (BitWidth <= 32)
+ return &AMDGPU::AV_32RegClass;
+ return ST.needsAlignedVGPRs()
+ ? getAlignedVectorSuperClassForBitWidth(BitWidth)
+ : getAnyVectorSuperClassForBitWidth(BitWidth);
+}
+
const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 16)
@@ -2305,15 +2364,14 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
// We can assume that each lane corresponds to one 32-bit register.
unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
- if (isSGPRClass(RC)) {
- if (Size == 32)
- RC = &AMDGPU::SGPR_32RegClass;
- else
- RC = getSGPRClassForBitWidth(Size);
- } else if (hasAGPRs(RC)) {
+ if (isAGPRClass(RC)) {
RC = getAGPRClassForBitWidth(Size);
- } else {
+ } else if (isVGPRClass(RC)) {
RC = getVGPRClassForBitWidth(Size);
+ } else if (isVectorSuperClass(RC)) {
+ RC = getVectorSuperClassForBitWidth(Size);
+ } else {
+ RC = getSGPRClassForBitWidth(Size);
}
assert(RC && "Invalid sub-register class size");
return RC;
@@ -2626,10 +2684,13 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
if (!ST.needsAlignedVGPRs())
return true;
- if (hasVGPRs(&RC))
+ if (isVGPRClass(&RC))
return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
- if (hasAGPRs(&RC))
+ if (isAGPRClass(&RC))
return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+ if (isVectorSuperClass(&RC))
+ return RC.hasSuperClassEq(
+ getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 8d90ddb1cf4cf..49a2814ff639f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -155,6 +155,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
LLVM_READONLY
const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
+ LLVM_READONLY
+ const TargetRegisterClass *
+ getVectorSuperClassForBitWidth(unsigned BitWidth) const;
+
LLVM_READONLY
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
@@ -184,6 +188,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
return hasAGPRs(RC) && !hasVGPRs(RC);
}
+ /// \returns true only if this class contains both VGPR and AGPR registers
+ bool isVectorSuperClass(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) && hasAGPRs(RC);
+ }
+
/// \returns true if this class contains VGPR registers.
static bool hasVGPRs(const TargetRegisterClass *RC) {
return RC->TSFlags & SIRCFlags::HasVGPR;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index cf1d904842284..bc0bd15d36f8e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -862,37 +862,36 @@ def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_
let HasVGPR = 1;
}
-def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
- (add AGPR_32, VGPR_32)> {
- let isAllocatable = 0;
- let HasVGPR = 1;
- let HasAGPR = 1;
-}
-
-def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
- (add AReg_64, VReg_64)> {
- let isAllocatable = 0;
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
let HasVGPR = 1;
let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
-let HasVGPR = 1, HasAGPR = 1 in {
-def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
- (add AReg_96, VReg_96)> {
- let isAllocatable = 0;
-}
+// Define a register tuple class, along with one requiring an even
+// aligned base register.
+multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
+ dag vregList, dag aregList> {
+ let HasVGPR = 1, HasAGPR = 1 in {
+ // Define the regular class.
+ def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
-def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
- (add AReg_128, VReg_128)> {
- let isAllocatable = 0;
+ // Define 2-aligned variant
+ def _Align2 : VRegClassBase<numRegs, regTypes,
+ (add (decimate vregList, 2),
+ (decimate aregList, 2))>;
+ }
}
-def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
- (add AReg_160, VReg_160)> {
- let isAllocatable = 0;
-}
-} // End HasVGPR = 1, HasAGPR = 1
+defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
+defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
+defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
+defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
+defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;
+defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;
+defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;
+defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
+defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
//===----------------------------------------------------------------------===//
// Register operands
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 9da7b9f5145de..d20eaaaa65e8c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1626,13 +1626,14 @@ unsigned getRegBitWidth(unsigned RCID) {
return 32;
case AMDGPU::SGPR_64RegClassID:
case AMDGPU::VS_64RegClassID:
- case AMDGPU::AV_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
case AMDGPU::AReg_64RegClassID:
case AMDGPU::SReg_64_XEXECRegClassID:
case AMDGPU::VReg_64_Align2RegClassID:
case AMDGPU::AReg_64_Align2RegClassID:
+ case AMDGPU::AV_64RegClassID:
+ case AMDGPU::AV_64_Align2RegClassID:
return 64;
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
@@ -1641,6 +1642,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_96_Align2RegClassID:
case AMDGPU::AReg_96_Align2RegClassID:
case AMDGPU::AV_96RegClassID:
+ case AMDGPU::AV_96_Align2RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -1649,6 +1651,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_128_Align2RegClassID:
case AMDGPU::AReg_128_Align2RegClassID:
case AMDGPU::AV_128RegClassID:
+ case AMDGPU::AV_128_Align2RegClassID:
return 128;
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
@@ -1657,6 +1660,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_160_Align2RegClassID:
case AMDGPU::AReg_160_Align2RegClassID:
case AMDGPU::AV_160RegClassID:
+ case AMDGPU::AV_160_Align2RegClassID:
return 160;
case AMDGPU::SGPR_192RegClassID:
case AMDGPU::SReg_192RegClassID:
@@ -1664,6 +1668,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_192RegClassID:
case AMDGPU::VReg_192_Align2RegClassID:
case AMDGPU::AReg_192_Align2RegClassID:
+ case AMDGPU::AV_192RegClassID:
+ case AMDGPU::AV_192_Align2RegClassID:
return 192;
case AMDGPU::SGPR_224RegClassID:
case AMDGPU::SReg_224RegClassID:
@@ -1671,6 +1677,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_224RegClassID:
case AMDGPU::VReg_224_Align2RegClassID:
case AMDGPU::AReg_224_Align2RegClassID:
+ case AMDGPU::AV_224RegClassID:
+ case AMDGPU::AV_224_Align2RegClassID:
return 224;
case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
@@ -1678,6 +1686,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_256RegClassID:
case AMDGPU::VReg_256_Align2RegClassID:
case AMDGPU::AReg_256_Align2RegClassID:
+ case AMDGPU::AV_256RegClassID:
+ case AMDGPU::AV_256_Align2RegClassID:
return 256;
case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
@@ -1685,6 +1695,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_512RegClassID:
case AMDGPU::VReg_512_Align2RegClassID:
case AMDGPU::AReg_512_Align2RegClassID:
+ case AMDGPU::AV_512RegClassID:
+ case AMDGPU::AV_512_Align2RegClassID:
return 512;
case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
@@ -1692,6 +1704,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_1024RegClassID:
case AMDGPU::VReg_1024_Align2RegClassID:
case AMDGPU::AReg_1024_Align2RegClassID:
+ case AMDGPU::AV_1024RegClassID:
+ case AMDGPU::AV_1024_Align2RegClassID:
return 1024;
default:
llvm_unreachable("Unexpected register class");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
index 7503f73a00404..d31e73e91b607 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -144,7 +144,7 @@ define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2883594 /* regdef:VReg_64 */, def %2
+ ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
index 3a5654f63a428..b303e69064a02 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll
@@ -8,15 +8,15 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5046282 /* regdef:VReg_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5046281 /* reguse:VReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5177354 /* regdef:VReg_128_Align2 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5177353 /* reguse:VReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
- ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:AReg_128 */, def %4
+ ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
- ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:AReg_128 */, [[COPY]]
+ ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
- ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:AReg_128_Align2 */, def %4
+ ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
- ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:AReg_128_Align2 */, [[COPY]]
+ ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)
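
A note on the new bit-width lookup: getVectorSuperClassForBitWidth
mirrors the existing VGPR/AGPR lookups and slots into the dispatch
seen in getSubRegClass and isProperlyAlignedRC above. A minimal
usage sketch under that reading (the helper itself is hypothetical;
the SIRegisterInfo calls are as declared in this diff):

    // Pick a class of the requested width matching the kind of the
    // incoming class: AReg_* for pure AGPR, VReg_* for pure VGPR,
    // AV_* for the combined superclass, SGPR otherwise.
    const TargetRegisterClass *
    classForBitWidth(const SIRegisterInfo &TRI,
                     const TargetRegisterClass *RC, unsigned Size) {
      if (TRI.isAGPRClass(RC))
        return TRI.getAGPRClassForBitWidth(Size);
      if (TRI.isVGPRClass(RC))
        return TRI.getVGPRClassForBitWidth(Size);
      if (TRI.isVectorSuperClass(RC))
        return TRI.getVectorSuperClassForBitWidth(Size);
      return TRI.getSGPRClassForBitWidth(Size);
    }

On subtargets where needsAlignedVGPRs() is true (gfx90a), the
lookups return the _Align2 variants automatically, so callers do
not handle alignment themselves.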