[llvm] e4bc55b - AMDGPU/GlobalISel: Reduce indentation
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 3 05:41:29 PST 2020
Author: Matt Arsenault
Date: 2020-02-03T05:41:14-08:00
New Revision: e4bc55bd94bc5b618f6aa284c71eb69f87f75d06
URL: https://github.com/llvm/llvm-project/commit/e4bc55bd94bc5b618f6aa284c71eb69f87f75d06
DIFF: https://github.com/llvm/llvm-project/commit/e4bc55bd94bc5b618f6aa284c71eb69f87f75d06.diff
LOG: AMDGPU/GlobalISel: Reduce indentation
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 6df7b019c295..44b46f5bf76b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -846,34 +846,129 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
if (!Op.isReg() || Op.isDef())
continue;
- if (SGPROperandRegs.count(Op.getReg())) {
- LLT OpTy = MRI.getType(Op.getReg());
- unsigned OpSize = OpTy.getSizeInBits();
-
- // Can only do a readlane of 32-bit pieces.
- if (OpSize == 32) {
- // Avoid extra copies in the simple case of one 32-bit register.
- Register CurrentLaneOpReg
- = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- MRI.setType(CurrentLaneOpReg, OpTy);
-
- constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(Op.getReg());
+ if (!SGPROperandRegs.count(Op.getReg()))
+ continue;
+
+ LLT OpTy = MRI.getType(Op.getReg());
+ unsigned OpSize = OpTy.getSizeInBits();
+
+ // Can only do a readlane of 32-bit pieces.
+ if (OpSize == 32) {
+ // Avoid extra copies in the simple case of one 32-bit register.
+ Register CurrentLaneOpReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ MRI.setType(CurrentLaneOpReg, OpTy);
+
+ constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(Op.getReg());
+
+ Register NewCondReg = MRI.createVirtualRegister(WaveRC);
+ bool First = CondReg == AMDGPU::NoRegister;
+ if (First)
+ CondReg = NewCondReg;
+
+ // Compare the just read M0 value to all possible Idx values.
+ B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ .addDef(NewCondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(Op.getReg());
+ Op.setReg(CurrentLaneOpReg);
+
+ if (!First) {
+ Register AndReg = MRI.createVirtualRegister(WaveRC);
+
+ // If there are multiple operands to consider, and the conditions.
+ B.buildInstr(WaveAndOpc)
+ .addDef(AndReg)
+ .addReg(NewCondReg)
+ .addReg(CondReg);
+ CondReg = AndReg;
+ }
+ } else {
+ LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 8> ReadlanePieces;
+
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
+
+ bool Is64 = OpSize % 64 == 0;
+
+ LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
+ unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
+ : AMDGPU::V_CMP_EQ_U32_e64;
+
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
+
+ // Insert the unmerge before the loop.
+
+ B.setMBB(MBB);
+ auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
+ B.setInstr(*I);
+
+ unsigned NumPieces = Unmerge->getNumOperands() - 1;
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
+ Register UnmergePiece = Unmerge.getReg(PieceIdx);
+
+ Register CurrentLaneOpReg;
+ if (Is64) {
+ Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
+ Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
+
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
+ MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
+ MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegLo)
+ .addReg(UnmergePiece, 0, AMDGPU::sub0);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegHi)
+ .addReg(UnmergePiece, 0, AMDGPU::sub1);
+
+ CurrentLaneOpReg =
+ B.buildMerge(LLT::scalar(64),
+ {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
+ .getReg(0);
+
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
+
+ if (OpTy.getScalarSizeInBits() == 64) {
+          // If we need to produce a 64-bit element vector, use the
+          // merged pieces.
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ } else {
+ // 32-bit element type.
+ ReadlanePieces.push_back(CurrentLaneOpRegLo);
+ ReadlanePieces.push_back(CurrentLaneOpRegHi);
+ }
+ } else {
+ CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(UnmergePiece);
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ }
Register NewCondReg = MRI.createVirtualRegister(WaveRC);
bool First = CondReg == AMDGPU::NoRegister;
if (First)
CondReg = NewCondReg;
- // Compare the just read M0 value to all possible Idx values.
- B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ B.buildInstr(CmpOp)
.addDef(NewCondReg)
.addReg(CurrentLaneOpReg)
- .addReg(Op.getReg());
- Op.setReg(CurrentLaneOpReg);
+ .addReg(UnmergePiece);
if (!First) {
Register AndReg = MRI.createVirtualRegister(WaveRC);
@@ -885,113 +980,19 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
.addReg(CondReg);
CondReg = AndReg;
}
- } else {
- LLT S32 = LLT::scalar(32);
- SmallVector<Register, 8> ReadlanePieces;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
-
- bool Is64 = OpSize % 64 == 0;
-
- LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
- unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
- : AMDGPU::V_CMP_EQ_U32_e64;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
-
- // Insert the unmerge before the loop.
-
- B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
- B.setInstr(*I);
-
- unsigned NumPieces = Unmerge->getNumOperands() - 1;
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
- Register UnmergePiece = Unmerge.getReg(PieceIdx);
-
- Register CurrentLaneOpReg;
- if (Is64) {
- Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
- Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
-
- MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
- MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
- MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegLo)
- .addReg(UnmergePiece, 0, AMDGPU::sub0);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpRegHi)
- .addReg(UnmergePiece, 0, AMDGPU::sub1);
-
- CurrentLaneOpReg =
- B.buildMerge(LLT::scalar(64),
- {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
- .getReg(0);
-
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
-
- if (OpTy.getScalarSizeInBits() == 64) {
- // If we need to produce a 64-bit element vector, so use the
- // merged pieces
- ReadlanePieces.push_back(CurrentLaneOpReg);
- } else {
- // 32-bit element type.
- ReadlanePieces.push_back(CurrentLaneOpRegLo);
- ReadlanePieces.push_back(CurrentLaneOpRegHi);
- }
- } else {
- CurrentLaneOpReg = MRI.createGenericVirtualRegister(S32);
- MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
- MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
-
- // Read the next variant <- also loop target.
- BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- CurrentLaneOpReg)
- .addReg(UnmergePiece);
- ReadlanePieces.push_back(CurrentLaneOpReg);
- }
-
- Register NewCondReg = MRI.createVirtualRegister(WaveRC);
- bool First = CondReg == AMDGPU::NoRegister;
- if (First)
- CondReg = NewCondReg;
-
- B.buildInstr(CmpOp)
- .addDef(NewCondReg)
- .addReg(CurrentLaneOpReg)
- .addReg(UnmergePiece);
-
- if (!First) {
- Register AndReg = MRI.createVirtualRegister(WaveRC);
-
- // If there are multiple operands to consider, and the conditions.
- B.buildInstr(WaveAndOpc)
- .addDef(AndReg)
- .addReg(NewCondReg)
- .addReg(CondReg);
- CondReg = AndReg;
- }
- }
-
- // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
- // BUILD_VECTOR
- if (OpTy.isVector()) {
- auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- } else {
- auto Merge = B.buildMerge(OpTy, ReadlanePieces);
- Op.setReg(Merge.getReg(0));
- }
+ }
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
+ // BUILD_VECTOR
+ if (OpTy.isVector()) {
+ auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
+ } else {
+ auto Merge = B.buildMerge(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
}
+
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
}
}
}
More information about the llvm-commits
mailing list