[llvm] 0693e82 - AMDGPU/GlobalISel: Do a better job splitting 64-bit G_SEXT_INREG
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 4 13:24:07 PST 2020
Author: Matt Arsenault
Date: 2020-02-04T13:23:53-08:00
New Revision: 0693e827ed3ce081771366e29f5ece025e42a3d2
URL: https://github.com/llvm/llvm-project/commit/0693e827ed3ce081771366e29f5ece025e42a3d2
DIFF: https://github.com/llvm/llvm-project/commit/0693e827ed3ce081771366e29f5ece025e42a3d2.diff
LOG: AMDGPU/GlobalISel: Do a better job splitting 64-bit G_SEXT_INREG
We don't need to expand to full 64-bit shifts for the > 32-bit case.
Instead, keep the low half unchanged and use a 32-bit sext_inreg of the
high half.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5dabd8b83a82..4943f99afa73 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1799,53 +1799,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_SEXT_INREG: {
- const RegisterBank *SrcBank =
- OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
-
- // We can directly handle all 64-bit cases with s_bfe_i64.
- if (SrcBank == &AMDGPU::SGPRRegBank)
- break;
+ SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
+ if (SrcRegs.empty())
+ break; // Nothing to repair
const LLT S32 = LLT::scalar(32);
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(DstReg);
- if (Ty == S32)
- break;
-
MachineIRBuilder B(MI);
ApplyRegBankMapping O(*this, MRI, &AMDGPU::VGPRRegBank);
GISelObserverWrapper Observer(&O);
B.setChangeObserver(Observer);
+ // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
+ // we would need to further expand, and doesn't let us directly set the
+ // result registers.
+ SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
+
int Amt = MI.getOperand(2).getImm();
if (Amt <= 32) {
- // Don't use LegalizerHelper's narrowScalar. It produces unwanted G_SEXTs
- // we would need to further expand, and doesn't let us directly set the
- // result registers.
- SmallVector<Register, 2> DstRegs(OpdMapper.getVRegs(0));
- SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
-
- if (SrcRegs.empty())
- split64BitValueForMapping(B, SrcRegs, S32, SrcReg);
- // Extend in the low bits and propagate the sign bit to the high half.
- auto ShiftAmt = B.buildConstant(S32, 31);
if (Amt == 32) {
+ // The low bits are unchanged.
B.buildCopy(DstRegs[0], SrcRegs[0]);
- B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
} else {
+ // Extend in the low bits and propagate the sign bit to the high half.
B.buildSExtInReg(DstRegs[0], SrcRegs[0], Amt);
- B.buildAShr(DstRegs[1], DstRegs[0], ShiftAmt);
}
+
+ B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
} else {
- assert(empty(OpdMapper.getVRegs(0)) && empty(OpdMapper.getVRegs(1)));
- const LLT S64 = LLT::scalar(64);
- // This straddles two registers. Expand with 64-bit shifts.
- auto ShiftAmt = B.buildConstant(S32, 64 - Amt);
- auto Shl = B.buildShl(S64, SrcReg, ShiftAmt);
- B.buildAShr(DstReg, Shl, ShiftAmt);
+ // The low bits are unchanged, and extend in the high bits.
+ B.buildCopy(DstRegs[0], SrcRegs[0]);
+ B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
}
+ Register DstReg = MI.getOperand(0).getReg();
MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
MI.eraseFromParent();
return;
@@ -2965,7 +2951,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_ZEXT:
case AMDGPU::G_SEXT:
- case AMDGPU::G_ANYEXT: {
+ case AMDGPU::G_ANYEXT:
+ case AMDGPU::G_SEXT_INREG: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
@@ -2996,24 +2983,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
- case AMDGPU::G_SEXT_INREG: {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getImm();
- unsigned Size = getSizeInBits(Dst, MRI, *TRI);
- unsigned BankID = getRegBank(Src, MRI, *TRI)->getID();
-
- if (Amt <= 32) {
- OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
- } else {
- // If we need to expand a 64 bit for the VALU, this will straddle two
- // registers. Just expand this with 64-bit shifts.
- OpdsMapping[0] = AMDGPU::getValueMapping(BankID, Size);
- }
-
- OpdsMapping[1] = OpdsMapping[0];
- break;
- }
case AMDGPU::G_FCMP: {
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
index 9d812fde685a..2e72381795ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext-inreg.mir
@@ -121,8 +121,8 @@ body: |
; CHECK-LABEL: name: sext_inreg_v_s64_1
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 1
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -143,8 +143,8 @@ body: |
; CHECK-LABEL: name: sext_inreg_v_s64_31
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[UV]], 31
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT_INREG]](s32), [[ASHR]](s32)
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -165,8 +165,8 @@ body: |
; CHECK-LABEL: name: sext_inreg_v_s64_32
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
@@ -186,12 +186,55 @@ body: |
; CHECK-LABEL: name: sext_inreg_v_s64_33
; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[COPY]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
- ; CHECK: S_ENDPGM 0, implicit [[ASHR]](s64)
+ ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 1
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+ ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_SEXT_INREG %0, 33
S_ENDPGM 0, implicit %1
...
+
+---
+name: sext_inreg_v_s64_35
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: sext_inreg_v_s64_35
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 3
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+ ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = G_SEXT_INREG %0, 35
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: sext_inreg_v_s64_63
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: sext_inreg_v_s64_63
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
+ ; CHECK: [[SEXT_INREG:%[0-9]+]]:vgpr(s32) = G_SEXT_INREG [[COPY1]], 31
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[SEXT_INREG]](s32)
+ ; CHECK: S_ENDPGM 0, implicit [[MV]](s64)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = G_SEXT_INREG %0, 63
+ S_ENDPGM 0, implicit %1
+
+...
More information about the llvm-commits mailing list