[llvm] e8f6b0e - AMDGPU/GlobalISel: Fix splitting 64-bit extensions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed May 20 08:13:41 PDT 2020
Author: Matt Arsenault
Date: 2020-05-20T11:13:32-04:00
New Revision: e8f6b0e583cbf97a08c26b2219faaa5e433a67ab
URL: https://github.com/llvm/llvm-project/commit/e8f6b0e583cbf97a08c26b2219faaa5e433a67ab
DIFF: https://github.com/llvm/llvm-project/commit/e8f6b0e583cbf97a08c26b2219faaa5e433a67ab.diff
LOG: AMDGPU/GlobalISel: Fix splitting 64-bit extensions
This was replicating the low bits into the high bits for G_ZEXT,
rather than using 0.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 18ec745cb9a0..1cfc7ccc6cb7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1778,6 +1778,34 @@ static void reinsertVectorIndexAdd(MachineIRBuilder &B,
IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
}
+/// Implement extending a 32-bit value to a 64-bit value. \p Lo32Reg is the
+/// original 32-bit source value (to be inserted in the low part of the combined
+/// 64-bit result), and \p Hi32Reg is the high half of the combined 64-bit
+/// value.
+static void extendLow32IntoHigh32(MachineIRBuilder &B,
+ Register Hi32Reg, Register Lo32Reg,
+ unsigned ExtOpc,
+ const RegisterBank &RegBank,
+ bool IsBooleanSrc = false) {
+ if (ExtOpc == AMDGPU::G_ZEXT) {
+ B.buildConstant(Hi32Reg, 0);
+ } else if (ExtOpc == AMDGPU::G_SEXT) {
+ if (IsBooleanSrc) {
+ // If we know the original source was an s1, the high half is the same as
+ // the low.
+ B.buildCopy(Hi32Reg, Lo32Reg);
+ } else {
+ // Replicate sign bit from 32-bit extended part.
+ auto ShiftAmt = B.buildConstant(LLT::scalar(32), 31);
+ B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
+ B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
+ }
+ } else {
+ assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");
+ B.buildUndef(Hi32Reg);
+ }
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -2237,26 +2265,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// breakdowns supported.
DstTy.getSizeInBits() == 64 &&
SrcTy.getSizeInBits() <= 32) {
- const LLT S32 = LLT::scalar(32);
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
// Extend to 32-bit, and then extend the low half.
if (Signed) {
// TODO: Should really be buildSExtOrCopy
B.buildSExtOrTrunc(DefRegs[0], SrcReg);
-
- // Replicate sign bit from 32-bit extended part.
- auto ShiftAmt = B.buildConstant(S32, 31);
- MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
- B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
} else if (Opc == AMDGPU::G_ZEXT) {
B.buildZExtOrTrunc(DefRegs[0], SrcReg);
- B.buildConstant(DefRegs[1], 0);
} else {
B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
- B.buildUndef(DefRegs[1]);
}
+ extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank);
MRI.setRegBank(DstReg, *SrcBank);
MI.eraseFromParent();
return;
@@ -2266,7 +2287,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
// It is not legal to have a legalization artifact with a VCC source. Rather
- // than introducing a copy, insert the selcet we would have to select the
+ // than introducing a copy, insert the select we would have to select the
// copy to.
if (SrcBank == &AMDGPU::VCCRegBank) {
SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
@@ -2289,7 +2310,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
if (DstSize > 32) {
B.buildSelect(DefRegs[0], SrcReg, True, False);
- B.buildCopy(DefRegs[1], DefRegs[0]);
+ extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank, true);
} else if (DstSize < 32) {
auto Sel = B.buildSelect(SelType, SrcReg, True, False);
MRI.setRegBank(Sel.getReg(0), *DstBank);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
index 5c2bc3e93112..643726c2dbad 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
@@ -144,8 +144,8 @@ body: |
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[DEF]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index ef83a4c6c529..c0240a7984c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -160,8 +160,8 @@ body: |
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
More information about the llvm-commits
mailing list