[llvm] dc3e499 - AMDGPU/GlobalISel: Fix G_EXTRACT of 96-bit results
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 14 15:57:56 PST 2020
Author: Matt Arsenault
Date: 2020-02-14T15:57:40-08:00
New Revision: dc3e499dd4a6bd0882ca0af453cea99620320ffe
URL: https://github.com/llvm/llvm-project/commit/dc3e499dd4a6bd0882ca0af453cea99620320ffe
DIFF: https://github.com/llvm/llvm-project/commit/dc3e499dd4a6bd0882ca0af453cea99620320ffe.diff
LOG: AMDGPU/GlobalISel: Fix G_EXTRACT of 96-bit results
Previously, selecting a G_EXTRACT with a 96-bit result would assert on an unhandled size in getRegSplitParts.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 935db23da635..2c16f42540f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -468,6 +468,7 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
return true;
}
+// TODO: We should probably legalize these to only using 32-bit results.
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
Register DstReg = I.getOperand(0).getReg();
@@ -479,7 +480,12 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
// TODO: Should handle any multiple of 32 offset.
unsigned Offset = I.getOperand(2).getImm();
- if (Offset % DstSize != 0)
+ if (Offset % 32 != 0 || DstSize > 128)
+ return false;
+
+ const TargetRegisterClass *DstRC =
+ TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
+ if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
return false;
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -487,20 +493,18 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
if (!SrcRC)
return false;
+ unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32,
+ DstSize / 32);
+ SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
+ if (!SrcRC)
+ return false;
- ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
-
+ SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
+ *SrcRC, I.getOperand(1));
const DebugLoc &DL = I.getDebugLoc();
- MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
- .addReg(SrcReg, 0, SubRegs[Offset / DstSize]);
+ BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
+ .addReg(SrcReg, 0, SubReg);
- for (const MachineOperand &MO : Copy->operands()) {
- const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, *MRI);
- if (!RC)
- continue;
- RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
- }
I.eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
index d0cca04784b0..d0159822432c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
@@ -177,3 +177,45 @@ body: |
S_ENDPGM 0, implicit %1, implicit %2
...
+
+---
+name: extract_sgpr_s96_from_s128
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-LABEL: name: extract_sgpr_s96_from_s128
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
+ ; CHECK: [[COPY2:%[0-9]+]]:sreg_96 = COPY [[COPY1]].sub0_sub1_sub2
+ ; CHECK: [[COPY3:%[0-9]+]]:sreg_96 = COPY [[COPY]].sub1_sub2_sub3
+ ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
+ %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s96) = G_EXTRACT %0, 0
+ %2:sgpr(s96) = G_EXTRACT %0, 32
+ S_ENDPGM 0, implicit %1, implicit %2
+
+...
+
+---
+name: extract_sgpr_v3s32_from_v4s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
+ ; CHECK: [[COPY2:%[0-9]+]]:sreg_96 = COPY [[COPY1]].sub0_sub1_sub2
+ ; CHECK: [[COPY3:%[0-9]+]]:sreg_96 = COPY [[COPY]].sub1_sub2_sub3
+ ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
+ %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(<3 x s32>) = G_EXTRACT %0, 0
+ %2:sgpr(<3 x s32>) = G_EXTRACT %0, 32
+ S_ENDPGM 0, implicit %1, implicit %2
+
+...
More information about the llvm-commits
mailing list