[llvm] dc3e499 - AMDGPU/GlobalISel: Fix G_EXTRACT of 96-bit results

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 14 15:57:56 PST 2020


Author: Matt Arsenault
Date: 2020-02-14T15:57:40-08:00
New Revision: dc3e499dd4a6bd0882ca0af453cea99620320ffe

URL: https://github.com/llvm/llvm-project/commit/dc3e499dd4a6bd0882ca0af453cea99620320ffe
DIFF: https://github.com/llvm/llvm-project/commit/dc3e499dd4a6bd0882ca0af453cea99620320ffe.diff

LOG: AMDGPU/GlobalISel: Fix G_EXTRACT of 96-bit results

Previously, selecting a G_EXTRACT with a 96-bit result would hit an assertion on an unhandled size in getRegSplitParts.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 935db23da635..2c16f42540f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -468,6 +468,7 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
   return true;
 }
 
+// TODO: We should probably legalize these to only using 32-bit results.
 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   Register DstReg = I.getOperand(0).getReg();
@@ -479,7 +480,12 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
 
   // TODO: Should handle any multiple of 32 offset.
   unsigned Offset = I.getOperand(2).getImm();
-  if (Offset % DstSize != 0)
+  if (Offset % 32 != 0 || DstSize > 128)
+    return false;
+
+  const TargetRegisterClass *DstRC =
+    TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
+  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
     return false;
 
   const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -487,20 +493,18 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
     TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, *MRI);
   if (!SrcRC)
     return false;
+  unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32,
+                                                         DstSize / 32);
+  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
+  if (!SrcRC)
+    return false;
 
-  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
-
+  SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
+                                    *SrcRC, I.getOperand(1));
   const DebugLoc &DL = I.getDebugLoc();
-  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
-                               .addReg(SrcReg, 0, SubRegs[Offset / DstSize]);
+  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
+    .addReg(SrcReg, 0, SubReg);
 
-  for (const MachineOperand &MO : Copy->operands()) {
-    const TargetRegisterClass *RC =
-            TRI.getConstrainedRegClassForOperand(MO, *MRI);
-    if (!RC)
-      continue;
-    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
-  }
   I.eraseFromParent();
   return true;
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
index d0cca04784b0..d0159822432c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
@@ -177,3 +177,45 @@ body: |
     S_ENDPGM 0, implicit %1, implicit %2
 
 ...
+
+---
+name:            extract_sgpr_s96_from_s128
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-LABEL: name: extract_sgpr_s96_from_s128
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:sreg_96 = COPY [[COPY1]].sub0_sub1_sub2
+    ; CHECK: [[COPY3:%[0-9]+]]:sreg_96 = COPY [[COPY]].sub1_sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s96) = G_EXTRACT %0, 0
+    %2:sgpr(s96) = G_EXTRACT %0, 32
+    S_ENDPGM 0, implicit %1, implicit %2
+
+...
+
+---
+name:            extract_sgpr_v3s32_from_v4s32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
+    ; CHECK: [[COPY2:%[0-9]+]]:sreg_96 = COPY [[COPY1]].sub0_sub1_sub2
+    ; CHECK: [[COPY3:%[0-9]+]]:sreg_96 = COPY [[COPY]].sub1_sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
+    %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(<3 x s32>) = G_EXTRACT %0, 0
+    %2:sgpr(<3 x s32>) = G_EXTRACT %0, 32
+    S_ENDPGM 0, implicit %1, implicit %2
+
+...


        


More information about the llvm-commits mailing list