[llvm] r373938 - AMDGPU/GlobalISel: Select more G_INSERT cases

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 7 11:43:31 PDT 2019


Author: arsenm
Date: Mon Oct  7 11:43:31 2019
New Revision: 373938

URL: http://llvm.org/viewvc/llvm-project?rev=373938&view=rev
Log:
AMDGPU/GlobalISel: Select more G_INSERT cases

At minimum handle the s64 insert type, which are emitted in real cases
during legalization.

We really need TableGen to emit something to emit something like the
inverse of composeSubRegIndices do determine the subreg index to use.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=373938&r1=373937&r2=373938&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Oct  7 11:43:31 2019
@@ -555,39 +555,97 @@ bool AMDGPUInstructionSelector::selectG_
   return false;
 }
 
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI,
+                                          unsigned Size, unsigned Offset) {
+  switch (Size) {
+  case 32:
+    return TRI.getSubRegFromChannel(Offset / 32);
+  case 64: {
+    switch (Offset) {
+    case 0:
+      return AMDGPU::sub0_sub1;
+    case 32:
+      return AMDGPU::sub1_sub2;
+    case 64:
+      return AMDGPU::sub2_sub3;
+    case 96:
+      return AMDGPU::sub4_sub5;
+    case 128:
+      return AMDGPU::sub5_sub6;
+    case 160:
+      return AMDGPU::sub7_sub8;
+      // FIXME: Missing cases up to 1024 bits
+    default:
+      return AMDGPU::NoSubRegister;
+    }
+  }
+  case 96: {
+    switch (Offset) {
+    case 0:
+      return AMDGPU::sub0_sub1_sub2;
+    case 32:
+      return AMDGPU::sub1_sub2_sub3;
+    case 64:
+      return AMDGPU::sub2_sub3_sub4;
+    }
+  }
+  default:
+    return AMDGPU::NoSubRegister;
+  }
+}
+
 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
+
+  Register DstReg = I.getOperand(0).getReg();
   Register Src0Reg = I.getOperand(1).getReg();
   Register Src1Reg = I.getOperand(2).getReg();
   LLT Src1Ty = MRI->getType(Src1Reg);
-  if (Src1Ty.getSizeInBits() != 32)
-    return false;
+
+  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
+  unsigned InsSize = Src1Ty.getSizeInBits();
 
   int64_t Offset = I.getOperand(3).getImm();
   if (Offset % 32 != 0)
     return false;
 
-  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
+  unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset);
+  if (SubReg == AMDGPU::NoSubRegister)
+    return false;
+
+  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
+  const TargetRegisterClass *DstRC =
+    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+  if (!DstRC)
+    return false;
+
+  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
+  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
+  const TargetRegisterClass *Src0RC =
+    TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
+  const TargetRegisterClass *Src1RC =
+    TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);
+
+  // Deal with weird cases where the class only partially supports the subreg
+  // index.
+  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
+  if (!Src0RC)
+    return false;
+
+  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
+      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
+    return false;
+
   const DebugLoc &DL = I.getDebugLoc();
+  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
+    .addReg(Src0Reg)
+    .addReg(Src1Reg)
+    .addImm(SubReg);
 
-  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
-                               .addDef(I.getOperand(0).getReg())
-                               .addReg(Src0Reg)
-                               .addReg(Src1Reg)
-                               .addImm(SubReg);
-
-  for (const MachineOperand &MO : Ins->operands()) {
-    if (!MO.isReg())
-      continue;
-    if (Register::isPhysicalRegister(MO.getReg()))
-      continue;
-
-    const TargetRegisterClass *RC =
-            TRI.getConstrainedRegClassForOperand(MO, *MRI);
-    if (!RC)
-      continue;
-    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
-  }
   I.eraseFromParent();
   return true;
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir?rev=373938&r1=373937&r2=373938&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir Mon Oct  7 11:43:31 2019
@@ -1,32 +1,35 @@
-# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
 ---
 
-name:            insert512
+name:            insert_s512_s32
 legalized:       true
 regBankSelected: true
 
-# CHECK-LABEL: insert512
-# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
-# CHECK: [[VAL:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
-# CHECK: [[BASE0:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE]], [[VAL]], %subreg.sub0
-# CHECK: [[BASE1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE0]], [[VAL]], %subreg.sub1
-# CHECK: [[BASE2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE1]], [[VAL]], %subreg.sub2
-# CHECK: [[BASE3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE2]], [[VAL]], %subreg.sub3
-# CHECK: [[BASE4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE3]], [[VAL]], %subreg.sub4
-# CHECK: [[BASE5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE4]], [[VAL]], %subreg.sub5
-# CHECK: [[BASE6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE5]], [[VAL]], %subreg.sub6
-# CHECK: [[BASE7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE6]], [[VAL]], %subreg.sub7
-# CHECK: [[BASE8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE7]], [[VAL]], %subreg.sub8
-# CHECK: [[BASE9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE8]], [[VAL]], %subreg.sub9
-# CHECK: [[BASE10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE9]], [[VAL]], %subreg.sub10
-# CHECK: [[BASE11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE10]], [[VAL]], %subreg.sub11
-# CHECK: [[BASE12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE11]], [[VAL]], %subreg.sub12
-# CHECK: [[BASE13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE12]], [[VAL]], %subreg.sub13
-# CHECK: [[BASE14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE13]], [[VAL]], %subreg.sub14
-# CHECK: [[BASE15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE14]], [[VAL]], %subreg.sub15
-
 body: |
   bb.0:
+    ; CHECK-LABEL: name: insert_s512_s32
+    ; CHECK: [[DEF:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[DEF]], [[DEF1]], %subreg.sub0
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG]], [[DEF1]], %subreg.sub1
+    ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG1]], [[DEF1]], %subreg.sub2
+    ; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG2]], [[DEF1]], %subreg.sub3
+    ; CHECK: [[INSERT_SUBREG4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG3]], [[DEF1]], %subreg.sub4
+    ; CHECK: [[INSERT_SUBREG5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG4]], [[DEF1]], %subreg.sub5
+    ; CHECK: [[INSERT_SUBREG6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG5]], [[DEF1]], %subreg.sub6
+    ; CHECK: [[INSERT_SUBREG7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG6]], [[DEF1]], %subreg.sub7
+    ; CHECK: [[INSERT_SUBREG8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG7]], [[DEF1]], %subreg.sub8
+    ; CHECK: [[INSERT_SUBREG9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG8]], [[DEF1]], %subreg.sub9
+    ; CHECK: [[INSERT_SUBREG10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG9]], [[DEF1]], %subreg.sub10
+    ; CHECK: [[INSERT_SUBREG11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG10]], [[DEF1]], %subreg.sub11
+    ; CHECK: [[INSERT_SUBREG12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG11]], [[DEF1]], %subreg.sub12
+    ; CHECK: [[INSERT_SUBREG13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG12]], [[DEF1]], %subreg.sub13
+    ; CHECK: [[INSERT_SUBREG14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG13]], [[DEF1]], %subreg.sub14
+    ; CHECK: [[INSERT_SUBREG15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15
+    ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]]
+    ; CHECK: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     %0:sgpr(s512) = G_IMPLICIT_DEF
     %1:sgpr(s32) = G_IMPLICIT_DEF
     %2:sgpr(s512) = G_INSERT %0:sgpr, %1:sgpr(s32), 0
@@ -47,3 +50,403 @@ body: |
     %17:sgpr(s512) = G_INSERT %16:sgpr, %1:sgpr(s32), 480
     $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17:sgpr(s512)
     SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+---
+
+name:            insert_v_s64_v_s32_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s64_v_s32_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: insert_v_s64_v_s32_32
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s64_s_s32_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $sgpr2
+    ; CHECK-LABEL: name: insert_s_s64_s_s32_0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s64) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s64_s_s32_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $sgpr2
+    ; CHECK-LABEL: name: insert_s_s64_s_s32_32
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s64) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s64_v_s32_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $vgpr0
+    ; CHECK-LABEL: name: insert_s_s64_v_s32_32
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s64_s_s32_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $sgpr0
+    ; CHECK-LABEL: name: insert_v_s64_s_s32_32
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s64) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s96_v_s64_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4
+    ; CHECK-LABEL: name: insert_v_s96_v_s64_0
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2
+    %1:vgpr(s64) = COPY $vgpr3_vgpr4
+    %2:vgpr(s96) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s96_v_s64_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4
+    ; CHECK-LABEL: name: insert_v_s96_v_s64_32
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2
+    %1:vgpr(s64) = COPY $vgpr3_vgpr4
+    %2:vgpr(s96) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s96_s_s64_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
+    ; CHECK-LABEL: name: insert_s_s96_s_s64_0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
+    %1:sgpr(s64) = COPY $sgpr4_sgpr5
+    %2:sgpr(s96) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s96_s_s64_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
+    ; CHECK-LABEL: name: insert_s_s96_s_s64_32
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
+    %1:sgpr(s64) = COPY $sgpr4_sgpr5
+    %2:sgpr(s96) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s128_s_s64_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+    ; CHECK-LABEL: name: insert_s_s128_s_s64_0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s64) = COPY $sgpr4_sgpr5
+    %2:sgpr(s128) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+# ---
+
+# name:            insert_s_s128_s_s64_32
+# legalized:       true
+# regBankSelected: true
+
+# body: |
+#   bb.0:
+#     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+#     %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+#     %1:sgpr(s64) = COPY $sgpr4_sgpr5
+#     %2:sgpr(s128) = G_INSERT %0, %1, 32
+#     S_ENDPGM 0, implicit %2
+# ...
+
+---
+
+name:            insert_s_s128_s_s64_64
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+    ; CHECK-LABEL: name: insert_s_s128_s_s64_64
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s64) = COPY $sgpr4_sgpr5
+    %2:sgpr(s128) = G_INSERT %0, %1, 64
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s256_s_s64_96
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
+    ; CHECK-LABEL: name: insert_s_s256_s_s64_96
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s64) = COPY $sgpr8_sgpr9
+    %2:sgpr(s256) = G_INSERT %0, %1, 96
+    S_ENDPGM 0, implicit %2
+...
+
+# ---
+
+# name:            insert_s_s256_s_s64_128
+# legalized:       true
+# regBankSelected: true
+
+# body: |
+#   bb.0:
+#     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
+#     %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+#     %1:sgpr(s64) = COPY $sgpr4_sgpr5
+#     %2:sgpr(s256) = G_INSERT %0, %1, 128
+#     S_ENDPGM 0, implicit %2
+# ...
+
+# ---
+
+# name:            insert_s_s256_s_s64_160
+# legalized:       true
+# regBankSelected: true
+
+# body: |
+#   bb.0:
+#     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
+#     %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+#     %1:sgpr(s64) = COPY $sgpr4_sgpr5
+#     %2:sgpr(s256) = G_INSERT %0, %1, 160
+#     S_ENDPGM 0, implicit %2
+# ...
+
+---
+
+name:            insert_s_s128_s_s96_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
+    ; CHECK-LABEL: name: insert_s_s128_s_s96_0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+    %2:sgpr(s128) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s128_s_s96_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
+    ; CHECK-LABEL: name: insert_s_s128_s_s96_32
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+    %2:sgpr(s128) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s160_s_s96_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+    ; CHECK-LABEL: name: insert_s_s160_s_s96_0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+    %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+    %2:sgpr(s160) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s160_s_s96_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+    ; CHECK-LABEL: name: insert_s_s160_s_s96_32
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+    %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+    %2:sgpr(s160) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s160_s_s96_64
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+    ; CHECK-LABEL: name: insert_s_s160_s_s96_64
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+    %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+    %2:sgpr(s160) = G_INSERT %0, %1, 64
+    S_ENDPGM 0, implicit %2
+...




More information about the llvm-commits mailing list