[llvm] r373938 - AMDGPU/GlobalISel: Select more G_INSERT cases
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 7 11:43:31 PDT 2019
Author: arsenm
Date: Mon Oct 7 11:43:31 2019
New Revision: 373938
URL: http://llvm.org/viewvc/llvm-project?rev=373938&view=rev
Log:
AMDGPU/GlobalISel: Select more G_INSERT cases
At minimum, handle the s64 insert type, which is emitted in real cases
during legalization.
We really need TableGen to emit something like the inverse of
composeSubRegIndices to determine the subreg index to use.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=373938&r1=373937&r2=373938&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Oct 7 11:43:31 2019
@@ -555,39 +555,97 @@ bool AMDGPUInstructionSelector::selectG_
return false;
}
+// Map an insert of Size bits at bit Offset to the subregister index covering
+// the affected 32-bit channels. 32-bit inserts are forwarded to
+// TRI.getSubRegFromChannel; 64- and 96-bit inserts use the tables below.
+// Returns AMDGPU::NoSubRegister for any other size and for 64-/96-bit offsets
+// that are not listed.
+//
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI,
+ unsigned Size, unsigned Offset) {
+ switch (Size) {
+ case 32:
+ return TRI.getSubRegFromChannel(Offset / 32);
+ case 64: {
+ // A 64-bit value at bit offset 32 * N occupies channels N and N + 1.
+ switch (Offset) {
+ case 0:
+ return AMDGPU::sub0_sub1;
+ case 32:
+ return AMDGPU::sub1_sub2;
+ case 64:
+ return AMDGPU::sub2_sub3;
+ case 96:
+ return AMDGPU::sub3_sub4;
+ case 128:
+ return AMDGPU::sub4_sub5;
+ case 160:
+ return AMDGPU::sub5_sub6;
+ // FIXME: Missing cases up to 1024 bits
+ default:
+ return AMDGPU::NoSubRegister;
+ }
+ }
+ case 96: {
+ // A 96-bit value at bit offset 32 * N occupies channels N through N + 2.
+ switch (Offset) {
+ case 0:
+ return AMDGPU::sub0_sub1_sub2;
+ case 32:
+ return AMDGPU::sub1_sub2_sub3;
+ case 64:
+ return AMDGPU::sub2_sub3_sub4;
+ // FIXME: Missing cases up to 1024 bits
+ default:
+ // Explicit so unlisted offsets do not depend on falling through into the
+ // outer default label.
+ return AMDGPU::NoSubRegister;
+ }
+ }
+ default:
+ return AMDGPU::NoSubRegister;
+ }
+}
+
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
+
+ Register DstReg = I.getOperand(0).getReg();
Register Src0Reg = I.getOperand(1).getReg();
Register Src1Reg = I.getOperand(2).getReg();
LLT Src1Ty = MRI->getType(Src1Reg);
- if (Src1Ty.getSizeInBits() != 32)
- return false;
+
+ unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
+ unsigned InsSize = Src1Ty.getSizeInBits();
int64_t Offset = I.getOperand(3).getImm();
if (Offset % 32 != 0)
return false;
- unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
+ unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset);
+ if (SubReg == AMDGPU::NoSubRegister)
+ return false;
+
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI);
+ if (!DstRC)
+ return false;
+
+ const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
+ const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
+ const TargetRegisterClass *Src0RC =
+ TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank, *MRI);
+ const TargetRegisterClass *Src1RC =
+ TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank, *MRI);
+
+ // Deal with weird cases where the class only partially supports the subreg
+ // index.
+ Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
+ if (!Src0RC)
+ return false;
+
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
+ !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
+ return false;
+
const DebugLoc &DL = I.getDebugLoc();
+ BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
+ .addReg(Src0Reg)
+ .addReg(Src1Reg)
+ .addImm(SubReg);
- MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
- .addDef(I.getOperand(0).getReg())
- .addReg(Src0Reg)
- .addReg(Src1Reg)
- .addImm(SubReg);
-
- for (const MachineOperand &MO : Ins->operands()) {
- if (!MO.isReg())
- continue;
- if (Register::isPhysicalRegister(MO.getReg()))
- continue;
-
- const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, *MRI);
- if (!RC)
- continue;
- RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
- }
I.eraseFromParent();
return true;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir?rev=373938&r1=373937&r2=373938&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir Mon Oct 7 11:43:31 2019
@@ -1,32 +1,35 @@
-# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
---
-name: insert512
+name: insert_s512_s32
legalized: true
regBankSelected: true
-# CHECK-LABEL: insert512
-# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
-# CHECK: [[VAL:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
-# CHECK: [[BASE0:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE]], [[VAL]], %subreg.sub0
-# CHECK: [[BASE1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE0]], [[VAL]], %subreg.sub1
-# CHECK: [[BASE2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE1]], [[VAL]], %subreg.sub2
-# CHECK: [[BASE3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE2]], [[VAL]], %subreg.sub3
-# CHECK: [[BASE4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE3]], [[VAL]], %subreg.sub4
-# CHECK: [[BASE5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE4]], [[VAL]], %subreg.sub5
-# CHECK: [[BASE6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE5]], [[VAL]], %subreg.sub6
-# CHECK: [[BASE7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE6]], [[VAL]], %subreg.sub7
-# CHECK: [[BASE8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE7]], [[VAL]], %subreg.sub8
-# CHECK: [[BASE9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE8]], [[VAL]], %subreg.sub9
-# CHECK: [[BASE10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE9]], [[VAL]], %subreg.sub10
-# CHECK: [[BASE11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE10]], [[VAL]], %subreg.sub11
-# CHECK: [[BASE12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE11]], [[VAL]], %subreg.sub12
-# CHECK: [[BASE13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE12]], [[VAL]], %subreg.sub13
-# CHECK: [[BASE14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE13]], [[VAL]], %subreg.sub14
-# CHECK: [[BASE15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[BASE14]], [[VAL]], %subreg.sub15
-
body: |
bb.0:
+ ; CHECK-LABEL: name: insert_s512_s32
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[DEF]], [[DEF1]], %subreg.sub0
+ ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG]], [[DEF1]], %subreg.sub1
+ ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG1]], [[DEF1]], %subreg.sub2
+ ; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG2]], [[DEF1]], %subreg.sub3
+ ; CHECK: [[INSERT_SUBREG4:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG3]], [[DEF1]], %subreg.sub4
+ ; CHECK: [[INSERT_SUBREG5:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG4]], [[DEF1]], %subreg.sub5
+ ; CHECK: [[INSERT_SUBREG6:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG5]], [[DEF1]], %subreg.sub6
+ ; CHECK: [[INSERT_SUBREG7:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG6]], [[DEF1]], %subreg.sub7
+ ; CHECK: [[INSERT_SUBREG8:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG7]], [[DEF1]], %subreg.sub8
+ ; CHECK: [[INSERT_SUBREG9:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG8]], [[DEF1]], %subreg.sub9
+ ; CHECK: [[INSERT_SUBREG10:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG9]], [[DEF1]], %subreg.sub10
+ ; CHECK: [[INSERT_SUBREG11:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG10]], [[DEF1]], %subreg.sub11
+ ; CHECK: [[INSERT_SUBREG12:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG11]], [[DEF1]], %subreg.sub12
+ ; CHECK: [[INSERT_SUBREG13:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG12]], [[DEF1]], %subreg.sub13
+ ; CHECK: [[INSERT_SUBREG14:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG13]], [[DEF1]], %subreg.sub14
+ ; CHECK: [[INSERT_SUBREG15:%[0-9]+]]:sreg_512 = INSERT_SUBREG [[INSERT_SUBREG14]], [[DEF1]], %subreg.sub15
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[INSERT_SUBREG15]]
+ ; CHECK: SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%0:sgpr(s512) = G_IMPLICIT_DEF
%1:sgpr(s32) = G_IMPLICIT_DEF
%2:sgpr(s512) = G_INSERT %0:sgpr, %1:sgpr(s32), 0
@@ -47,3 +50,403 @@ body: |
%17:sgpr(s512) = G_INSERT %16:sgpr, %1:sgpr(s32), 480
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %17:sgpr(s512)
SI_RETURN_TO_EPILOG $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+
+---
+
+name: insert_v_s64_v_s32_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: insert_v_s64_v_s32_0
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s64) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s64_v_s32_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: insert_v_s64_v_s32_32
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s64) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s64_s_s32_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: insert_s_s64_s_s32_0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s64) = COPY $sgpr0_sgpr1
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:sgpr(s64) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s64_s_s32_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2
+ ; CHECK-LABEL: name: insert_s_s64_s_s32_32
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64_xexec = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s64) = COPY $sgpr0_sgpr1
+ %1:sgpr(s32) = COPY $sgpr2
+ %2:sgpr(s64) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s64_v_s32_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr2
+ ; CHECK-LABEL: name: insert_s_s64_v_s32_32
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s64) = COPY $sgpr0_sgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s64) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s64_s_s32_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $sgpr0
+ ; CHECK-LABEL: name: insert_v_s64_s_s32_32
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:sgpr(s32) = COPY $sgpr0
+ %2:vgpr(s64) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s96_v_s64_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4
+ ; CHECK-LABEL: name: insert_v_s96_v_s64_0
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:vgpr(s64) = COPY $vgpr3_vgpr4
+ %2:vgpr(s96) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s96_v_s64_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4
+ ; CHECK-LABEL: name: insert_v_s96_v_s64_32
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s96) = COPY $vgpr0_vgpr1_vgpr2
+ %1:vgpr(s64) = COPY $vgpr3_vgpr4
+ %2:vgpr(s96) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s96_s_s64_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
+ ; CHECK-LABEL: name: insert_s_s96_s_s64_0
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
+ %1:sgpr(s64) = COPY $sgpr4_sgpr5
+ %2:sgpr(s96) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s96_s_s64_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2, $sgpr4_sgpr5
+ ; CHECK-LABEL: name: insert_s_s96_s_s64_32
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_96 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s96) = COPY $sgpr0_sgpr1_sgpr2
+ %1:sgpr(s64) = COPY $sgpr4_sgpr5
+ %2:sgpr(s96) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s128_s_s64_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+ ; CHECK-LABEL: name: insert_s_s128_s_s64_0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s64) = COPY $sgpr4_sgpr5
+ %2:sgpr(s128) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+# ---
+
+# name: insert_s_s128_s_s64_32
+# legalized: true
+# regBankSelected: true
+
+# body: |
+# bb.0:
+# liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+# %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+# %1:sgpr(s64) = COPY $sgpr4_sgpr5
+# %2:sgpr(s128) = G_INSERT %0, %1, 32
+# S_ENDPGM 0, implicit %2
+# ...
+
+---
+
+name: insert_s_s128_s_s64_64
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5
+ ; CHECK-LABEL: name: insert_s_s128_s_s64_64
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s64) = COPY $sgpr4_sgpr5
+ %2:sgpr(s128) = G_INSERT %0, %1, 64
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+# NOTE(review): this G_INSERT places an s64 at bit offset 96, i.e. starting at
+# 32-bit channel 3 (96 / 32), but the checked index %subreg.sub4_sub5 names
+# channels 4 and 5 - confirm the expected subregister index.
+name: insert_s_s256_s_s64_96
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK-LABEL: name: insert_s_s256_s_s64_96
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ %1:sgpr(s64) = COPY $sgpr8_sgpr9
+ %2:sgpr(s256) = G_INSERT %0, %1, 96
+ S_ENDPGM 0, implicit %2
+...
+
+# ---
+
+# name: insert_s_s256_s_s64_128
+# legalized: true
+# regBankSelected: true
+
+# body: |
+# bb.0:
+# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
+# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+# %1:sgpr(s64) = COPY $sgpr8_sgpr9
+# %2:sgpr(s256) = G_INSERT %0, %1, 128
+# S_ENDPGM 0, implicit %2
+# ...
+
+# ---
+
+# name: insert_s_s256_s_s64_160
+# legalized: true
+# regBankSelected: true
+
+# body: |
+# bb.0:
+# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
+# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+# %1:sgpr(s64) = COPY $sgpr8_sgpr9
+# %2:sgpr(s256) = G_INSERT %0, %1, 160
+# S_ENDPGM 0, implicit %2
+# ...
+
+---
+
+name: insert_s_s128_s_s96_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
+ ; CHECK-LABEL: name: insert_s_s128_s_s96_0
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %2:sgpr(s128) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s128_s_s96_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr6_sgpr7_sgpr8
+ ; CHECK-LABEL: name: insert_s_s128_s_s96_32
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_128 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s128) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %2:sgpr(s128) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s160_s_s96_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+ ; CHECK-LABEL: name: insert_s_s160_s_s96_0
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub0_sub1_sub2 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %2:sgpr(s160) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s160_s_s96_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+ ; CHECK-LABEL: name: insert_s_s160_s_s96_32
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %2:sgpr(s160) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s160_s_s96_64
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, $sgpr6_sgpr7_sgpr8
+ ; CHECK-LABEL: name: insert_s_s160_s_s96_64
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_160_with_sub2_sub3_sub4 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_96 = COPY $sgpr6_sgpr7_sgpr8
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_160 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s160) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
+ %1:sgpr(s96) = COPY $sgpr6_sgpr7_sgpr8
+ %2:sgpr(s160) = G_INSERT %0, %1, 64
+ S_ENDPGM 0, implicit %2
+...
More information about the llvm-commits
mailing list