[llvm] 3524d44 - AMDGPU/GlobalISel: Fix RegBankSelect for G_INSERT_VECTOR_ELT

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 07:58:02 PST 2020


Author: Matt Arsenault
Date: 2020-01-22T10:57:50-05:00
New Revision: 3524d4412cffd5ad2c05c017964719e9f96d4382

URL: https://github.com/llvm/llvm-project/commit/3524d4412cffd5ad2c05c017964719e9f96d4382
DIFF: https://github.com/llvm/llvm-project/commit/3524d4412cffd5ad2c05c017964719e9f96d4382.diff

LOG: AMDGPU/GlobalISel: Fix RegBankSelect for G_INSERT_VECTOR_ELT

The result and source vector are going to be tied, so they need to be
assigned to the same bank.

The inserted value also needs to be broken down based on the bank of
the result, not the bank of the inserted value itself.
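
In getInstrMapping terms, the fix derives both the source vector's
mapping and the inserted value's breakdown from the result bank. A
condensed sketch of the new mapping decision (simplified from the
change below; OutputBankID, VecSize, InsertSize and InsertEltBankID
as in the actual function):

    // Operand 0 (result) and operand 1 (source vector) will be tied
    // after selection, so both take the result's bank.
    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);

    // A 64-bit inserted value is split into 2 x 32-bit pieces whenever
    // the result is VGPR, even if the value itself lives in SGPRs.
    if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID)
      OpdsMapping[2] =
          AMDGPU::getValueMappingSplit64(InsertEltBankID, InsertSize);
    else
      OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);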

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
    llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 2e92ae51660b..4c0632e4c9fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -132,7 +132,8 @@ const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
 };
 
 
-// For some instructions which can operate 64-bit only for the scalar version.
+// For some instructions which can operate 64-bit only for the scalar
+// version. Otherwise, these need to be split into 2 32-bit operations.
 const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
   /*32-bit sgpr*/     {&SGPROnly64BreakDown[0], 1},
   /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
@@ -207,6 +208,18 @@ const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID,
   return &ValMappingsSGPR64OnlyVGPR32[2];
 }
 
+/// Split any 64-bit value into 2 32-bit pieces. Unlike
+/// getValueMappingSGPR64Only, this splits both VGPRs and SGPRs.
+const RegisterBankInfo::ValueMapping *getValueMappingSplit64(unsigned BankID,
+                                                             unsigned Size) {
+  assert(Size == 64);
+  if (BankID == AMDGPU::VGPRRegBankID)
+    return &ValMappingsSGPR64OnlyVGPR32[4];
+
+  assert(BankID == AMDGPU::SGPRRegBankID);
+  return &ValMappingsSGPR64OnlyVGPR32[1];
+}
+
 const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] {
   /* 256-bit load */    {0, 256, SGPRRegBank},
   /* 512-bit load */    {0, 512, SGPRRegBank},

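A note on getValueMappingSplit64 above: it indexes the same
ValMappingsSGPR64OnlyVGPR32 table as getValueMappingSGPR64Only, but
always returns a two-piece breakdown: entry [1] (the 2 x 32-bit sgpr
mapping) for SGPR, and entry [4] (presumably the matching 2 x 32-bit
vgpr breakdown, per the helper's comment) for VGPR, rather than a
single 64-bit sgpr mapping. A minimal usage sketch (illustrative; the
real caller is in getInstrMapping below):

    // Either bank yields a mapping that breaks the 64-bit value into
    // two 32-bit partial mappings.
    const RegisterBankInfo::ValueMapping *VM =
        AMDGPU::getValueMappingSplit64(AMDGPU::VGPRRegBankID, /*Size=*/64);
    assert(VM->NumBreakDowns == 2 && "expected two 32-bit pieces");
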
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 9f222eea33e4..c4b20f221ec6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1288,13 +1288,16 @@ void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B,
 
 // For cases where only a single copy is inserted for matching register banks.
 // Replace the register in the instruction operand
-static void substituteSimpleCopyRegs(
+static bool substituteSimpleCopyRegs(
   const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
   SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
   if (!SrcReg.empty()) {
     assert(SrcReg.size() == 1);
     OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
+    return true;
   }
+
+  return false;
 }
 
 /// Handle register layout difference for f16 images for some subtargets.
@@ -2117,17 +2120,21 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
   case AMDGPU::G_INSERT_VECTOR_ELT: {
     SmallVector<Register, 2> InsRegs(OpdMapper.getVRegs(2));
 
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT VecTy = MRI.getType(DstReg);
+
     assert(OpdMapper.getVRegs(0).empty());
-    assert(OpdMapper.getVRegs(1).empty());
     assert(OpdMapper.getVRegs(3).empty());
 
+    if (substituteSimpleCopyRegs(OpdMapper, 1))
+      MRI.setType(MI.getOperand(1).getReg(), VecTy);
+
     if (InsRegs.empty()) {
       applyDefaultMapping(OpdMapper);
       executeInWaterfallLoop(MI, MRI, { 3 });
       return;
     }
 
-    Register DstReg = MI.getOperand(0).getReg();
     Register SrcReg = MI.getOperand(1).getReg();
     Register InsReg = MI.getOperand(2).getReg();
     Register IdxReg = MI.getOperand(3).getReg();
@@ -2951,15 +2958,22 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
     unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
-    unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI);
     unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(),
                                             MRI, *TRI);
     unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
 
     OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
-    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, VecSize);
-    OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(InsertEltBankID,
-                                                       InsertSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+
+    // This is a weird case, because we need to break down the mapping based on
+    // the register bank of a different operand.
+    if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
+      OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
+                                                      InsertSize);
+    } else {
+      assert(InsertSize == 32 || InsertSize == 64);
+      OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
+    }
 
     // The index can be either if the source vector is VGPR.
     OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);

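The effect of the split mapping shows up in the regenerated test
below: a 64-bit element insert into a VGPR vector becomes a bitcast to
<16 x s32>, an unmerge of the s64 value into two s32 halves, and two
32-bit inserts at indices 2*Idx and 2*Idx+1. A rough MachineIRBuilder
sketch of that expansion (illustrative only, not the literal code in
applyMappingImpl; B, DstReg, SrcReg, InsReg and IdxReg are assumed to
be in scope):

    LLT S32 = LLT::scalar(32);
    LLT CastTy = LLT::vector(16, 32);            // <8 x s64> viewed as <16 x s32>

    auto Unmerge = B.buildUnmerge(S32, InsReg);  // s64 -> two s32 halves
    auto CastVec = B.buildBitcast(CastTy, SrcReg);

    auto One = B.buildConstant(S32, 1);
    auto IdxLo = B.buildShl(S32, IdxReg, One);   // 2 * Idx
    auto IdxHi = B.buildAdd(S32, IdxLo, One);    // 2 * Idx + 1

    auto Ins0 = B.buildInsertVectorElement(CastTy, CastVec,
                                           Unmerge.getReg(0), IdxLo);
    auto Ins1 = B.buildInsertVectorElement(CastTy, Ins0,
                                           Unmerge.getReg(1), IdxHi);
    B.buildBitcast(DstReg, Ins1);                // back to <8 x s64>
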
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
index 47ebf3488d20..50b0797b7bb3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
@@ -56,7 +56,8 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     %1:_(s32) = COPY $vgpr0
@@ -80,16 +81,17 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
     ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
     ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
     ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
     ; CHECK: .1:
     ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
-    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
+    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
     ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
     ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
     ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
     ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
     ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -120,16 +122,17 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
     ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
     ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
     ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
     ; CHECK: .1:
     ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
-    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
+    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %10, %bb.1
     ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
     ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
     ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[V_READFIRSTLANE_B32_]](s32)
     ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
     ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -285,8 +288,15 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17
     ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr18
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<8 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[COPY2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[IVEC]](<8 x s64>)
+    ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32)
+    ; CHECK: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x s64>)
     %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s64) = COPY $sgpr16_sgpr17
     %2:_(s32) = COPY $sgpr18
@@ -308,8 +318,9 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>)
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY3]](<8 x s64>)
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
     ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY2]], [[C]](s32)
     ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
@@ -339,16 +350,32 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr16_sgpr17
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-    ; CHECK: [[DEF:%[0-9]+]]:vgpr(<8 x s64>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>)
+    ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY3]](<8 x s64>)
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF2:%[0-9]+]]:vgpr(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF3:%[0-9]+]]:vgpr(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF4:%[0-9]+]]:vgpr(<8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF5:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
     ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
     ; CHECK: .1:
     ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
-    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
-    ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
+    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF5]], %bb.0, %26, %bb.1
+    ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %9(s32), %bb.1
+    ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %10(s32), %bb.1
+    ; CHECK: [[PHI3:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, %11(<16 x s32>), %bb.1
+    ; CHECK: [[PHI4:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, %12(<16 x s32>), %bb.1
+    ; CHECK: [[PHI5:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF4]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
     ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
     ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<8 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[V_READFIRSTLANE_B32_]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+    ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32)
+    ; CHECK: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>)
     ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
     ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -356,7 +383,7 @@ body: |
     ; CHECK: successors: %bb.3(0x80000000)
     ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
     ; CHECK: .3:
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[IVEC]](<8 x s64>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x s64>)
     %0:_(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     %1:_(s64) = COPY $sgpr16_sgpr17
     %2:_(s32) = COPY $vgpr0
@@ -379,8 +406,9 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<8 x s64>) = COPY [[COPY]](<8 x s64>)
     ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
-    ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+    ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY3]](<8 x s64>)
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
     ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
     ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
@@ -391,11 +419,11 @@ body: |
     ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
     ; CHECK: .1:
     ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
-    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF5]], %bb.0, %25, %bb.1
-    ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %8(s32), %bb.1
-    ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %9(s32), %bb.1
-    ; CHECK: [[PHI3:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, %10(<16 x s32>), %bb.1
-    ; CHECK: [[PHI4:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, %11(<16 x s32>), %bb.1
+    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF5]], %bb.0, %26, %bb.1
+    ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %9(s32), %bb.1
+    ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %10(s32), %bb.1
+    ; CHECK: [[PHI3:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, %11(<16 x s32>), %bb.1
+    ; CHECK: [[PHI4:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, %12(<16 x s32>), %bb.1
     ; CHECK: [[PHI5:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF4]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
     ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
     ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
@@ -464,16 +492,31 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
-    ; CHECK: [[DEF:%[0-9]+]]:vgpr(<8 x s64>) = G_IMPLICIT_DEF
-    ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>)
+    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF2:%[0-9]+]]:vgpr(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF3:%[0-9]+]]:vgpr(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF4:%[0-9]+]]:vgpr(<8 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF5:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
     ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
     ; CHECK: .1:
     ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
-    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %9, %bb.1
-    ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
+    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF5]], %bb.0, %25, %bb.1
+    ; CHECK: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %8(s32), %bb.1
+    ; CHECK: [[PHI2:%[0-9]+]]:sgpr(s32) = G_PHI [[DEF1]](s32), %bb.0, %9(s32), %bb.1
+    ; CHECK: [[PHI3:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF2]](<16 x s32>), %bb.0, %10(<16 x s32>), %bb.1
+    ; CHECK: [[PHI4:%[0-9]+]]:vgpr(<16 x s32>) = G_PHI [[DEF3]](<16 x s32>), %bb.0, %11(<16 x s32>), %bb.1
+    ; CHECK: [[PHI5:%[0-9]+]]:vgpr(<8 x s64>) = G_PHI [[DEF4]](<8 x s64>), %bb.0, %3(<8 x s64>), %bb.1
     ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
     ; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32), implicit $exec
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<8 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[V_READFIRSTLANE_B32_]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[V_READFIRSTLANE_B32_]], [[C]](s32)
+    ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SHL]], [[C]]
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[BITCAST]], [[UV]](s32), [[SHL]](s32)
+    ; CHECK: [[IVEC1:%[0-9]+]]:vgpr(<16 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[UV1]](s32), [[ADD]](s32)
+    ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<8 x s64>) = G_BITCAST [[IVEC1]](<16 x s32>)
     ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
     ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
     ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
@@ -481,7 +524,7 @@ body: |
     ; CHECK: successors: %bb.3(0x80000000)
     ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
     ; CHECK: .3:
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[IVEC]](<8 x s64>)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST1]](<8 x s64>)
     %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     %1:_(s64) = COPY $sgpr0_sgpr1
     %2:_(s32) = COPY $vgpr16
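
For the divergent-index cases in the tests above, the surrounding
structure is the usual waterfall loop: the hardware needs a uniform
index, so each iteration scalarizes one lane's index and restricts
execution to the lanes that share it. Schematic reading of the CHECK
lines (comment-only sketch, not code from the patch):

    // bb.1 (loop):
    //   ScalarIdx = V_READFIRSTLANE_B32 VgprIdx      ; take one lane's index
    //   Mask      = V_CMP_EQ_U32 ScalarIdx, VgprIdx  ; lanes matching it
    //   ...the G_INSERT_VECTOR_ELT sequence with the uniform ScalarIdx...
    //   SavedExec = S_AND_SAVEEXEC_B64 Mask          ; save exec, exec &= Mask
    //   $exec = S_XOR_B64_term $exec, SavedExec      ; clear handled lanes
    //   S_CBRANCH_EXECNZ bb.1                        ; loop while lanes remain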
More information about the llvm-commits mailing list