[llvm] r356611 - AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 20 13:41:35 PDT 2019


Author: arsenm
Date: Wed Mar 20 13:41:34 2019
New Revision: 356611

URL: http://llvm.org/viewvc/llvm-project?rev=356611&view=rev
Log:
AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect

Whether the index is constant shouldn't change the register bank choice.
We also don't need to restrict this to only indexing VGPRs, since it's
possible to index SGPRs (although SelectionDAG made that difficult to
use). Allow directly indexing SGPRs when appropriate.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp?rev=356611&r1=356610&r2=356611&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Wed Mar 20 13:41:34 2019
@@ -52,24 +52,6 @@ AMDGPURegisterBankInfo::AMDGPURegisterBa
 
 }
 
-static bool isConstant(const MachineOperand &MO, int64_t &C) {
-  const MachineFunction *MF = MO.getParent()->getParent()->getParent();
-  const MachineRegisterInfo &MRI = MF->getRegInfo();
-  const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
-  if (!Def)
-    return false;
-
-  if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
-    C = Def->getOperand(1).getCImm()->getSExtValue();
-    return true;
-  }
-
-  if (Def->getOpcode() == AMDGPU::COPY)
-    return isConstant(Def->getOperand(1), C);
-
-  return false;
-}
-
 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           const RegisterBank &Src,
                                           unsigned Size) const {
@@ -816,42 +798,35 @@ AMDGPURegisterBankInfo::getInstrMapping(
 
 
   case AMDGPU::G_EXTRACT_VECTOR_ELT: {
-    unsigned IdxOp = 2;
-    int64_t Imm;
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
+    unsigned OutputBankID = isSALUMapping(MI) ?
                             AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
+    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
     unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
-    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
-    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, DstSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
 
-    // The index can be either if the source vector is VGPR.
-    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
+    // The index may be in either bank when the source vector is a VGPR.
+    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_INSERT_VECTOR_ELT: {
-    // XXX - Do we really need to fully handle these? The constant case should
-    // be legalized away before RegBankSelect?
-
-    int64_t Imm;
-
-    unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
-    unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
-                      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
-
-
-    // TODO: Can do SGPR indexing, which would obviate the need for the
-    // isConstant check.
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
-      OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
-    }
+    unsigned OutputBankID = isSALUMapping(MI) ?
+      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
 
+    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+    unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+    unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+
+    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+    OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
 
+    // The index may be in either bank when the source vector is a VGPR.
+    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
     break;
   }
   case AMDGPU::G_UNMERGE_VALUES: {

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir?rev=356611&r1=356610&r2=356611&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir Wed Mar 20 13:41:34 2019
@@ -1,39 +1,76 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: extract_vector_elt_0_v2i32_s
+name: extract_vector_elt_v16i32_ss
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1
-    ; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
+    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
     ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
-    %0:_(<2 x s32>) = COPY $sgpr0_sgpr1
-    %1:_(s32) = G_CONSTANT i32 0
+    %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:_(s32) = COPY $sgpr16
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0 = COPY %2
 ...
 
+---
+name: extract_vector_elt_v16i32_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>)
+    ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_vector_elt_v16i32_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
 
 ---
-name: extract_vector_elt_0_v4i32_s
+name: extract_vector_elt_v16i32_vv
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32)
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
+    ; CHECK-LABEL: name: extract_vector_elt_v16i32_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
+    ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
     ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
-    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = G_CONSTANT i32 0
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $vgpr16
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0 = COPY %2
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir?rev=356611&r1=356610&r2=356611&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir Wed Mar 20 13:41:34 2019
@@ -1,111 +1,111 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
-name: insert_vector_elt_v4i32_s_s_k
+name: insert_vector_elt_v4i32_s_s_s
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5
-    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
+    ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $sgpr5
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_v4i32_v_s_k
+name: insert_vector_elt_v4i32_v_s_s
 legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5
-    ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = COPY $sgpr1
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_v4i32_s_v_k
+name: insert_vector_elt_v4i32_s_v_s
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5
-    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $vgpr2
-    %2:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr4
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_var_v4i32_s_s_s
+name: insert_vector_elt_v4i32_s_s_v
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6
-    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $sgpr6
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $vgpr0
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
 
 ---
-name: insert_vector_elt_var_v4i32_s_s_v
+name: insert_vector_elt_v4i32_s_v_v
 legalized: true
 
 body: |
   bb.0:
-    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6
-    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32)
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $vgpr1
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
@@ -116,17 +116,38 @@ legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr0
+
     ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
+    ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $sgpr4
+    %2:_(s32) = COPY $vgpr0
+    %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+...
+
+---
+name: insert_vector_elt_var_v4i32_v_v_s
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0
+
+    ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $sgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s32) = COPY $sgpr0
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
 ...
@@ -137,16 +158,17 @@ legalized: true
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
+
     ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
     ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32)
     ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $vgpr5
-    %2:_(s32) = COPY $vgpr6
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s32) = COPY $vgpr5
     %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
 ...




More information about the llvm-commits mailing list