[llvm] r364806 - AMDGPU/GlobalISel: Complete implementation of G_GEP

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 09:34:49 PDT 2019


Author: arsenm
Date: Mon Jul  1 09:34:48 2019
New Revision: 364806

URL: http://llvm.org/viewvc/llvm-project?rev=364806&view=rev
Log:
AMDGPU/GlobalISel: Complete implementation of G_GEP

Also works around a TableGen defect in selecting add with an unused carry
output. Since GEP has to be selected manually anyway, add might as well be
handled manually too.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td?rev=364806&r1=364805&r2=364806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td Mon Jul  1 09:34:48 2019
@@ -70,17 +70,6 @@ class GISelVop2Pat <
   (inst src0_vt:$src0, src1_vt:$src1)
 >;
 
-// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken.
-class GISelVop2ClampingPat <
-  SDPatternOperator node,
-  Instruction inst,
-  ValueType dst_vt,
-  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
-
-  (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
-  (inst src0_vt:$src0, src1_vt:$src1, 0)
->;
-
 class GISelVop2CommutePat <
   SDPatternOperator node,
   Instruction inst,
@@ -139,18 +128,6 @@ multiclass GISelVop2IntrPat <
 def : GISelSop2Pat <or, S_OR_B32, i32>;
 def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
 
-def : GISelSop2Pat <add, S_ADD_I32, i32>;
-
-let SubtargetPredicate = NotHasAddNoCarryInsts in {
-// FIXME: This should use the VOP3 form
-//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>;
-def : GISelVop2Pat <add, V_ADD_I32_e32, i32>;
-}
-
-let SubtargetPredicate = HasAddNoCarryInsts in {
-def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>;
-}
-
 def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
 let AddedComplexity = 100 in {
 let SubtargetPredicate = isGFX6GFX7 in {

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=364806&r1=364805&r2=364806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Jul  1 09:34:48 2019
@@ -173,13 +173,14 @@ bool AMDGPUInstructionSelector::selectPH
 
 MachineOperand
 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+                                           const TargetRegisterClass &SubRC,
                                            unsigned SubIdx) const {
 
   MachineInstr *MI = MO.getParent();
   MachineBasicBlock *BB = MO.getParent()->getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+  Register DstReg = MRI.createVirtualRegister(&SubRC);
 
   if (MO.isReg()) {
     unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
@@ -215,41 +216,87 @@ bool AMDGPUInstructionSelector::selectG_
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
-  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-
-  if (Size != 64)
-    return false;
-
-  DebugLoc DL = I.getDebugLoc();
-
-  MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
-  MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+  Register DstReg = I.getOperand(0).getReg();
+  const DebugLoc &DL = I.getDebugLoc();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
+
+  if (Size == 32) {
+    if (IsSALU) {
+      MachineInstr *Add =
+        BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstReg)
+        .add(I.getOperand(1))
+        .add(I.getOperand(2));
+      I.eraseFromParent();
+      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
+    }
+
+    if (STI.hasAddNoCarry()) {
+      I.setDesc(TII.get(AMDGPU::V_ADD_U32_e64));
+      I.addOperand(*MF, MachineOperand::CreateImm(0));
+      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    }
+
+    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
+    MachineInstr *Add
+      = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstReg)
+      .addDef(UnusedCarry, RegState::Dead)
+      .add(I.getOperand(1))
+      .add(I.getOperand(2))
+      .addImm(0);
+    I.eraseFromParent();
+    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
+  }
 
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
-          .add(Lo1)
-          .add(Lo2);
-
-  MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
-  MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
-
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
-          .add(Hi1)
-          .add(Hi2);
-
-  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
-          .addReg(DstLo)
-          .addImm(AMDGPU::sub0)
-          .addReg(DstHi)
-          .addImm(AMDGPU::sub1);
-
-  for (MachineOperand &MO : I.explicit_operands()) {
-    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
-      continue;
-    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
+  const TargetRegisterClass &RC
+    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
+  const TargetRegisterClass &HalfRC
+    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
+
+  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
+  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
+  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
+  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
+
+  Register DstLo = MRI.createVirtualRegister(&HalfRC);
+  Register DstHi = MRI.createVirtualRegister(&HalfRC);
+
+  if (IsSALU) {
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+      .add(Lo1)
+      .add(Lo2);
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+      .add(Hi1)
+      .add(Hi2);
+  } else {
+    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
+    Register CarryReg = MRI.createVirtualRegister(CarryRC);
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
+      .addDef(CarryReg)
+      .add(Lo1)
+      .add(Lo2)
+      .addImm(0);
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
+      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
+      .add(Hi1)
+      .add(Hi2)
+      .addReg(CarryReg, RegState::Kill)
+      .addImm(0);
   }
 
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+    .addReg(DstLo)
+    .addImm(AMDGPU::sub0)
+    .addReg(DstHi)
+    .addImm(AMDGPU::sub1);
+
+  if (!RBI.constrainGenericRegister(DstReg, RC, MRI) ||
+      !RBI.constrainGenericRegister(I.getOperand(1).getReg(), RC, MRI) ||
+      !RBI.constrainGenericRegister(I.getOperand(2).getReg(), RC, MRI))
+    return false;
+
   I.eraseFromParent();
   return true;
 }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=364806&r1=364805&r2=364806&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Mon Jul  1 09:34:48 2019
@@ -65,7 +65,9 @@ private:
   /// tblgen-erated 'select' implementation.
   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
 
-  MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
+  MachineOperand getSubOperand64(MachineOperand &MO,
+                                 const TargetRegisterClass &SubRC,
+                                 unsigned SubIdx) const;
   bool selectCOPY(MachineInstr &I) const;
   bool selectPHI(MachineInstr &I) const;
   bool selectG_TRUNC(MachineInstr &I) const;

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir?rev=364806&r1=364805&r2=364806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir Mon Jul  1 09:34:48 2019
@@ -16,19 +16,19 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
     ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
-    ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
-    ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
-    ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
-    ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
-    ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
+    ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec
+    ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec
+    ; GFX6: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     ; GFX9-LABEL: name: add_s32
     ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
     ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
-    ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
-    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec
-    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
+    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
     ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
     ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
 

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir?rev=364806&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-gep.mir Mon Jul  1 09:34:48 2019
@@ -0,0 +1,354 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -mattr=+wavefrontsize32,-wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32,-wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10-WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64  -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck -check-prefix=GFX10-WAVE32 %s
+
+---
+name:  gep_p0_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GFX6-LABEL: name: gep_p0_sgpr_sgpr
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+    ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+    ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+    ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX8-LABEL: name: gep_p0_sgpr_sgpr
+    ; GFX8: $vcc_hi = IMPLICIT_DEF
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+    ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+    ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+    ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX9-LABEL: name: gep_p0_sgpr_sgpr
+    ; GFX9: $vcc_hi = IMPLICIT_DEF
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+    ; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+    ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_sgpr
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+    ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+    ; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+    ; GFX10-WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+    ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_sgpr
+    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+    ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+    ; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+    ; GFX10-WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+    ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:sgpr(p0) = COPY $sgpr0_sgpr1
+    %1:sgpr(s64) = COPY $sgpr2_sgpr3
+    %2:sgpr(p0) = G_GEP %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:  gep_p0_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX6-LABEL: name: gep_p0_vgpr_vgpr
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX8-LABEL: name: gep_p0_vgpr_vgpr
+    ; GFX8: $vcc_hi = IMPLICIT_DEF
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX9-LABEL: name: gep_p0_vgpr_vgpr
+    ; GFX9: $vcc_hi = IMPLICIT_DEF
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr
+    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:vgpr(p0) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vgpr(p0) = G_GEP %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:  gep_p0_sgpr_vgpr
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+    ; GFX6-LABEL: name: gep_p0_sgpr_vgpr
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX8-LABEL: name: gep_p0_sgpr_vgpr
+    ; GFX8: $vcc_hi = IMPLICIT_DEF
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX9-LABEL: name: gep_p0_sgpr_vgpr
+    ; GFX9: $vcc_hi = IMPLICIT_DEF
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr
+    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:vgpr(p0) = COPY $sgpr0_sgpr1
+    %1:vgpr(s64) = COPY $vgpr0_vgpr1
+    %2:vgpr(p0) = G_GEP %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:  gep_p3_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: gep_p3_sgpr_sgpr
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    ; GFX8-LABEL: name: gep_p3_sgpr_sgpr
+    ; GFX8: $vcc_hi = IMPLICIT_DEF
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    ; GFX9-LABEL: name: gep_p3_sgpr_sgpr
+    ; GFX9: $vcc_hi = IMPLICIT_DEF
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_sgpr
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_sgpr
+    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    %0:sgpr(p3) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(p3) = G_GEP %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:  gep_p3_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: gep_p3_vgpr_vgpr
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit %2
+    ; GFX8-LABEL: name: gep_p3_vgpr_vgpr
+    ; GFX8: $vcc_hi = IMPLICIT_DEF
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit %2
+    ; GFX9-LABEL: name: gep_p3_vgpr_vgpr
+    ; GFX9: $vcc_hi = IMPLICIT_DEF
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    ; GFX10-WAVE64-LABEL: name: gep_p3_vgpr_vgpr
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    ; GFX10-WAVE32-LABEL: name: gep_p3_vgpr_vgpr
+    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    %0:vgpr(p3) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(p3) = G_GEP %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:  gep_p3_sgpr_vgpr
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: gep_p3_sgpr_vgpr
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit %2
+    ; GFX8-LABEL: name: gep_p3_sgpr_vgpr
+    ; GFX8: $vcc_hi = IMPLICIT_DEF
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit %2
+    ; GFX9-LABEL: name: gep_p3_sgpr_vgpr
+    ; GFX9: $vcc_hi = IMPLICIT_DEF
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr
+    ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr
+    ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    %0:vgpr(p3) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(p3) = G_GEP %0, %1
+    S_ENDPGM 0, implicit %2
+
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir?rev=364806&r1=364805&r2=364806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir Mon Jul  1 09:34:48 2019
@@ -12,7 +12,7 @@ legalized:       true
 regBankSelected: true
 
 # GCN: body:
-# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+# GCN: [[PTR:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
 
 # Immediate offset:
 # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
@@ -42,28 +42,28 @@ regBankSelected: true
 # Max immediate for CI
 # SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292
 # SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3
-# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
-# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
-# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
-# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
-# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1
-# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1
+# SIVI: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
+# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
+# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
+# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
-# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
+# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0
 
 # Immediate overflow for CI
 # GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
 # GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4
-# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
-# GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
-# GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
-# GCN: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
-# GCN-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1
-# GCN-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1
+# GCN: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
+# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
+# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
+# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
 # GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
-# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
+# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
 # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
 
 # Max 32-bit byte offset
@@ -74,14 +74,14 @@ regBankSelected: true
 # Overflow 32-bit byte offset
 # SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
 # SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
-# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
-# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
-# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
-# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
-# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1
-# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1
+# SIVI: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
+# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
+# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
+# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
-# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
+# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0
 




More information about the llvm-commits mailing list