[llvm] a9ee058 - AMDGPU/GlobalISel: Match global saddr addressing mode

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 17 12:48:13 PDT 2020


Author: Matt Arsenault
Date: 2020-08-17T15:48:06-04:00
New Revision: a9ee0589a8bc1584af4209fe6439c68aa875065f

URL: https://github.com/llvm/llvm-project/commit/a9ee0589a8bc1584af4209fe6439c68aa875065f
DIFF: https://github.com/llvm/llvm-project/commit/a9ee0589a8bc1584af4209fe6439c68aa875065f.diff

LOG: AMDGPU/GlobalISel: Match global saddr addressing mode

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUGISel.td
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index dd7219187348..d1f477f78772 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -74,6 +74,9 @@ def gi_flat_offset :
 def gi_flat_offset_signed :
     GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
     GIComplexPatternEquiv<FLATOffsetSigned>;
+def gi_global_saddr :
+    GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
+    GIComplexPatternEquiv<GlobalSAddr>;
 
 def gi_mubuf_scratch_offset :
     GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f64aaf5062c5..7e842835a5b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3254,6 +3254,79 @@ AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
   return selectFlatOffsetImpl<true>(Root);
 }
 
+/// Match a zero extend from a 32-bit value to 64-bits.
+static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
+  Register ZExtSrc;
+  if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc))))
+    return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register();
+
+  // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
+  const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
+    return false;
+
+  int64_t MergeRHS;
+  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(MergeRHS)) &&
+      MergeRHS == 0) {
+    return Def->getOperand(1).getReg();
+  }
+
+  return Register();
+}
+
+// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
+  Register PtrBase;
+  int64_t ImmOffset;
+
+  // Match the immediate offset first, which canonically is moved as low as
+  // possible.
+  std::tie(PtrBase, ImmOffset) = getPtrBaseWithConstantOffset(Root.getReg(),
+                                                              *MRI);
+
+  // TODO: Could split larger constant into VGPR offset.
+  if (ImmOffset != 0 &&
+      !TII.isLegalFLATOffset(ImmOffset, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+    PtrBase = Root.getReg();
+    ImmOffset = 0;
+  }
+
+  // Match the variable offset.
+  const MachineInstr *PtrBaseDef = getDefIgnoringCopies(PtrBase, *MRI);
+  if (PtrBaseDef->getOpcode() != AMDGPU::G_PTR_ADD)
+    return None;
+
+  // Look through the SGPR->VGPR copy.
+  Register PtrBaseSrc =
+    getSrcRegIgnoringCopies(PtrBaseDef->getOperand(1).getReg(), *MRI);
+  if (!PtrBaseSrc)
+    return None;
+
+  const RegisterBank *BaseRB = RBI.getRegBank(PtrBaseSrc, *MRI, TRI);
+  if (BaseRB->getID() != AMDGPU::SGPRRegBankID)
+    return None;
+
+  Register SAddr = PtrBaseSrc;
+  Register PtrBaseOffset = PtrBaseDef->getOperand(2).getReg();
+
+  // It's possible voffset is an SGPR here, but the copy to VGPR will be
+  // inserted later.
+  Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset);
+  if (!VOffset)
+    return None;
+
+  return {{[=](MachineInstrBuilder &MIB) { // saddr
+             MIB.addReg(SAddr);
+           },
+           [=](MachineInstrBuilder &MIB) { // voffset
+             MIB.addReg(VOffset);
+           },
+           [=](MachineInstrBuilder &MIB) { // offset
+             MIB.addImm(ImmOffset);
+           }}};
+}
+
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
   return PSV && PSV->isStack();

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c9129bf1105b..b18867299baf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -185,6 +185,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
   InstructionSelector::ComplexRendererFns
   selectFlatOffsetSigned(MachineOperand &Root) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectGlobalSAddr(MachineOperand &Root) const;
+
   InstructionSelector::ComplexRendererFns
   selectMUBUFScratchOffen(MachineOperand &Root) const;
   InstructionSelector::ComplexRendererFns

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
new file mode 100644
index 000000000000..4f289d555913
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir
@@ -0,0 +1,275 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# TODO: Better to initialize 0 vgpr and use sgpr base
+---
+
+name: load_global_s32_from_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1
+
+    ; GFX9-LABEL: name: load_global_s32_from_sgpr
+    ; GFX9: liveins: $sgpr0_sgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    ; GFX10-LABEL: name: load_global_s32_from_sgpr
+    ; GFX10: liveins: $sgpr0_sgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    %1:vgpr(p1) = COPY %0
+    %2:vgpr(s32) = G_LOAD %1 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %2
+
+...
+
+# FIXME: This zext wouldn't select on its own.
+---
+
+name: load_global_s32_from_sgpr_zext_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $vgpr0
+
+    ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
+    ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+    ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
+    ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+    %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(p1) = COPY %0
+    %3:vgpr(s64) = G_ZEXT %1
+    %4:vgpr(p1) = G_PTR_ADD %2, %3
+    %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %5
+
+...
+
+# Test with zext lowered to G_MERGE_VALUES
+---
+
+name: load_global_s32_from_sgpr_merge_zext_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $vgpr0
+
+    ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
+    ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+    ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
+    ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+    %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(p1) = COPY %0
+    %zero:vgpr(s32) = G_CONSTANT i32 0
+    %3:vgpr(s64) = G_MERGE_VALUES %1, %zero
+    %4:vgpr(p1) = G_PTR_ADD %2, %3
+    %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %5
+
+...
+
+---
+
+name: load_global_s32_from_sgpr_merge_not_0_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $vgpr0
+
+    ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr
+    ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; GFX9: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1
+    ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
+    ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr
+    ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; GFX10: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1
+    ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(p1) = COPY %0
+    %notzero:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s64) = G_MERGE_VALUES %1, %notzero
+    %4:vgpr(p1) = G_PTR_ADD %2, %3
+    %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %5
+
+...
+
+---
+
+name: load_global_s32_from_sgpr_zext_vgpr_offset4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $vgpr0
+
+    ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
+    ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+    ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
+    ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; GFX10: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0
+    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1
+    ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1
+    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX10: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+    ; GFX10: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+    ; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
+    ; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(p1) = COPY %0
+    %zero:vgpr(s32) = G_CONSTANT i32 0
+    %zext:vgpr(s64) = G_MERGE_VALUES %1, %zero
+    %4:vgpr(p1) = G_PTR_ADD %2, %zext
+    %5:vgpr(s64) = G_CONSTANT i64 4095
+    %6:vgpr(p1) = G_PTR_ADD %4, %5
+    %7:vgpr(s32) = G_LOAD %6 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %7
+
+...
+
+---
+
+name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1, $vgpr0
+
+    ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
+    ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+    ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
+    ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+    ; GFX10: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0
+    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1
+    ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1
+    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX10: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+    ; GFX10: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+    ; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
+    ; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    %0:sgpr(p1) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(p1) = COPY %0
+    %zero:vgpr(s32) = G_CONSTANT i32 0
+    %zext:vgpr(s64) = G_MERGE_VALUES %1, %zero
+    %4:vgpr(p1) = G_PTR_ADD %2, %zext
+    %5:vgpr(s64) = G_CONSTANT i64 -4096
+    %6:vgpr(p1) = G_PTR_ADD %4, %5
+    %7:vgpr(s32) = G_LOAD %6 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %7
+
+...


        


More information about the llvm-commits mailing list