[llvm] 4a308d3 - [AMDGPU] Keep consistent check of legal addressing mode.

Michael Liao via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 12:08:40 PST 2019


Author: Michael Liao
Date: 2019-11-20T15:08:17-05:00
New Revision: 4a308d302c3378258c5a9af231236de4d7ff741a

URL: https://github.com/llvm/llvm-project/commit/4a308d302c3378258c5a9af231236de4d7ff741a
DIFF: https://github.com/llvm/llvm-project/commit/4a308d302c3378258c5a9af231236de4d7ff741a.diff

LOG: [AMDGPU] Keep consistent check of legal addressing mode.

Summary:
- Add test cases for GFX10, which has a narrower offset range than GFX9.

Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70473

Added: 
    llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
    llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c4712198693c..77d3da169cab 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1088,23 +1088,18 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
     return AM.BaseOffs == 0 && AM.Scale == 0;
   }
 
-  // GFX9 added a 13-bit signed offset. When using regular flat instructions,
-  // the sign bit is ignored and is treated as a 12-bit unsigned offset.
-
-  // GFX10 shrinked signed offset to 12 bits. When using regular flat
-  // instructions, the sign bit is also ignored and is treated as 11-bit
-  // unsigned offset.
-
-  if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
-    return isUInt<11>(AM.BaseOffs) && AM.Scale == 0;
-
-  // Just r + i
-  return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
+  return AM.Scale == 0 &&
+         (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+                                  AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS,
+                                  /*Signed=*/false));
 }
 
 bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
   if (Subtarget->hasFlatGlobalInsts())
-    return isInt<13>(AM.BaseOffs) && AM.Scale == 0;
+    return AM.Scale == 0 &&
+           (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+                                    AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
+                                    /*Signed=*/true));
 
   if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
       // Assume the we will use FLAT for all global memory accesses
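The two hunks above replace the hard-coded isUInt<11>/isUInt<12>/isInt<13> checks with calls to SIInstrInfo::isLegalFLATOffset, so the addressing-mode query and the rest of the backend agree on what each generation can encode. Below is a rough standalone sketch of what the consolidated check amounts to, assuming the per-generation widths described in the deleted comments (GFX9: 13-bit signed, 12-bit unsigned for plain flat; GFX10: 12-bit signed, 11-bit unsigned) and using made-up helper names rather than the in-tree API:

  #include <cstdint>

  enum class Gen { GFX9, GFX10 };

  // Hypothetical stand-in for SIInstrInfo::isLegalFLATOffset in this sketch.
  bool sketchIsLegalFLATOffset(int64_t Offset, bool Signed, Gen G) {
    // Signed (global_* forms): 13 bits on GFX9, 12 bits on GFX10.
    // Unsigned (plain flat forms): the sign bit is ignored, so one bit less.
    unsigned Bits = Signed ? (G == Gen::GFX10 ? 12 : 13)
                           : (G == Gen::GFX10 ? 11 : 12);
    if (Signed) {
      int64_t Lo = -(int64_t(1) << (Bits - 1));
      int64_t Hi = (int64_t(1) << (Bits - 1)) - 1;
      return Offset >= Lo && Offset <= Hi;
    }
    return Offset >= 0 && Offset < (int64_t(1) << Bits);
  }

  // Shape of the new isLegalGlobalAddressingMode check: no scaled index, and
  // any constant offset must be encodable in the instruction.
  bool sketchIsLegalGlobalAM(int64_t BaseOffs, int64_t Scale, Gen G) {
    return Scale == 0 &&
           (BaseOffs == 0 ||
            sketchIsLegalFLATOffset(BaseOffs, /*Signed=*/true, G));
  }

Under that sketch, sketchIsLegalGlobalAM(2048, 0, Gen::GFX10) is false while the same offset is legal on GFX9, which is why the GFX10 checks in the tests below never fold offsets outside the -2048..2047 window.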

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 53c8ff186636..24769a89b9ba 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1430,7 +1430,9 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
 void SILoadStoreOptimizer::updateBaseAndOffset(MachineInstr &MI,
                                                unsigned NewBase,
                                                int32_t NewOffset) const {
-  TII->getNamedOperand(MI, AMDGPU::OpName::vaddr)->setReg(NewBase);
+  auto Base = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
+  Base->setReg(NewBase);
+  Base->setIsKill(false);
   TII->getNamedOperand(MI, AMDGPU::OpName::offset)->setImm(NewOffset);
 }
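The SILoadStoreOptimizer hunk is not just a refactor: when several memory instructions are re-anchored on the same newly computed base register, the vaddr operand being rewritten may still carry a kill flag from the register it used to read, and leaving that flag set would make every later user of the shared base read a register that liveness considers dead. Clearing it when the operand is updated keeps the kill/liveness information consistent. A toy model of the hazard, using made-up structs in place of MachineInstr/MachineOperand:

  #include <cassert>
  #include <vector>

  // Hypothetical stand-ins for MachineOperand / MachineInstr, for this sketch only.
  struct Operand { unsigned Reg; bool IsKill; };
  struct MemInst { Operand Base; long Offset; };

  // Mirrors the fixed updateBaseAndOffset(): rewrite the base register, drop
  // any stale kill flag, and fold the distance into the immediate offset.
  void updateBaseAndOffset(MemInst &MI, unsigned NewBase, long NewOffset) {
    MI.Base.Reg = NewBase;
    MI.Base.IsKill = false; // the same NewBase feeds later instructions too
    MI.Offset = NewOffset;
  }

  int main() {
    // Two loads that originally used distinct bases; the optimizer re-anchors
    // both on register 7 with small immediate offsets.
    std::vector<MemInst> Loads = {{{3, /*IsKill=*/true}, 0}, {{4, false}, 0}};
    updateBaseAndOffset(Loads[0], 7, -2048);
    updateBaseAndOffset(Loads[1], 7, 0);
    // No rewritten instruction may claim to kill the base another one still uses.
    assert(!Loads[0].Base.IsKill && !Loads[1].Base.IsKill);
    return 0;
  }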
 

diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
new file mode 100644
index 000000000000..c4fd98098032
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm-gfx10.mir
@@ -0,0 +1,218 @@
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# GFX10-LABEL: name: diffoporder_add
+# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0, 0
+# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0, 0
+
+name: diffoporder_add
+body:             |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %4:sreg_32_xm0 = COPY $sgpr101
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+    $sgpr4 = COPY %4
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    %6:vreg_64 = COPY $vgpr0_vgpr1
+    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
+    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
+    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
+    %12:sgpr_32 = COPY %1.sub1
+    %13:vgpr_32 = COPY %5
+    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
+    %16:vgpr_32 = COPY %12
+    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
+    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
+    %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
+    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
+    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
+    %25:sgpr_32 = S_MOV_B32 4096
+    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec
+    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
+    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
+    %32:sgpr_32 = S_MOV_B32 6144
+    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
+    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
+    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
+    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
+...
+---
+
+# GFX10-LABEL: name: LowestInMiddle
+# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400
+# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
+# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]]
+# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
+# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0, 0
+# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0,
+#
+# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200
+# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
+# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]]
+# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
+# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0,
+
+name: LowestInMiddle
+body:             |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %4:sreg_32_xm0 = COPY $sgpr101
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+    $sgpr4 = COPY %4
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    %6:vreg_64 = COPY $vgpr0_vgpr1
+    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
+    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
+    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
+    %12:sgpr_32 = COPY %1.sub1
+    %13:vgpr_32 = COPY %5
+    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
+    %16:vgpr_32 = COPY %12
+    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
+    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
+    %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
+    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
+    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
+    %25:sgpr_32 = S_MOV_B32 8000
+    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
+    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
+    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
+    %32:sgpr_32 = S_MOV_B32 6400
+    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
+    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
+    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
+    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
+    %39:sgpr_32 = S_MOV_B32 11200
+    %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
+    %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
+    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
+    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec
+...
+---
+
+# GFX10-LABEL: name: NegativeDistance
+# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
+# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
+# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]]
+# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
+# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0
+# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0
+# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240
+# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
+# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]]
+# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
+# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0
+
+name: NegativeDistance
+body:             |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %4:sreg_32_xm0 = COPY $sgpr101
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+    $sgpr4 = COPY %4
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    %6:vreg_64 = COPY $vgpr0_vgpr1
+    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
+    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
+    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
+    %12:sgpr_32 = COPY %1.sub1
+    %13:vgpr_32 = COPY %5
+    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
+    %16:vgpr_32 = COPY %12
+    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
+    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
+    %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
+    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
+    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
+    %25:sgpr_32 = S_MOV_B32 6144
+    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
+    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
+    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
+    %32:sgpr_32 = S_MOV_B32 8192
+    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
+    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
+    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
+    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec
+    %39:sgpr_32 = S_MOV_B32 10240
+    %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
+    %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
+    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
+    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec
+...
+---
+
+# Tests for a successful compilation.
+name: assert_hit
+body:             |
+    bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %4:sreg_32_xm0 = COPY $sgpr101
+    %5:sreg_32_xm0 = S_MOV_B32 0
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
+    $sgpr4 = COPY %4
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    %6:vreg_64 = COPY $vgpr0_vgpr1
+    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
+    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
+    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
+    %12:sgpr_32 = COPY %1.sub1
+    %13:vgpr_32 = COPY %5
+    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
+    %16:vgpr_32 = COPY %12
+    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
+    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
+    %20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
+    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
+    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
+
+    %25:sgpr_32 = S_MOV_B32 6144
+    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
+    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
+    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
+    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec
+...
+---
+
+# GFX10-LABEL: name: diffoporder_add_store
+# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0, 0, 0
+# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0, 0, 0
+
+name: diffoporder_add_store
+body:             |
+  bb.0.entry:
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+
+    %1:sgpr_32 = S_MOV_B32 4000
+    %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_I32_e64 %0.sub0, %1, 0, implicit $exec
+    %4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
+    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
+    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, 0, 0, implicit $exec
+
+    %8:sgpr_32 = S_MOV_B32 3000
+    %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_I32_e64 %0.sub0, %8, 0, implicit $exec
+    %11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
+    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
+    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, 0, 0, implicit $exec
+...

diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index 14e65fe0ee6c..811fba02c609 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 declare i64 @_Z13get_global_idj(i32)
 
@@ -22,6 +23,15 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)*  %buffer) {
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
+;
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0)
@@ -87,6 +97,18 @@ define hidden amdgpu_kernel void @clmem_read(i8 addrspace(1)*  %buffer) {
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
+;
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0)
   %conv = and i64 %call, 255
@@ -208,6 +230,17 @@ define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) {
 ; GFX9:    global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:2048
 ; GFX9:    global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072
 ; GFX9:    global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
+;
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
 entry:
    %call = tail call i64 @_Z13get_global_idj(i32 0)
    %conv = and i64 %call, 255
@@ -270,6 +303,11 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)*  %buffer) {
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+;
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0)
   %conv = and i64 %call, 255
@@ -311,6 +349,11 @@ define amdgpu_kernel void @p32Offset64(i8 addrspace(1)*  %buffer) {
 ; GFX9:    global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:3072
 ; GFX9:    global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
 ; GFX9:    global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
+;
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off offset:1024
+; GFX10:   global_load_dword {{v[0-9]+}}, v[{{[0-9]+:[0-9]+}}], off{{$}}
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0)
   %conv = and i64 %call, 255
@@ -348,13 +391,20 @@ define amdgpu_kernel void @DiffBase(i8 addrspace(1)* %buffer1,
 ; GFX8:    flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
 ; GFX8:    flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
 ; GFX8:    flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-
+;
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+;
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
                                     i8 addrspace(1)* %buffer2) {
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0)
@@ -412,6 +462,15 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) {
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
+;
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0)
   %conv = and i64 %call, 255
@@ -462,6 +521,9 @@ define hidden amdgpu_kernel void @negativeoffset(i8 addrspace(1)* nocapture %buf
 ;
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
 ; GFX9:    global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
+;
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-2048
+; GFX10:   global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off
 entry:
   %call = tail call i64 @_Z13get_global_idj(i32 0) #2
   %conv = and i64 %call, 255


        

