[llvm] AMDGPU: Return legal addressmode correctly for flat scratch (PR #71494)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 4 18:37:00 PST 2023


https://github.com/ruiling updated https://github.com/llvm/llvm-project/pull/71494

>From 7aa1f4cc88fade961ee5b055d5eae9b3658bd892 Mon Sep 17 00:00:00 2001
From: Ruiling Song <ruiling.song at amd.com>
Date: Tue, 7 Nov 2023 15:01:55 +0800
Subject: [PATCH 1/2] AMDGPU: Pre-commit test to show diff

---
 .../CodeGen/AMDGPU/scratch-pointer-sink.ll    | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll

diff --git a/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll b/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
new file mode 100644
index 0000000000000..d0078bfb34b3f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GISEL
+
+define amdgpu_gfx i32 @sink_scratch_pointer(ptr addrspace(5) %stack, i32 inreg %flag) {
+; GCN-LABEL: sink_scratch_pointer:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_add_nc_u32_e32 v0, -4, v0
+; GCN-NEXT:    s_cmp_lg_u32 s4, 0
+; GCN-NEXT:    s_cbranch_scc0 .LBB0_2
+; GCN-NEXT:  ; %bb.1: ; %bb2
+; GCN-NEXT:    scratch_load_b32 v0, v0, off
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-NEXT:  .LBB0_2: ; %bb1
+; GCN-NEXT:    v_mov_b32_e32 v1, 1
+; GCN-NEXT:    scratch_store_b32 v0, v1, off
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: sink_scratch_pointer:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_cmp_lg_u32 s4, 0
+; GISEL-NEXT:    s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT:  ; %bb.1: ; %bb2
+; GISEL-NEXT:    scratch_load_b32 v0, v0, off offset:-4
+; GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB0_2: ; %bb1
+; GISEL-NEXT:    v_mov_b32_e32 v1, 1
+; GISEL-NEXT:    scratch_store_b32 v0, v1, off offset:-4
+; GISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %ptr = getelementptr inbounds i32, ptr addrspace(5) %stack, i32 -1
+  %cond = icmp eq i32 %flag, 0
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  store i32 1, ptr addrspace(5) %ptr, align 4
+  ret i32 0
+
+bb2:
+  %value = load i32, ptr addrspace(5) %ptr, align 4
+  ret i32 %value
+}

>From e5fe483f99d842dd1a21600819ac805be04bcf4c Mon Sep 17 00:00:00 2001
From: Ruiling Song <ruiling.song at amd.com>
Date: Mon, 6 Nov 2023 09:32:03 +0800
Subject: [PATCH 2/2] AMDGPU: Return legal addressmode correctly for flat
 scratch

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 44 ++++++++++---------
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  3 +-
 .../CodeGen/AMDGPU/scratch-pointer-sink.ll    |  5 +--
 3 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 53ab5da013539..a7f4d63229b7e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1322,7 +1322,9 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
   }
 }
 
-bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
+bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM,
+                                                 unsigned AddrSpace,
+                                                 uint64_t FlatVariant) const {
   if (!Subtarget->hasFlatInstOffsets()) {
     // Flat instructions do not have offsets, and only have the register
     // address.
@@ -1330,29 +1332,27 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
   }
 
   return AM.Scale == 0 &&
-         (AM.BaseOffs == 0 ||
-          Subtarget->getInstrInfo()->isLegalFLATOffset(
-              AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));
+         (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
+                                  AM.BaseOffs, AddrSpace, FlatVariant));
 }
 
 bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
   if (Subtarget->hasFlatGlobalInsts())
-    return AM.Scale == 0 &&
-           (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
-                                    AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
-                                    SIInstrFlags::FlatGlobal));
+    return isLegalFlatAddressingMode(AM, AMDGPUAS::GLOBAL_ADDRESS,
+                                     SIInstrFlags::FlatGlobal);
 
   if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
-      // Assume the we will use FLAT for all global memory accesses
-      // on VI.
-      // FIXME: This assumption is currently wrong.  On VI we still use
-      // MUBUF instructions for the r + i addressing mode.  As currently
-      // implemented, the MUBUF instructions only work on buffer < 4GB.
-      // It may be possible to support > 4GB buffers with MUBUF instructions,
-      // by setting the stride value in the resource descriptor which would
-      // increase the size limit to (stride * 4GB).  However, this is risky,
-      // because it has never been validated.
-    return isLegalFlatAddressingMode(AM);
+    // Assume the we will use FLAT for all global memory accesses
+    // on VI.
+    // FIXME: This assumption is currently wrong.  On VI we still use
+    // MUBUF instructions for the r + i addressing mode.  As currently
+    // implemented, the MUBUF instructions only work on buffer < 4GB.
+    // It may be possible to support > 4GB buffers with MUBUF instructions,
+    // by setting the stride value in the resource descriptor which would
+    // increase the size limit to (stride * 4GB).  However, this is risky,
+    // because it has never been validated.
+    return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
+                                     SIInstrFlags::FLAT);
   }
 
   return isLegalMUBUFAddressingMode(AM);
@@ -1449,7 +1449,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
   }
 
   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
-    return isLegalMUBUFAddressingMode(AM);
+    return Subtarget->enableFlatScratch()
+               ? isLegalFlatAddressingMode(AM, AMDGPUAS::PRIVATE_ADDRESS,
+                                           SIInstrFlags::FlatScratch)
+               : isLegalMUBUFAddressingMode(AM);
 
   if (AS == AMDGPUAS::LOCAL_ADDRESS ||
       (AS == AMDGPUAS::REGION_ADDRESS && Subtarget->hasGDS())) {
@@ -1475,7 +1478,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
     // computation. We don't have instructions that compute pointers with any
     // addressing modes, so treat them as having no offset like flat
     // instructions.
-    return isLegalFlatAddressingMode(AM);
+    return isLegalFlatAddressingMode(AM, AMDGPUAS::FLAT_ADDRESS,
+                                     SIInstrFlags::FLAT);
   }
 
   // Assume a user alias of global for unknown address spaces.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 746a88c5ea13a..c9cc149218a99 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -221,7 +221,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-  bool isLegalFlatAddressingMode(const AddrMode &AM) const;
+  bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace,
+                                 uint64_t FlatVariant) const;
   bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
 
   unsigned isCFIntrinsic(const SDNode *Intr) const;
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll b/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
index d0078bfb34b3f..4987664dae022 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-pointer-sink.ll
@@ -6,16 +6,15 @@ define amdgpu_gfx i32 @sink_scratch_pointer(ptr addrspace(5) %stack, i32 inreg %
 ; GCN-LABEL: sink_scratch_pointer:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_add_nc_u32_e32 v0, -4, v0
 ; GCN-NEXT:    s_cmp_lg_u32 s4, 0
 ; GCN-NEXT:    s_cbranch_scc0 .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %bb2
-; GCN-NEXT:    scratch_load_b32 v0, v0, off
+; GCN-NEXT:    scratch_load_b32 v0, v0, off offset:-4
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .LBB0_2: ; %bb1
 ; GCN-NEXT:    v_mov_b32_e32 v1, 1
-; GCN-NEXT:    scratch_store_b32 v0, v1, off
+; GCN-NEXT:    scratch_store_b32 v0, v1, off offset:-4
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;



More information about the llvm-commits mailing list