[llvm] 7a62a5b - [AMDGPU] Legalize initialized LDS variables

Christudasan Devadasan via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 23 19:54:48 PDT 2021


Author: Christudasan Devadasan
Date: 2021-09-23T22:53:20-04:00
New Revision: 7a62a5b56d670c4e152159740cd7fc4030a9470f

URL: https://github.com/llvm/llvm-project/commit/7a62a5b56d670c4e152159740cd7fc4030a9470f
DIFF: https://github.com/llvm/llvm-project/commit/7a62a5b56d670c4e152159740cd7fc4030a9470f.diff

LOG: [AMDGPU] Legalize initialized LDS variables

We don't allow an initializer for LDS variables
and there is an early abort during instruction
selection. This patch legalizes them by ignoring
the init values. During assembly emission, proper
error reporting already exists for such instances.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D109901

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
    llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 4503836b68aaf..269c33e30bfc4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1378,16 +1378,11 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
          "Do not know what to do with an non-zero offset");
 
     // TODO: We could emit code to handle the initialization somewhere.
-    if (!hasDefinedInitializer(GV)) {
-      unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
-      return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
-    }
+    // We ignore the initializer for now and legalize it to allow selection.
+    // The initializer will anyway get errored out during assembly emission.
+    unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
+    return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
   }
-
-  const Function &Fn = DAG.getMachineFunction().getFunction();
-  DiagnosticInfoUnsupported BadInit(
-      Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc());
-  DAG.getContext()->diagnose(BadInit);
   return SDValue();
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 8aaad17ffdf49..c9230e131cc2c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2420,43 +2420,36 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
     }
 
     // TODO: We could emit code to handle the initialization somewhere.
-    if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
-      const SITargetLowering *TLI = ST.getTargetLowering();
-      if (!TLI->shouldUseLDSConstAddress(GV)) {
-        MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
-        return true; // Leave in place;
-      }
+    // We ignore the initializer for now and legalize it to allow selection.
+    // The initializer will anyway get errored out during assembly emission.
+    const SITargetLowering *TLI = ST.getTargetLowering();
+    if (!TLI->shouldUseLDSConstAddress(GV)) {
+      MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
+      return true; // Leave in place;
+    }
 
-      if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
-        Type *Ty = GV->getValueType();
-        // HIP uses an unsized array `extern __shared__ T s[]` or similar
-        // zero-sized type in other languages to declare the dynamic shared
-        // memory which size is not known at the compile time. They will be
-        // allocated by the runtime and placed directly after the static
-        // allocated ones. They all share the same offset.
-        if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
-          // Adjust alignment for that dynamic shared memory array.
-          MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
-          LLT S32 = LLT::scalar(32);
-          auto Sz =
-              B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
-          B.buildIntToPtr(DstReg, Sz);
-          MI.eraseFromParent();
-          return true;
-        }
+    if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
+      Type *Ty = GV->getValueType();
+      // HIP uses an unsized array `extern __shared__ T s[]` or similar
+      // zero-sized type in other languages to declare the dynamic shared
+      // memory which size is not known at the compile time. They will be
+      // allocated by the runtime and placed directly after the static
+      // allocated ones. They all share the same offset.
+      if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
+        // Adjust alignment for that dynamic shared memory array.
+        MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
+        LLT S32 = LLT::scalar(32);
+        auto Sz =
+            B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
+        B.buildIntToPtr(DstReg, Sz);
+        MI.eraseFromParent();
+        return true;
       }
-
-      B.buildConstant(
-          DstReg,
-          MFI->allocateLDSGlobal(B.getDataLayout(), *cast<GlobalVariable>(GV)));
-      MI.eraseFromParent();
-      return true;
     }
 
-    const Function &Fn = MF.getFunction();
-    DiagnosticInfoUnsupported BadInit(
-      Fn, "unsupported initializer for address space", MI.getDebugLoc());
-    Fn.getContext().diagnose(BadInit);
+    B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(),
+                                                   *cast<GlobalVariable>(GV)));
+    MI.eraseFromParent();
     return true;
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
index f4b8561a4343e..ecfacca5d6972 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll
@@ -1,3 +1,38 @@
-; RUN: not llc -global-isel -march=amdgcn -mcpu=tonga < %S/../lds-zero-initializer.ll 2>&1 | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -global-isel -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-; CHECK: <unknown>:0: error: lds: unsupported initializer for address space
+; RUN: not llc -march=amdgcn -mcpu=tahiti -global-isel < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=tonga -global-isel < %s 2>&1 | FileCheck %s
+
+; CHECK: error: lds: unsupported initializer for address space
+
+ at lds = addrspace(3) global [256 x i32] zeroinitializer
+
+define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
+  ; GCN-LABEL: name: load_zeroinit_lds_global
+  ; GCN: bb.1 (%ir-block.0):
+  ; GCN:   liveins: $sgpr0_sgpr1
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+  ; GFX8:  [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 40
+  ; GCN:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @lds
+  ; GFX8:  [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_MOV_B32_1]], [[S_MOV_B32_]], implicit-def $scc
+  ; GFX8:  [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0
+  ; GFX9:  [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 36, 0
+  ; GFX8:  [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_U32_]]
+  ; GCN:   $m0 = S_MOV_B32 -1
+  ; GFX9:  [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
+  ; GFX8:  [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 0, 0, implicit $m0, implicit $exec
+  ; GFX9:  [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY1]], 40, 0, implicit $m0, implicit $exec
+  ; GFX8:  [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295
+  ; GFX8:  [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+  ; GFX8:  [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
+  ; GFX8:  [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_LOAD_DWORDX2_IMM]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+  ; GFX8:  BUFFER_STORE_DWORD_OFFSET [[DS_READ_B32_]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec
+  ; GFX9:  [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
+  ; GFX9:  FLAT_STORE_DWORD [[COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr
+  ; GCN:   S_ENDPGM 0
+ %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10
+  %ld = load i32, i32 addrspace(3)* %gep
+  store i32 %ld, i32 addrspace(1)* %out
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll
index 63b52fa169bf1..2a8026ff516c2 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll
@@ -1,11 +1,32 @@
-; RUN: not llc -march=amdgcn -mcpu=tahiti -filetype=null < %s 2>&1 | FileCheck %s
-; RUN: not llc -march=amdgcn -mcpu=tonga -filetype=null < %s 2>&1 | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -stop-after=amdgpu-isel  -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; RUN: not llc -march=amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=tonga  < %s 2>&1 | FileCheck %s
 
 ; CHECK: error: lds: unsupported initializer for address space
 
 @lds = addrspace(3) global [256 x i32] zeroinitializer
 
 define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
+  ; GCN-LABEL: name: load_zeroinit_lds_global
+  ; GCN: bb.0 (%ir-block.0):
+  ; GCN:   liveins: $sgpr0_sgpr1
+  ; GCN:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+  ; GFX8:  [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0
+  ; GFX9:  [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0
+  ; GFX8:  [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+  ; GFX8:  [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
+  ; GFX8:  [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+  ; GFX8:  [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; GFX8:  [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
+  ; GCN:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @lds, implicit $exec
+  ; GCN:   SI_INIT_M0 -1, implicit-def $m0
+  ; GCN:   [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 killed [[V_MOV_B32_e32_]], 40, 0, implicit $m0, implicit $exec
+  ; GFX9:  [[COPY1:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
+  ; GFX8:  BUFFER_STORE_DWORD_OFFSET killed [[DS_READ_B32_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec
+  ; GFX9:  FLAT_STORE_DWORD killed [[COPY1]], killed [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr
+  ; GCN:   S_ENDPGM 0
  %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10
   %ld = load i32, i32 addrspace(3)* %gep
   store i32 %ld, i32 addrspace(1)* %out


        


More information about the llvm-commits mailing list