[llvm-branch-commits] [llvm] [AMDGPU] Remove the pass `AMDGPUPromoteKernelArguments` (PR #137655)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Apr 28 08:57:32 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
Patch is 30.33 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/137655.diff
7 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (-9)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (-2)
- (removed) llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp (-219)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (-13)
- (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (-1)
- (modified) llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll (+52-68)
- (modified) llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn (-1)
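For context, the removed pass rewrote a kernel's flat (generic) pointer arguments, and pointers loaded through them, by inserting an addrspacecast round-trip to the global address space and leaving the actual rewriting to InferAddressSpaces. A minimal before/after sketch of that transform (the kernel name and body here are hypothetical, for illustration only; the `.global`/`.flat` suffixes match what the deleted code below emitted):

```llvm
; Input: a kernel taking a flat pointer argument.
define amdgpu_kernel void @example(ptr %arg) {
entry:
  store float 0.0, ptr %arg
  ret void
}

; What the removed pass emitted (before infer-address-spaces cleaned it up):
; the argument is cast to addrspace(1) and back, with uses redirected through
; the round-trip so infer-address-spaces can propagate the global address space.
define amdgpu_kernel void @example_promoted(ptr %arg) {
entry:
  %arg.global = addrspacecast ptr %arg to ptr addrspace(1)
  %arg.flat = addrspacecast ptr addrspace(1) %arg.global to ptr
  store float 0.0, ptr %arg.flat
  ret void
}
```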
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4ff761ec19b3c..edbded03957dd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -113,15 +113,6 @@ FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
-FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
-void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
-extern char &AMDGPUPromoteKernelArgumentsID;
-
-struct AMDGPUPromoteKernelArgumentsPass
- : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-
ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 98a1147ef6d66..30cf06d3b3dd0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -59,8 +59,6 @@ FUNCTION_PASS("amdgpu-lower-kernel-attributes",
FUNCTION_PASS("amdgpu-promote-alloca", AMDGPUPromoteAllocaPass(*this))
FUNCTION_PASS("amdgpu-promote-alloca-to-vector",
AMDGPUPromoteAllocaToVectorPass(*this))
-FUNCTION_PASS("amdgpu-promote-kernel-arguments",
- AMDGPUPromoteKernelArgumentsPass())
FUNCTION_PASS("amdgpu-rewrite-undef-for-phi", AMDGPURewriteUndefForPHIPass())
FUNCTION_PASS("amdgpu-simplifylib", AMDGPUSimplifyLibCallsPass())
FUNCTION_PASS("amdgpu-unify-divergent-exit-nodes",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
deleted file mode 100644
index 06819d05b4be6..0000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This pass recursively promotes generic pointer arguments of a kernel
-/// into the global address space.
-///
-/// The pass walks the kernel's pointer arguments and the loads from them. If a
-/// loaded value is a pointer that is unmodified in the kernel before the load,
-/// the loaded pointer is promoted to global; the walk then continues recursively.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUMemoryUtils.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/InitializePasses.h"
-
-#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUPromoteKernelArguments : public FunctionPass {
- MemorySSA *MSSA;
-
- AliasAnalysis *AA;
-
- Instruction *ArgCastInsertPt;
-
- SmallVector<Value *> Ptrs;
-
- void enqueueUsers(Value *Ptr);
-
- bool promotePointer(Value *Ptr);
-
- bool promoteLoad(LoadInst *LI);
-
-public:
- static char ID;
-
- AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
-
- bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.setPreservesAll();
- }
-};
-
-} // end anonymous namespace
-
-void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
- SmallVector<User *> PtrUsers(Ptr->users());
-
- while (!PtrUsers.empty()) {
- Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
- if (!U)
- continue;
-
- switch (U->getOpcode()) {
- default:
- break;
- case Instruction::Load: {
- LoadInst *LD = cast<LoadInst>(U);
- if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
- !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
- Ptrs.push_back(LD);
-
- break;
- }
- case Instruction::GetElementPtr:
- case Instruction::AddrSpaceCast:
- case Instruction::BitCast:
- if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
- PtrUsers.append(U->user_begin(), U->user_end());
- break;
- }
- }
-}
-
-bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
- bool Changed = false;
-
- LoadInst *LI = dyn_cast<LoadInst>(Ptr);
- if (LI)
- Changed |= promoteLoad(LI);
-
- PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
- if (!PT)
- return Changed;
-
- if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
- PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
- enqueueUsers(Ptr);
-
- if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
- return Changed;
-
- IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
- : ArgCastInsertPt);
-
- // Cast the pointer to the global address space and back to flat, and let the
- // Infer Address Spaces pass do all the necessary rewriting.
- PointerType *NewPT =
- PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
- Value *Cast =
- B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
- Value *CastBack =
- B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
- Ptr->replaceUsesWithIf(CastBack,
- [Cast](Use &U) { return U.getUser() != Cast; });
-
- return true;
-}
-
-bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
- if (!LI->isSimple())
- return false;
-
- LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
- return true;
-}
-
-// Skip leading static allocas when choosing the insertion point.
-static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
- BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
- for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
- AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
-
- // If this is a dynamic alloca, the value may depend on the loaded kernargs,
- // so loads will need to be inserted before it.
- if (!AI || !AI->isStaticAlloca())
- break;
- }
-
- return InsPt;
-}
-
-bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
- AliasAnalysis &AA) {
- if (skipFunction(F))
- return false;
-
- CallingConv::ID CC = F.getCallingConv();
- if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
- return false;
-
- ArgCastInsertPt = &*getInsertPt(*F.begin());
- this->MSSA = &MSSA;
- this->AA = &AA;
-
- for (Argument &Arg : F.args()) {
- if (Arg.use_empty())
- continue;
-
- PointerType *PT = dyn_cast<PointerType>(Arg.getType());
- if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
- PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
- PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
- continue;
-
- Ptrs.push_back(&Arg);
- }
-
- bool Changed = false;
- while (!Ptrs.empty()) {
- Value *Ptr = Ptrs.pop_back_val();
- Changed |= promotePointer(Ptr);
- }
-
- return Changed;
-}
-
-bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
- MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- return run(F, MSSA, AA);
-}
-
-INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
- "AMDGPU Promote Kernel Arguments", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
- "AMDGPU Promote Kernel Arguments", false, false)
-
-char AMDGPUPromoteKernelArguments::ID = 0;
-
-FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
- return new AMDGPUPromoteKernelArguments();
-}
-
-PreservedAnalyses
-AMDGPUPromoteKernelArgumentsPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- AliasAnalysis &AA = AM.getResult<AAManager>(F);
- if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- PA.preserve<MemorySSAAnalysis>();
- return PA;
- }
- return PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 150060e1b266c..53f41812e523d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -435,11 +435,6 @@ static cl::opt<bool> EnablePreRAOptimizations(
cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnablePromoteKernelArguments(
- "amdgpu-enable-promote-kernel-arguments",
- cl::desc("Enable promotion of flat kernel pointer arguments to global"),
- cl::Hidden, cl::init(true));
-
static cl::opt<bool> EnableImageIntrinsicOptimizer(
"amdgpu-enable-image-intrinsic-optimizer",
cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
@@ -520,7 +515,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUArgumentUsageInfoPass(*PR);
initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
- initializeAMDGPUPromoteKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(*PR);
initializeAMDGPUPostLegalizerCombinerPass(*PR);
@@ -854,13 +848,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
FunctionPassManager FPM;
- // Add the promote-kernel-arguments pass to the opt pipeline right before
- // infer-address-spaces, which is needed to do the actual address space
- // rewriting.
- if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
- EnablePromoteKernelArguments)
- FPM.addPass(AMDGPUPromoteKernelArgumentsPass());
-
// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
FPM.addPass(InferAddressSpacesPass());
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 09a3096602fc3..d1c67b408a95f 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -91,7 +91,6 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUPreloadKernArgProlog.cpp
AMDGPUPrintfRuntimeBinding.cpp
AMDGPUPromoteAlloca.cpp
- AMDGPUPromoteKernelArguments.cpp
AMDGPURegBankCombiner.cpp
AMDGPURegBankLegalize.cpp
AMDGPURegBankLegalizeHelper.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll
index 0696cbe5aa891..aa7f820189507 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -amdgpu-promote-kernel-arguments -infer-address-spaces | FileCheck %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -passes=amdgpu-promote-kernel-arguments,infer-address-spaces | FileCheck %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -amdgpu-promote-kernel-arguments -infer-address-spaces | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -passes=infer-address-spaces | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa < %s -infer-address-spaces | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=GCN %s
; GCN-LABEL: ptr_nest_3:
; GCN-COUNT-2: global_load_dwordx2
@@ -11,11 +10,9 @@ define amdgpu_kernel void @ptr_nest_3(ptr addrspace(1) nocapture readonly %Arg)
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i32 [[I]]
-; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0:![0-9]+]]
-; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
-; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr addrspace(1) [[P2_GLOBAL]], align 8, !amdgpu.noclobber [[META0]]
-; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast ptr [[P3]] to ptr addrspace(1)
-; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[P3_GLOBAL]], align 4
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8
+; CHECK-NEXT: [[P3:%.*]] = load ptr, ptr [[P2]], align 8
+; CHECK-NEXT: store float 0.000000e+00, ptr [[P3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -36,9 +33,8 @@ define amdgpu_kernel void @ptr_bitcast(ptr nocapture readonly %Arg) {
; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i32 [[I]]
-; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8, !amdgpu.noclobber [[META0]]
-; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast ptr [[P2]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 0, ptr addrspace(1) [[P2_GLOBAL]], align 4
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr addrspace(1) [[P1]], align 8
+; CHECK-NEXT: store i32 0, ptr [[P2]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -57,11 +53,10 @@ entry:
define amdgpu_kernel void @ptr_in_struct(ptr addrspace(1) nocapture readonly %Arg) {
; CHECK-LABEL: @ptr_in_struct(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P1:%.*]] = load ptr, ptr addrspace(1) [[ARG:%.*]], align 8, !amdgpu.noclobber [[META0]]
-; CHECK-NEXT: [[P1_GLOBAL:%.*]] = addrspacecast ptr [[P1]] to ptr addrspace(1)
+; CHECK-NEXT: [[P1:%.*]] = load ptr, ptr addrspace(1) [[ARG:%.*]], align 8
; CHECK-NEXT: [[ID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[P1_GLOBAL]], i32 [[ID]]
-; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i32 [[ID]]
+; CHECK-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -85,36 +80,34 @@ define amdgpu_kernel void @flat_ptr_arg(ptr nocapture readonly noalias %Arg, ptr
; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast ptr [[ARG:%.*]] to ptr addrspace(1)
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64
-; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i64 [[IDXPROM]]
-; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber [[META0]]
-; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1)
-; CHECK-NEXT: [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4, !amdgpu.noclobber [[META0]]
+; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8
+; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[I1]], align 4
; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X:%.*]]
; CHECK-NEXT: store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4
-; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 1
-; CHECK-NEXT: [[I3:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[I1]], i64 1
+; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX3_1]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[X]], 1
; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_1]]
; CHECK-NEXT: store float [[I3]], ptr addrspace(3) [[ARRAYIDX512_1]], align 4
-; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 2
-; CHECK-NEXT: [[I4:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, ptr [[I1]], i64 2
+; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX3_2]], align 4
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[X]], 2
; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_2]]
; CHECK-NEXT: store float [[I4]], ptr addrspace(3) [[ARRAYIDX512_2]], align 4
-; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 3
-; CHECK-NEXT: [[I5:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_3]], align 4
+; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, ptr [[I1]], i64 3
+; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX3_3]], align 4
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[X]], 3
; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[ADD_3]]
; CHECK-NEXT: store float [[I5]], ptr addrspace(3) [[ARRAYIDX512_3]], align 4
; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X]], -1
; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[SUB]]
; CHECK-NEXT: [[I6:%.*]] = load float, ptr addrspace(3) [[ARRAYIDX711]], align 4
-; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[IDXPROM]]
-; CHECK-NEXT: [[I7:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX11]], align 8, !amdgpu.noclobber [[META0]]
-; CHECK-NEXT: [[I7_GLOBAL:%.*]] = addrspacecast ptr [[I7]] to ptr addrspace(1)
+; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG_GLOBAL]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[I7:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX11]], align 8
; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I7_GLOBAL]], i64 [[IDXPROM8]]
-; CHECK-NEXT: store float [[I6]], ptr addrspace(1) [[ARRAYIDX9]], align 4
+; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[I7]], i64 [[IDXPROM8]]
+; CHECK-NEXT: store float [[I6]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -161,23 +154,22 @@ define amdgpu_kernel void @global_ptr_arg(ptr addrspace(1) nocapture readonly %A
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[ARG:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8, !amdgpu.noclobber [[META0]]
-; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast ptr [[I1]] to ptr addrspace(1)
-; CHECK-NEXT: [[I2:%.*]] = load float, ptr addrspace(1) [[I1_GLOBAL]], align 4, !amdgpu.noclobber [[META0]]
+; CHECK-NEXT: [[I1:%.*]] = load ptr, ptr addrspace(1) [[ARRAYIDX10]], align 8
+; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[I1]], align 4
; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], ptr addrspace(3) @LDS, i32 0, i32 [[X:%.*]]
; CHECK-NEXT: store float [[I2]], ptr addrspace(3) [[ARRAYIDX512]], align 4
-; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[I1_GLOBAL]], i64 1
-; CHECK-NEXT: [[I3:%.*]] = load float, ptr addrspace(1) [[ARRAYIDX3_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, ptr [[I1]], i64 1...
[truncated]
``````````
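The test updates above show the net effect: with the pass removed, chased pointer loads no longer receive the `!amdgpu.noclobber` annotation or a `.global` addrspacecast, and address-space rewriting is left entirely to `infer-address-spaces` (per the updated RUN lines). A reduced sketch, condensed from the `ptr_nest_3` test with hypothetical names:

```llvm
; After this change, the loaded pointer keeps the generic (flat) address
; space unless infer-address-spaces can prove otherwise:
define amdgpu_kernel void @nested(ptr addrspace(1) %arg) {
entry:
  %p2 = load ptr, ptr addrspace(1) %arg, align 8
  store float 0.0, ptr %p2, align 4
  ret void
}
; Previously, the pass plus infer-address-spaces produced checks like:
;   %p2 = load ptr, ptr addrspace(1) %arg, align 8, !amdgpu.noclobber !0
;   %p2.global = addrspacecast ptr %p2 to ptr addrspace(1)
;   store float 0.0, ptr addrspace(1) %p2.global, align 4
```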
https://github.com/llvm/llvm-project/pull/137655