[llvm] [ReplaceConstant] Don't create instructions for the same constant multiple times in the same basic block (PR #169141)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 26 07:48:33 PST 2025


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/169141

>From 398edfb754ce217696c7f9f2edfc1f1ab0a762c2 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 21 Nov 2025 21:13:14 -0500
Subject: [PATCH 1/2] [ReplaceConstant] Don't create instructions for the same
 constant multiple times in the same basic block

Fixes #167500.
---
 llvm/lib/IR/ReplaceConstant.cpp               | 10 +++-
 ...s-variable-multiple-use-in-one-phi-node.ll | 51 +++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll

diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index b3586b45a23f2..f3d1914a8dc82 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -91,6 +91,11 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
 
   // Replace those expandable operands with instructions
   bool Changed = false;
+  // We need to cache the instructions we've already expanded to avoid expanding
+  // the same constant multiple times in the same basic block, which is
+  // problematic when the same constant is used in a phi node multiple times.
+  DenseMap<std::pair<Constant *, BasicBlock *>, SmallVector<Instruction *, 4>>
+      ConstantToInstructionMap;
   while (!InstructionWorklist.empty()) {
     Instruction *I = InstructionWorklist.pop_back_val();
     DebugLoc Loc = I->getDebugLoc();
@@ -105,7 +110,10 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
       if (auto *C = dyn_cast<Constant>(U.get())) {
         if (ExpandableUsers.contains(C)) {
           Changed = true;
-          auto NewInsts = expandUser(BI, C);
+          SmallVector<Instruction *, 4> &NewInsts =
+              ConstantToInstructionMap[std::make_pair(C, BI->getParent())];
+          if (NewInsts.empty())
+            NewInsts = expandUser(BI, C);
           for (auto *NI : NewInsts)
             NI->setDebugLoc(Loc);
           InstructionWorklist.insert_range(NewInsts);
diff --git a/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll b/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll
new file mode 100644
index 0000000000000..35a9bee03411f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-module-lds %s -o - | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-module-lds %s -o - | FileCheck %s
+
+@lds = internal unnamed_addr addrspace(3) global [6144 x half] poison, align 2
+
+define amdgpu_kernel void @test(ptr addrspace(1) %out) {
+; CHECK-LABEL: define amdgpu_kernel void @test(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    switch i32 0, label %[[BB_3:.*]] [
+; CHECK-NEXT:      i32 18, label %[[BB_2:.*]]
+; CHECK-NEXT:      i32 1, label %[[BB_2]]
+; CHECK-NEXT:      i32 0, label %[[BB_3]]
+; CHECK-NEXT:    ]
+; CHECK:       [[BB_1:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.test.lds to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    switch i32 0, label %[[BB_3]] [
+; CHECK-NEXT:      i32 18, label %[[BB_2]]
+; CHECK-NEXT:      i32 1, label %[[BB_2]]
+; CHECK-NEXT:      i32 0, label %[[BB_3]]
+; CHECK-NEXT:    ]
+; CHECK:       [[BB_2]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[TMP1]], %[[BB_1]] ], [ [[TMP1]], %[[BB_1]] ], [ 10, %[[ENTRY]] ], [ 10, %[[ENTRY]] ]
+; CHECK-NEXT:    store i64 [[PHI]], ptr addrspace(1) [[OUT]], align 8
+; CHECK-NEXT:    br label %[[BB_3]]
+; CHECK:       [[BB_3]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  switch i32 0, label %bb.3 [
+  i32 18, label %bb.2
+  i32 1, label %bb.2
+  i32 0, label %bb.3
+  ]
+bb.1:
+  switch i32 0, label %bb.3 [
+  i32 18, label %bb.2
+  i32 1, label %bb.2
+  i32 0, label %bb.3
+  ]
+
+bb.2:
+  %phi = phi i64 [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [10, %entry], [10, %entry]
+  store i64 %phi, ptr addrspace(1) %out, align 8
+  br label %bb.3
+
+bb.3:
+  ret void
+}

>From 81ef9480a5279d615e74e7cbaa37291bde48bd01 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 26 Nov 2025 10:47:59 -0500
Subject: [PATCH 2/2] fix dominance issue and comment

---
 llvm/lib/IR/ReplaceConstant.cpp               | 16 ++--
 .../AMDGPU/lower-kernel-lds-constexpr.ll      | 43 +++++----
 .../AMDGPU/lower-module-lds-constantexpr.ll   | 88 ++++++++++---------
 3 files changed, 83 insertions(+), 64 deletions(-)

diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index f3d1914a8dc82..f473421e6fd48 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -22,9 +22,8 @@ static bool isExpandableUser(User *U) {
   return isa<ConstantExpr>(U) || isa<ConstantAggregate>(U);
 }
 
-static SmallVector<Instruction *, 4> expandUser(BasicBlock::iterator InsertPt,
-                                                Constant *C) {
-  SmallVector<Instruction *, 4> NewInsts;
+static void expandUser(BasicBlock::iterator InsertPt, Constant *C,
+                       SmallVector<Instruction *, 4> &NewInsts) {
   if (auto *CE = dyn_cast<ConstantExpr>(C)) {
     Instruction *ConstInst = CE->getAsInstruction();
     ConstInst->insertBefore(*InsertPt->getParent(), InsertPt);
@@ -46,7 +45,6 @@ static SmallVector<Instruction *, 4> expandUser(BasicBlock::iterator InsertPt,
   } else {
     llvm_unreachable("Not an expandable user");
   }
-  return NewInsts;
 }
 
 bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
@@ -112,8 +110,14 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
           Changed = true;
           SmallVector<Instruction *, 4> &NewInsts =
               ConstantToInstructionMap[std::make_pair(C, BI->getParent())];
-          if (NewInsts.empty())
-            NewInsts = expandUser(BI, C);
+          // If the cached instruction is after the insertion point, we need to
+          // create a new one. We can't simply move the cached instruction
+          // because its operands (also expanded instructions) might not
+          // dominate the new position.
+          if (NewInsts.empty() || BI->comesBefore(NewInsts.front())) {
+            NewInsts.clear();
+            expandUser(BI, C, NewInsts);
+          }
           for (auto *NI : NewInsts)
             NI->setDebugLoc(Loc);
           InstructionWorklist.insert_range(NewInsts);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
index 4fef9624d8ad6..459615139d745 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll
@@ -14,13 +14,13 @@
 
 ; Use constant from different kernels
 ;.
-; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 2
-; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 2
-; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4
-; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16
-; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 2
-; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t poison, align 16
-; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t poison, align 16
+; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t poison, align 2, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t poison, align 2, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t poison, align 4, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t poison, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t poison, align 2, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t poison, align 16, !absolute_symbol !0
+; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t poison, align 16, !absolute_symbol !0
 ;.
 define amdgpu_kernel void @k0(i64 %x) {
 ; CHECK-LABEL: @k0(
@@ -67,7 +67,7 @@ define amdgpu_kernel void @k3(i64 %x) {
 ; CHECK-LABEL: @k3(
 ; CHECK-NEXT:    %1 = getelementptr inbounds [32 x i8], ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, i32 0, i32 16
 ; CHECK-NEXT:    %ptr1 = addrspacecast ptr addrspace(3) %1 to ptr
-; CHECK-NEXT:    store i64 1, ptr %ptr1, align 1
+; CHECK-NEXT:    store i64 1, ptr %ptr1, align 16
 ; CHECK-NEXT:    %2 = getelementptr inbounds [32 x i8], ptr addrspace(3) @llvm.amdgcn.kernel.k3.lds, i32 0, i32 24
 ; CHECK-NEXT:    %ptr2 = addrspacecast ptr addrspace(3) %2 to ptr
 ; CHECK-NEXT:    store i64 2, ptr %ptr2, align 8
@@ -98,9 +98,9 @@ define amdgpu_kernel void @k4(i64 %x) {
 ; Multiple constexpr use in a same instruction.
 define amdgpu_kernel void @k5() {
 ; CHECK-LABEL: @k5(
-; CHECK-NEXT:  %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
-; CHECK-NEXT:  %2 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
-; CHECK-NEXT:  call void poison(ptr %1, ptr %2)
+; CHECK-NEXT:    %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.k5.lds to ptr
+; CHECK-NEXT:    call void poison(ptr %1, ptr %1)
+; CHECK-NEXT:    ret void
 ;
   call void poison(ptr addrspacecast (ptr addrspace(3) @lds.4 to ptr), ptr addrspacecast (ptr addrspace(3) @lds.4 to ptr))
   ret void
@@ -113,13 +113,22 @@ define amdgpu_kernel void @k5() {
 ; expression operands of store should be replaced by equivalent instruction sequences.
 define amdgpu_kernel void @k6() {
 ; CHECK-LABEL: @k6(
-
-; CHECK-NEXT:  %1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
-; CHECK-NEXT:  %2 = ptrtoint ptr addrspace(3) %1 to i32
-; CHECK-NEXT:  %3 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
-; CHECK-NEXT:  store i32 %2, ptr addrspace(3) %3, align 8
-; CHECK-NEXT:  ret void
+; CHECK-NEXT:    %1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
+; CHECK-NEXT:    %2 = ptrtoint ptr addrspace(3) %1 to i32
+; CHECK-NEXT:    %3 = getelementptr inbounds [4 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.k6.lds, i32 0, i32 2
+; CHECK-NEXT:    store i32 %2, ptr addrspace(3) %3, align 8
+; CHECK-NEXT:    ret void
 ;
+
   store i32 ptrtoint (ptr addrspace(3) getelementptr inbounds ([4 x i32], ptr addrspace(3) @lds.5, i32 0, i32 2) to i32), ptr addrspace(3) getelementptr inbounds ([4 x i32], ptr addrspace(3) @lds.5, i32 0, i32 2)
   ret void
 }
+;.
+; CHECK: attributes #0 = { "amdgpu-lds-size"="2" }
+; CHECK: attributes #1 = { "amdgpu-lds-size"="4" }
+; CHECK: attributes #2 = { "amdgpu-lds-size"="32" }
+; CHECK: attributes #3 = { "amdgpu-lds-size"="2020" }
+; CHECK: attributes #4 = { "amdgpu-lds-size"="16" }
+;.
+; CHECK: !0 = !{i32 0, i32 1}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
index a2761193c2d65..deb2d00e8bd81 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-constantexpr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
 
@@ -9,73 +10,78 @@
 @kern = addrspace(3) global float poison, align 4
 
 ; @a_func is only used from a non-kernel function so is rewritten
-; CHECK-NOT: @a_func
 ; @b_both is used from a non-kernel function so is rewritten
-; CHECK-NOT: @b_both
 ; sorted both < func, so @b_both at null and @a_func at 4
 @b_both = addrspace(3) global float poison, align 4
 
-; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4
-; CHECK: @llvm.amdgcn.kernel.timestwo.lds = internal addrspace(3) global %llvm.amdgcn.kernel.timestwo.lds.t poison, align 4
 
-; CHECK-LABEL: @get_func()
-; CHECK:       %0 = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
-; CHECK:       %1 = ptrtoint ptr %0 to i64
-; CHECK:       %2 = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
-; CHECK:       %3 = ptrtoint ptr %2 to i64
-; CHECK:       %4 = add i64 %1, %3
-; CHECK:       %5 = inttoptr i64 %4 to ptr
-; CHECK:       %6 = load i32, ptr %5, align 4
-; CHECK:       ret i32 %6
 define i32 @get_func() local_unnamed_addr #0 {
+; CHECK-LABEL: define i32 @get_func() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.module.lds to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
 entry:
   %0 = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @a_func to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @a_func to ptr) to i64)) to ptr), align 4
   ret i32 %0
 }
 
-; CHECK-LABEL: @set_func(i32 %x)
-; CHECK:      %0 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
-; CHECK:      %1 = ptrtoint ptr %0 to i64
-; CHECK:      %2 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
-; CHECK:      %3 = ptrtoint ptr %2 to i64
-; CHECK:      %4 = add i64 %1, %3
-; CHECK:      %5 = inttoptr i64 %4 to ptr
-; CHECK:      store i32 %x, ptr %5, align 4
-; CHECK:      ret void
 define void @set_func(i32 %x) {
+; CHECK-LABEL: define void @set_func(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1) to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    store i32 [[X]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   store i32 %x, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
   ret void
 }
 
-; CHECK-LABEL: @timestwo() #0
-; CHECK-NOT: call void @llvm.donothing()
 
-; CHECK:      %1 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
-; CHECK:      %2 = ptrtoint ptr %1 to i64
-; CHECK:      %3 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
-; CHECK:      %4 = ptrtoint ptr %3 to i64
-; CHECK:      %5 = add i64 %2, %4
-; CHECK:      %6 = inttoptr i64 %5 to ptr
-; CHECK:      %ld = load i32, ptr %6, align 4
-; CHECK:      %mul = mul i32 %ld, 2
-; CHECK:      %7 = addrspacecast ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.timestwo.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
-; CHECK:      %8 = ptrtoint ptr %7 to i64
-; CHECK:      %9 = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
-; CHECK:      %10 = ptrtoint ptr %9 to i64
-; CHECK:      %11 = add i64 %8, %10
-; CHECK:      %12 = inttoptr i64 %11 to ptr
-; CHECK:      store i32 %mul, ptr %12, align 4
-; CHECK:      ret void
 define amdgpu_kernel void @timestwo() {
+; CHECK-LABEL: define amdgpu_kernel void @timestwo(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = addrspacecast ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TIMESTWO_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[LD:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[LD]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = addrspacecast ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TIMESTWO_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds, i32 0, i32 1) to ptr
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.timestwo.lds to ptr
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP8]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[TMP12]], align 4
+; CHECK-NEXT:    ret void
+;
   %ld = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64)) to ptr), align 4
   %mul = mul i32 %ld, 2
   store i32 %mul, ptr inttoptr (i64 add (i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @kern to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @b_both to ptr) to i64)) to ptr), align 4
   ret void
 }
 
-; CHECK-LABEL: @through_functions() #0
 define amdgpu_kernel void @through_functions() {
+; CHECK-LABEL: define amdgpu_kernel void @through_functions(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
+; CHECK-NEXT:    [[LD:%.*]] = call i32 @get_func()
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[LD]], 4
+; CHECK-NEXT:    call void @set_func(i32 [[MUL]])
+; CHECK-NEXT:    ret void
+;
   %ld = call i32 @get_func()
   %mul = mul i32 %ld, 4
   call void @set_func(i32 %mul)



More information about the llvm-commits mailing list