[llvm] SeparateConstOffsetFromGEP: Add more tests with lower-gep (PR #134684)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 7 09:47:08 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

I didn't see any failures while trying to break hasMoreThanOneUseInLoop
or other paths here.

---

Patch is 21.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134684.diff


1 Files Affected:

- (added) llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll (+482) 


``````````diff
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
new file mode 100644
index 0000000000000..687e921640492
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
@@ -0,0 +1,482 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes='separate-const-offset-from-gep<lower-gep>' \
+; RUN:       -reassociate-geps-verify-no-dead-code -S | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+
+%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
+
+ at packed_struct_array = addrspace(3) global [1024 x %struct.Packed] poison, align 1
+
+; Verifies we can emit correct uglygep if the address is not natually
+; aligned. This shoult not produce a no-op bitcast with opaque
+; pointers.
+define ptr addrspace(3) @packed_struct(i32 %i, i32 %j) {
+; CHECK-LABEL: @packed_struct(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[IDXPROM]], 77824
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(3) @packed_struct_array, i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[I:%.*]], 76
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[J:%.*]], 3
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP3]], i32 [[TMP2]]
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP4]], i32 112
+; CHECK-NEXT:    ret ptr addrspace(3) [[UGLYGEP5]]
+;
+entry:
+  %add = add nsw i32 %j, 3
+  %add1 = add nsw i32 %i, 1
+  %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed], ptr addrspace(3) @packed_struct_array, i64 0, i32 %add1, i32 1, i32 %add
+  ret ptr addrspace(3) %arrayidx3
+}
+
+%struct = type { i32, i32, i32 }
+
+define i32 @test1(ptr %ptr, i64 %idx) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[IDX:%.*]], 12
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 4
+; CHECK-NEXT:    [[LV_1:%.*]] = load i32, ptr [[UGLYGEP1]], align 4
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[IDX]], 12
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP2]]
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP2]], i64 8
+; CHECK-NEXT:    [[LV_2:%.*]] = load i32, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 0
+;
+  %gep.1 = getelementptr %struct, ptr %ptr, i64 %idx, i32 1
+  %lv.1 = load i32, ptr %gep.1
+  %c = icmp slt i32 %lv.1, 0
+  br i1 %c, label %then, label %else
+
+then:
+  %gep.2 = getelementptr %struct, ptr %ptr, i64 %idx, i32 2
+  %lv.2 = load i32, ptr %gep.2
+  %res = add i32 %lv.1, %lv.2
+  ret i32 %res
+
+else:
+  ret i32 0
+}
+
+define i32 @test1_fatptr(ptr addrspace(7) %ptr, i64 %idx) {
+; CHECK-LABEL: @test1_fatptr(
+; CHECK-NEXT:    [[IDXPROM:%.*]] = trunc i64 [[IDX:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[IDXPROM]], 12
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP]], i32 4
+; CHECK-NEXT:    [[LV_1:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP1]], align 4
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[IDXPROM2:%.*]] = trunc i64 [[IDX]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[IDXPROM2]], 12
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP2]]
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP3]], i32 8
+; CHECK-NEXT:    [[LV_2:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP4]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       else:
+; CHECK-NEXT:    ret i32 0
+;
+  %gep.1 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 1
+  %lv.1 = load i32, ptr addrspace(7) %gep.1
+  %c = icmp slt i32 %lv.1, 0
+  br i1 %c, label %then, label %else
+
+then:
+  %gep.2 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 2
+  %lv.2 = load i32, ptr  addrspace(7) %gep.2
+  %res = add i32 %lv.1, %lv.2
+  ret i32 %res
+
+else:
+  ret i32 0
+}
+
+
+; Test lowerToSingleIndexGEPs
+define void @test_A_sub_B_add_ConstantInt(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT:    [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT:    br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT:    [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3:%.*]], i64 2044
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[UGLYGEP4]], i64 [[TMP3]]
+; CHECK-NEXT:    store float 1.000000e+00, ptr [[UGLYGEP5]], align 4
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call i32 @foo()
+  %rem = srem i32 %0, 5
+  %add = add nsw i32 %rem , 511
+  br label %for.body
+
+for.body:
+  %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %mul = mul nuw nsw i32 %k, 5
+  %sub1 = sub nsw i32 %mul, %rem
+  %cmp26 = icmp ult i32 %sub1, 512
+  br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+  %sub2 = sub nsw i32 %add, %mul
+  %idxprom = sext i32 %sub2 to i64
+  %arryidx = getelementptr inbounds float, ptr %p, i64 %idxprom
+  store float 1.0, ptr %arryidx, align 4
+  br label %cond.end
+
+cond.end:
+  %inc = add nuw nsw i32 %k, 1
+  %exitcond = icmp ne i32 %inc, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+ at extern_array = global [1024 x i32] poison, align 16
+
+; Test lowerToSingleIndexGEPs with a global variable pointer
+define void @test_A_sub_B_add_ConstantInt_gv_baseptr(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_gv_baseptr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT:    [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT:    br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT:    [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr @extern_array, i64 2044
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT:    store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call i32 @foo()
+  %rem = srem i32 %0, 5
+  %add = add nsw i32 %rem , 511
+  br label %for.body
+
+for.body:
+  %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %mul = mul nuw nsw i32 %k, 5
+  %sub1 = sub nsw i32 %mul, %rem
+  %cmp26 = icmp ult i32 %sub1, 512
+  br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+  %sub2 = sub nsw i32 %add, %mul
+  %idxprom = sext i32 %sub2 to i64
+  %arryidx = getelementptr inbounds float, ptr @extern_array, i64 %idxprom
+  store float 1.0, ptr %arryidx, align 4
+  br label %cond.end
+
+cond.end:
+  %inc = add nuw nsw i32 %k, 1
+  %exitcond = icmp ne i32 %inc, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; Test lowerToSingleIndexGEPs with a constant data variable pointer
+define void @test_A_sub_B_add_ConstantInt_null_basptr() {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_null_basptr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT:    [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT:    br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT:    [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr null, i64 2044
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT:    store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call i32 @foo()
+  %rem = srem i32 %0, 5
+  %add = add nsw i32 %rem , 511
+  br label %for.body
+
+for.body:
+  %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %mul = mul nuw nsw i32 %k, 5
+  %sub1 = sub nsw i32 %mul, %rem
+  %cmp26 = icmp ult i32 %sub1, 512
+  br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+  %sub2 = sub nsw i32 %add, %mul
+  %idxprom = sext i32 %sub2 to i64
+  %arryidx = getelementptr inbounds float, ptr null, i64 %idxprom
+  store float 1.0, ptr %arryidx, align 4
+  br label %cond.end
+
+cond.end:
+  %inc = add nuw nsw i32 %k, 1
+  %exitcond = icmp ne i32 %inc, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+declare i32 @foo()
+
+define amdgpu_kernel void @multi_use_in_loop(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    br label [[BB11:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br label [[BB8]]
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1:%.*]], i64 [[TMP]]
+; CHECK-NEXT:    store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       bb11:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK:       bb20:
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1]], i64 [[TMP19]]
+; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT:    br label [[BB22]]
+; CHECK:       bb22:
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[ARG1]], i64 [[TMP1]]
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT:    [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+  %tmp = sext i32 %arg2 to i64
+  %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+  %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+  %tmp5 = icmp sgt i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb8
+
+bb6:                                              ; preds = %bb
+  br label %bb11
+
+bb7:                                              ; preds = %bb22
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb
+  %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+  %tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp
+  store i32 %tmp9, ptr addrspace(1) %tmp10, align 4
+  ret void
+
+bb11:                                             ; preds = %bb22, %bb6
+  %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
+  %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
+  %tmp14 = srem i32 %tmp13, %arg2
+  %tmp15 = sext i32 %tmp14 to i64
+  %tmp16 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp15
+  %tmp17 = load i32, ptr addrspace(1) %tmp16, align 4
+  %tmp18 = icmp sgt i32 %tmp17, 100
+  %tmp19 = sext i32 %tmp13 to i64
+  br i1 %tmp18, label %bb20, label %bb22
+
+bb20:                                             ; preds = %bb11
+  %tmp21 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp19
+  store i32 0, ptr addrspace(1) %tmp21, align 4
+  br label %bb22
+
+bb22:                                             ; preds = %bb20, %bb11
+  %tmp23 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp19
+  %tmp24 = load i32, ptr addrspace(1) %tmp23, align 4
+  %tmp25 = add nuw nsw i32 %tmp13, 1
+  %tmp26 = sext i32 %tmp25 to i64
+  %tmp27 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp26
+  %tmp28 = load i32, ptr addrspace(1) %tmp27, align 4
+  %tmp29 = add i32 %tmp24, %tmp12
+  %tmp30 = add i32 %tmp29, %tmp28
+  %tmp31 = icmp eq i32 %tmp25, %tmp4
+  br i1 %tmp31, label %bb7, label %bb11
+}
+
+ at extern_array_1 = external addrspace(1) global [4096 x i32], align 16
+
+ at llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @extern_array_1 to ptr) ]
+
+define void @use_in_other_func() {
+; CHECK-LABEL: @use_in_other_func(
+; CHECK-NEXT:    store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT:    store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 0, ptr addrspace(1) @extern_array_1
+  store i32 0, ptr addrspace(1) @extern_array_1
+  ret void
+}
+
+define amdgpu_kernel void @multi_use_in_loop_global_base_address(ptr addrspace(1) nocapture readonly %arg, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop_global_base_address(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    br label [[BB11:%.*]]
+; CHECK:       bb7:
+; CHECK-NEXT:    br label [[BB8]]
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP]]
+; CHECK-NEXT:    store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       bb11:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK:       bb20:
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT:    br label [[BB22]]
+; CHECK:       bb22:
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT:    [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) @extern_array_1, i64 [[TMP1]]
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT:    [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+  %tmp = sext i32 %arg2 to i64
+  %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+  %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+  %tmp5 = icmp sgt i32 %tmp4, 0
+  br i1 %tmp5, label %bb6, label %bb8
+
+bb6:                                              ; preds = %bb
+  br label %bb11
+
+bb7:                                              ; preds = %bb22
+  br label %bb8
+
+bb8:                                              ; preds = %bb7, %bb
+  %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+  %tmp1...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/134684


More information about the llvm-commits mailing list