[llvm] SeparateConstOffsetFromGEP: Add more tests with lower-gep (PR #134684)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 7 09:47:08 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
I didn't see any failures while trying to break hasMoreThanOneUseInLoop
or other paths here.
---
Patch is 21.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134684.diff
1 Files Affected:
- (added) llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll (+482)
``````````diff
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
new file mode 100644
index 0000000000000..687e921640492
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/lower-gep.ll
@@ -0,0 +1,482 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -passes='separate-const-offset-from-gep<lower-gep>' \
+; RUN: -reassociate-geps-verify-no-dead-code -S | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+
+%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
+
+@packed_struct_array = addrspace(3) global [1024 x %struct.Packed] poison, align 1
+
+; Verifies we can emit correct uglygep if the address is not naturally
+; aligned. This should not produce a no-op bitcast with opaque
+; pointers.
+;
+; %struct.Packed is packed, so [3 x i32] takes bytes 0-11, [8 x i64] starts at
+; byte 12, and one array element is 12 + 64 = 76 bytes (1024 * 76 = 77824 for
+; the whole array, which scales the leading zero index). The constant parts of
+; the indices are expected to fold into one trailing byte offset:
+;   1 * 76 (from %i + 1) + 12 (field 1) + 3 * 8 (from %j + 3) = 112.
+define ptr addrspace(3) @packed_struct(i32 %i, i32 %j) {
+; CHECK-LABEL: @packed_struct(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[IDXPROM]], 77824
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(3) @packed_struct_array, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[I:%.*]], 76
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[J:%.*]], 3
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP3]], i32 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr addrspace(3) [[UGLYGEP4]], i32 112
+; CHECK-NEXT: ret ptr addrspace(3) [[UGLYGEP5]]
+;
+entry:
+ %add = add nsw i32 %j, 3
+ %add1 = add nsw i32 %i, 1
+ %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed], ptr addrspace(3) @packed_struct_array, i64 0, i32 %add1, i32 1, i32 %add
+ ret ptr addrspace(3) %arrayidx3
+}
+
+; Three i32 fields; the expected output below scales an element index by 12
+; bytes and addresses fields 1 and 2 at byte offsets 4 and 8.
+%struct = type { i32, i32, i32 }
+
+; Two GEPs share the variable index %idx but select different fields (1 and 2)
+; and sit in different blocks (entry and then). Each one is expected to be
+; lowered on its own to a byte GEP of %idx * 12 followed by a constant byte
+; offset (4 in the entry block, 8 in then).
+define i32 @test1(ptr %ptr, i64 %idx) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[IDX:%.*]], 12
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[LV_1:%.*]] = load i32, ptr [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[IDX]], 12
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP2]], i64 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i32, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT: ret i32 [[RES]]
+; CHECK: else:
+; CHECK-NEXT: ret i32 0
+;
+ %gep.1 = getelementptr %struct, ptr %ptr, i64 %idx, i32 1
+ %lv.1 = load i32, ptr %gep.1
+ %c = icmp slt i32 %lv.1, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %gep.2 = getelementptr %struct, ptr %ptr, i64 %idx, i32 2
+ %lv.2 = load i32, ptr %gep.2
+ %res = add i32 %lv.1, %lv.2
+ ret i32 %res
+
+else:
+ ret i32 0
+}
+
+; Same shape as @test1, but the base is a ptr addrspace(7). The expected output
+; truncates the i64 index to i32 before scaling by 12, and every byte offset is
+; an i32 (presumably because the datalayout string gives p7 a 32-bit index
+; width -- confirm).
+define i32 @test1_fatptr(ptr addrspace(7) %ptr, i64 %idx) {
+; CHECK-LABEL: @test1_fatptr(
+; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[IDX:%.*]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[IDXPROM]], 12
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP]], i32 4
+; CHECK-NEXT: [[LV_1:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP1]], align 4
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[LV_1]], 0
+; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[IDXPROM2:%.*]] = trunc i64 [[IDX]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[IDXPROM2]], 12
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP2]]
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr addrspace(7) [[UGLYGEP3]], i32 8
+; CHECK-NEXT: [[LV_2:%.*]] = load i32, ptr addrspace(7) [[UGLYGEP4]], align 4
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[LV_1]], [[LV_2]]
+; CHECK-NEXT: ret i32 [[RES]]
+; CHECK: else:
+; CHECK-NEXT: ret i32 0
+;
+ %gep.1 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 1
+ %lv.1 = load i32, ptr addrspace(7) %gep.1
+ %c = icmp slt i32 %lv.1, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %gep.2 = getelementptr %struct, ptr addrspace(7) %ptr, i64 %idx, i32 2
+ %lv.2 = load i32, ptr addrspace(7) %gep.2
+ %res = add i32 %lv.1, %lv.2
+ ret i32 %res
+
+else:
+ ret i32 0
+}
+
+
+; Test lowerToSingleIndexGEPs
+;
+; The GEP index is sext((%rem + 511) - %mul) over float elements. The expected
+; output pulls the constant out as its own byte GEP (511 * 4 = 2044) and
+; rebuilds the variable part in cond.true as (sext(%rem) - sext(%mul)) << 2.
+; %add is not expected to remain in the entry block.
+define void @test_A_sub_B_add_ConstantInt(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3:%.*]], i64 2044
+; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[UGLYGEP4]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP5]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr %p, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+@extern_array = global [1024 x i32] poison, align 16
+
+; Test lowerToSingleIndexGEPs with a global variable pointer
+;
+; Same index expression as @test_A_sub_B_add_ConstantInt, sext((%rem + 511) -
+; %mul), but the GEP base is the global @extern_array rather than an argument
+; (%p is unused in the body). The constant 2044-byte GEP off the global is
+; expected to keep inbounds; the variable-offset GEP that follows is not.
+define void @test_A_sub_B_add_ConstantInt_gv_baseptr(ptr %p) {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_gv_baseptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr @extern_array, i64 2044
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr @extern_array, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; Test lowerToSingleIndexGEPs with a constant data variable pointer
+;
+; Same index expression as @test_A_sub_B_add_ConstantInt, sext((%rem + 511) -
+; %mul), with the literal null pointer as the GEP base. The constant 2044-byte
+; GEP off null is expected to keep inbounds; the variable-offset GEP that
+; follows is not.
+define void @test_A_sub_B_add_ConstantInt_null_basptr() {
+; CHECK-LABEL: @test_A_sub_B_add_ConstantInt_null_basptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @foo()
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[TMP0]], 5
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[K]], 5
+; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[MUL]], [[REM]]
+; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i32 [[SUB1]], 512
+; CHECK-NEXT: br i1 [[CMP26]], label [[COND_TRUE:%.*]], label [[COND_END]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
+; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[SUB22]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr null, i64 2044
+; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[UGLYGEP3]], align 4
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call i32 @foo()
+ %rem = srem i32 %0, 5
+ %add = add nsw i32 %rem , 511
+ br label %for.body
+
+for.body:
+ %k = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %mul = mul nuw nsw i32 %k, 5
+ %sub1 = sub nsw i32 %mul, %rem
+ %cmp26 = icmp ult i32 %sub1, 512
+ br i1 %cmp26, label %cond.true, label %cond.end
+
+cond.true:
+ %sub2 = sub nsw i32 %add, %mul
+ %idxprom = sext i32 %sub2 to i64
+ %arryidx = getelementptr inbounds float, ptr null, i64 %idxprom
+ store float 1.0, ptr %arryidx, align 4
+ br label %cond.end
+
+cond.end:
+ %inc = add nuw nsw i32 %k, 1
+ %exitcond = icmp ne i32 %inc, 100
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare i32 @foo()
+
+; Loop (bb11 / bb20 / bb22) containing several GEPs off %arg and %arg1, some of
+; which share the index %tmp19 = sext(%tmp13). In the expected output only
+; %tmp27, whose index is sext(%tmp13 + 1), is rewritten: it becomes a byte GEP
+; of sext(%tmp13) << 2 off %arg1 followed by a constant +4 byte GEP. All other
+; GEPs are expected to stay exactly as written.
+; NOTE(review): presumably aimed at the pass's hasMoreThanOneUseInLoop check --
+; confirm against the pass source.
+define amdgpu_kernel void @multi_use_in_loop(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[BB11:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1:%.*]], i64 [[TMP]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: ret void
+; CHECK: bb11:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG1]], i64 [[TMP19]]
+; CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT: br label [[BB22]]
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[ARG1]], i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT: [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+ %tmp = sext i32 %arg2 to i64
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb8
+
+bb6: ; preds = %bb
+ br label %bb11
+
+bb7: ; preds = %bb22
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+ %tmp10 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp
+ store i32 %tmp9, ptr addrspace(1) %tmp10, align 4
+ ret void
+
+bb11: ; preds = %bb22, %bb6
+ %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
+ %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
+ %tmp14 = srem i32 %tmp13, %arg2
+ %tmp15 = sext i32 %tmp14 to i64
+ %tmp16 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp15
+ %tmp17 = load i32, ptr addrspace(1) %tmp16, align 4
+ %tmp18 = icmp sgt i32 %tmp17, 100
+ %tmp19 = sext i32 %tmp13 to i64
+ br i1 %tmp18, label %bb20, label %bb22
+
+bb20: ; preds = %bb11
+ %tmp21 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp19
+ store i32 0, ptr addrspace(1) %tmp21, align 4
+ br label %bb22
+
+bb22: ; preds = %bb20, %bb11
+ %tmp23 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp19
+ %tmp24 = load i32, ptr addrspace(1) %tmp23, align 4
+ %tmp25 = add nuw nsw i32 %tmp13, 1
+ %tmp26 = sext i32 %tmp25 to i64
+ %tmp27 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp26
+ %tmp28 = load i32, ptr addrspace(1) %tmp27, align 4
+ %tmp29 = add i32 %tmp24, %tmp12
+ %tmp30 = add i32 %tmp29, %tmp28
+ %tmp31 = icmp eq i32 %tmp25, %tmp4
+ br i1 %tmp31, label %bb7, label %bb11
+}
+
+@extern_array_1 = external addrspace(1) global [4096 x i32], align 16
+
+@llvm.used = appending global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @extern_array_1 to ptr) ]
+
+; Two plain stores straight to @extern_array_1, with no GEP involved. The
+; expected output is the same two stores, printed with an explicit align 4.
+; NOTE(review): presumably present so that @extern_array_1 has users in a
+; second function besides the loop test that follows -- confirm.
+define void @use_in_other_func() {
+; CHECK-LABEL: @use_in_other_func(
+; CHECK-NEXT: store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT: store i32 0, ptr addrspace(1) @extern_array_1, align 4
+; CHECK-NEXT: ret void
+;
+ store i32 0, ptr addrspace(1) @extern_array_1
+ store i32 0, ptr addrspace(1) @extern_array_1
+ ret void
+}
+
+define amdgpu_kernel void @multi_use_in_loop_global_base_address(ptr addrspace(1) nocapture readonly %arg, i32 %arg2) {
+; CHECK-LABEL: @multi_use_in_loop_global_base_address(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[ARG2:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG:%.*]], i64 [[TMP]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB6:%.*]], label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: br label [[BB11:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP30:%.*]], [[BB7:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: ret void
+; CHECK: bb11:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP30]], [[BB22:%.*]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP25:%.*]], [[BB22]] ], [ 0, [[BB6]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 [[TMP13]], [[ARG2]]
+; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], 100
+; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: br i1 [[TMP18]], label [[BB20:%.*]], label [[BB22]]
+; CHECK: bb20:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT: store i32 0, ptr addrspace(1) [[TMP21]], align 4
+; CHECK-NEXT: br label [[BB22]]
+; CHECK: bb22:
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) @extern_array_1, i64 [[TMP19]]
+; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) [[TMP23]], align 4
+; CHECK-NEXT: [[TMP25]] = add nuw nsw i32 [[TMP13]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr addrspace(1) @extern_array_1, i64 [[TMP1]]
+; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr addrspace(1) [[UGLYGEP]], i64 4
+; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) [[UGLYGEP2]], align 4
+; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP24]], [[TMP12]]
+; CHECK-NEXT: [[TMP30]] = add i32 [[TMP29]], [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP25]], [[TMP4]]
+; CHECK-NEXT: br i1 [[TMP31]], label [[BB7]], label [[BB11]]
+;
+bb:
+ %tmp = sext i32 %arg2 to i64
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
+ %tmp5 = icmp sgt i32 %tmp4, 0
+ br i1 %tmp5, label %bb6, label %bb8
+
+bb6: ; preds = %bb
+ br label %bb11
+
+bb7: ; preds = %bb22
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb
+ %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
+ %tmp1...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/134684
More information about the llvm-commits
mailing list