[llvm-branch-commits] [llvm] [AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds (PR #165426)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Oct 28 09:16:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Fabian Ritter (ritter-x2a)
<details>
<summary>Changes</summary>
This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds. Marking the GEPs inbounds here
means that their output won't change with the later patch.
Basically a retry of the very similar PR #<!-- -->131994, as part of an updated stack
of PRs.
For SWDEV-516125.
---
Patch is 476.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165426.diff
21 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll (+42-42)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll (+50-50)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+44-44)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+44-44)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll (+44-44)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+143-143)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+100-100)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+123-123)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll (+123-123)
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll (+100-100)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll (+60-60)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll (+60-60)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll (+60-60)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll (+60-60)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll (+59-59)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll (+56-56)
- (modified) llvm/test/CodeGen/AMDGPU/offset-split-flat.ll (+25-25)
- (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+1-1)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index d89b39348ad9a..0310b7e788ddf 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
@@ -959,7 +959,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.71(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
@@ -1007,12 +1007,12 @@ bb:
%i11 = icmp eq i32 %i, 0
%i12 = load i32, ptr addrspace(3) null, align 8
%i13 = zext i32 %i12 to i64
- %i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13
+ %i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13
br i1 %arg3, label %bb15, label %bb103
bb15:
%i16 = zext i32 %i to i64
- %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16
+ %i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16
%i18 = ptrtoint ptr addrspace(1) %i17 to i64
br i1 %arg4, label %bb19, label %bb20
@@ -1021,7 +1021,7 @@ bb19:
unreachable
bb20:
- %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256
+ %i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256
%i22 = ptrtoint ptr addrspace(1) %i21 to i64
%i23 = inttoptr i64 %i22 to ptr
%i24 = load i8, ptr %i23, align 1
@@ -1033,7 +1033,7 @@ bb26:
unreachable
bb27:
- %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512
+ %i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512
%i29 = ptrtoint ptr addrspace(1) %i28 to i64
%i30 = inttoptr i64 %i29 to ptr
%i31 = load i8, ptr %i30, align 1
@@ -1045,7 +1045,7 @@ bb33:
unreachable
bb34:
- %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768
+ %i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768
%i36 = ptrtoint ptr addrspace(1) %i35 to i64
%i37 = inttoptr i64 %i36 to ptr
%i38 = load i8, ptr %i37, align 1
@@ -1057,7 +1057,7 @@ bb40:
unreachable
bb41:
- %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024
+ %i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024
%i43 = ptrtoint ptr addrspace(1) %i42 to i64
%i44 = inttoptr i64 %i43 to ptr
%i45 = load i8, ptr %i44, align 1
@@ -1069,7 +1069,7 @@ bb47:
unreachable
bb48:
- %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280
+ %i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280
%i50 = ptrtoint ptr addrspace(1) %i49 to i64
%i51 = inttoptr i64 %i50 to ptr
%i52 = load i8, ptr %i51, align 1
@@ -1081,7 +1081,7 @@ bb54:
unreachable
bb55:
- %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536
+ %i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536
%i57 = ptrtoint ptr addrspace(1) %i56 to i64
%i58 = or i64 %i57, 1
%i59 = inttoptr i64 %i58 to ptr
@@ -1113,7 +1113,7 @@ bb67:
bb68:
%i69 = zext i1 %arg5 to i8
- %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16
+ %i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16
%i71 = ptrtoint ptr addrspace(1) %i70 to i64
br i1 %arg5, label %bb72, label %bb73
@@ -1122,7 +1122,7 @@ bb72:
unreachable
bb73:
- %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256
+ %i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256
%i75 = ptrtoint ptr addrspace(1) %i74 to i64
%i76 = inttoptr i64 %i75 to ptr
%i77 = load i8, ptr %i76, align 1
@@ -1134,7 +1134,7 @@ bb79:
unreachable
bb80:
- %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512
+ %i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512
%i82 = ptrtoint ptr addrspace(1) %i81 to i64
%i83 = or i64 %i82, 1
br i1 %arg6, label %bb84, label %bb85
@@ -1269,7 +1269,7 @@ bb174:
%i182 = select i1 %arg3, i32 %i181, i32 0
%i183 = or i32 %i182, %i154
%i184 = or i32 %i183, %i156
- %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13
+ %i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13
br i1 %arg3, label %bb186, label %bb196
bb186:
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index 890f4f77ed107..e509d7b2b9b1b 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -12,8 +12,8 @@
define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX7-NEXT: entry:
-; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
@@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
;
; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX8-NEXT: entry:
-; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
@@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
;
; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX9-NEXT: entry:
-; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
-; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
@@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
;
; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX10-NEXT: entry:
-; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
-; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
@@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i64 999999
- %in.gep = getelementptr i32, ptr %in, i64 7
+ %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+ %in.gep = getelementptr inbounds i32, ptr %in, i64 7
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
@@ -167,12 +167,12 @@ done:
define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX7-NEXT: entry:
-; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX7-NEXT: br label [[ENDIF]]
; OPT-GFX7: endif:
@@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
;
; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX8-NEXT: entry:
-; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
@@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
;
; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX9-NEXT: entry:
-; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
@@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
;
; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX10-NEXT: entry:
-; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
@@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i64 999999
- %in.gep = getelementptr i32, ptr %in, i64 7
+ %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+ %in.gep = getelementptr inbounds i32, ptr %in, i64 7
%cast = addrspacecast ptr %in.gep to ptr addrspace(1)
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
@@ -325,12 +325,12 @@ done:
define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
; OPT-NEXT: entry:
-; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT: if:
; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
-; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
+; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28
; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
; OPT-NEXT: br label [[ENDIF]]
; OPT: endif:
@@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i64 999999
- %in.gep = getelementptr i32, ptr %in, i64 7
+ %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+ %in.gep = getelementptr inbounds i32, ptr %in, i64 7
%cast = addrspacecast ptr %in.gep to ptr addrspace(4)
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
@@ -438,8 +438,8 @@ done:
define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX7-NEXT: entry:
-; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
;
; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX8-NEXT: entry:
-; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
;
; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX9-NEXT: entry:
-; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
-; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX9-NEXT: br label [[ENDIF]]
@@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
;
; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX10-NEXT: entry:
-; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i32 1024
- %in.gep = getelementptr i8, ptr %in, i64 4095
+ %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+ %in.gep = getelementptr inbounds i8, ptr %in, i64 4095
%tid = cal...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165426
More information about the llvm-branch-commits
mailing list