[llvm] r335988 - [AMDGPU] Enable LICM in the BE pipeline
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 29 09:26:53 PDT 2018
Author: rampitec
Date: Fri Jun 29 09:26:53 2018
New Revision: 335988
URL: http://llvm.org/viewvc/llvm-project?rev=335988&view=rev
Log:
[AMDGPU] Enable LICM in the BE pipeline
This allows hoisting the code that computes the reciprocal of a
loop-invariant denominator in integer division out of the loop, after
the division has been expanded by codegen prepare.
Differential Revision: https://reviews.llvm.org/D48604
Added:
llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Jun 29 09:26:53 2018
@@ -587,6 +587,7 @@ void AMDGPUPassConfig::addEarlyCSEOrGVNP
}
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
+ addPass(createLICMPass());
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
Modified: llvm/trunk/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll Fri Jun 29 09:26:53 2018
@@ -24,7 +24,7 @@ define amdgpu_hs void @_amdgpu_hs_main(i
.endls: ; preds = %.beginls, %.entry
%.fca.2.gep120.i = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>] addrspace(5)* %__llpc_global_proxy_7.i, i64 0, i64 2
- store <4 x float> <float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01>, <4 x float> addrspace(5)* %.fca.2.gep120.i, align 16
+ store volatile <4 x float> <float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01>, <4 x float> addrspace(5)* %.fca.2.gep120.i, align 16
br label %bb
bb: ; preds = %bb, %.endls
Modified: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll Fri Jun 29 09:26:53 2018
@@ -210,7 +210,7 @@ bb.end:
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
-; GCN: s_and_b64 exec, exec, vcc
+; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}}
; GCN-NOT: s_or_b64 exec, exec
Modified: llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/i1-copy-from-loop.ll Fri Jun 29 09:26:53 2018
@@ -7,11 +7,13 @@
; only contain the lanes that were active during the last loop iteration.
;
; SI: ; %for.body
-; SI: v_cmp_gt_u32_e64 [[SREG:s\[[0-9]+:[0-9]+\]]], 4,
-; SI: v_cndmask_b32_e64 [[VREG:v[0-9]+]], 0, -1, [[SREG]]
-; SI-NOT: [[VREG]]
-; SI: ; %for.end
-; SI: v_cmp_ne_u32_e32 vcc, 0, [[VREG]]
+; SI: v_cmp_gt_u32_e64 [[SREG:s\[[0-9]+:[0-9]+\]]], 4,
+; SI: v_cndmask_b32_e64 [[VREG:v[0-9]+]], 0, -1, [[SREG]]
+; SI-NEXT: s_cbranch_vccnz [[ENDIF:BB[0-9_]+]]
+; SI: [[ENDIF]]:
+; SI-NOT: [[VREG]]
+; SI: ; %for.end
+; SI: v_cmp_ne_u32_e32 vcc, 0, [[VREG]]
define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
entry:
br label %for.body
Added: llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll?rev=335988&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/idiv-licm.ll Fri Jun 29 09:26:53 2018
@@ -0,0 +1,249 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}udiv32_invariant_denom:
+; GCN: v_cvt_f32_u32
+; GCN: v_rcp_iflag_f32
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
+; GCN: v_cvt_u32_f32_e32
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_mul_lo_i32
+; GCN-DAG: v_sub_i32_e32
+; GCN-DAG: v_cmp_eq_u32_e64
+; GCN-DAG: v_cndmask_b32_e64
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_add_i32_e32
+; GCN-DAG: v_subrev_i32_e32
+; GCN-DAG: v_cndmask_b32_e64
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = udiv i32 %tmp, %arg1
+ %tmp5 = zext i32 %tmp to i64
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
+ store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
+ %tmp7 = add nuw nsw i32 %tmp, 1
+ %tmp8 = icmp eq i32 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}urem32_invariant_denom:
+; GCN: v_cvt_f32_u32
+; GCN: v_rcp_iflag_f32
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
+; GCN: v_cvt_u32_f32_e32
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_mul_lo_i32
+; GCN-DAG: v_sub_i32_e32
+; GCN-DAG: v_cmp_eq_u32_e64
+; GCN-DAG: v_cndmask_b32_e64
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_add_i32_e32
+; GCN-DAG: v_subrev_i32_e32
+; GCN-DAG: v_cndmask_b32_e64
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = urem i32 %tmp, %arg1
+ %tmp5 = zext i32 %tmp to i64
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
+ store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
+ %tmp7 = add nuw nsw i32 %tmp, 1
+ %tmp8 = icmp eq i32 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}sdiv32_invariant_denom:
+; GCN: v_cvt_f32_u32
+; GCN: v_rcp_iflag_f32
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
+; GCN: v_cvt_u32_f32_e32
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_mul_lo_i32
+; GCN-DAG: v_sub_i32_e32
+; GCN-DAG: v_cmp_eq_u32_e64
+; GCN-DAG: v_cndmask_b32_e64
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_add_i32_e32
+; GCN-DAG: v_subrev_i32_e32
+; GCN-DAG: v_cndmask_b32_e64
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = sdiv i32 %tmp, %arg1
+ %tmp5 = zext i32 %tmp to i64
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
+ store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
+ %tmp7 = add nuw nsw i32 %tmp, 1
+ %tmp8 = icmp eq i32 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}srem32_invariant_denom:
+; GCN: v_cvt_f32_u32
+; GCN: v_rcp_iflag_f32
+; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x4f800000,
+; GCN: v_cvt_u32_f32_e32
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_mul_lo_i32
+; GCN-DAG: v_sub_i32_e32
+; GCN-DAG: v_cmp_eq_u32_e64
+; GCN-DAG: v_cndmask_b32_e64
+; GCN-DAG: v_mul_hi_u32
+; GCN-DAG: v_add_i32_e32
+; GCN-DAG: v_subrev_i32_e32
+; GCN-DAG: v_cndmask_b32_e64
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i32 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = srem i32 %tmp, %arg1
+ %tmp5 = zext i32 %tmp to i64
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp5
+ store i32 %tmp4, i32 addrspace(1)* %tmp6, align 4
+ %tmp7 = add nuw nsw i32 %tmp, 1
+ %tmp8 = icmp eq i32 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}udiv16_invariant_denom:
+; GCN: v_cvt_f32_u32
+; GCN: v_rcp_iflag_f32
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = udiv i16 %tmp, %arg1
+ %tmp5 = zext i16 %tmp to i64
+ %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
+ store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
+ %tmp7 = add nuw nsw i16 %tmp, 1
+ %tmp8 = icmp eq i16 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}urem16_invariant_denom:
+; GCN: v_cvt_f32_u32
+; GCN: v_rcp_iflag_f32
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = urem i16 %tmp, %arg1
+ %tmp5 = zext i16 %tmp to i64
+ %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
+ store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
+ %tmp7 = add nuw nsw i16 %tmp, 1
+ %tmp8 = icmp eq i16 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}sdiv16_invariant_denom:
+; GCN-DAG: s_sext_i32_i16
+; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
+; GCN-DAG: v_cvt_f32_i32
+; GCN-DAG: v_rcp_iflag_f32
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = sdiv i16 %tmp, %arg1
+ %tmp5 = zext i16 %tmp to i64
+ %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
+ store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
+ %tmp7 = add nuw nsw i16 %tmp, 1
+ %tmp8 = icmp eq i16 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
+
+; GCN-LABEL: {{^}}srem16_invariant_denom:
+; GCN-DAG: s_sext_i32_i16
+; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff
+; GCN-DAG: v_cvt_f32_i32
+; GCN-DAG: v_rcp_iflag_f32
+; GCN: [[LOOP:BB[0-9_]+]]:
+; GCN-NOT: v_rcp
+; GCN: s_cbranch_scc0 [[LOOP]]
+; GCN: s_endpgm
+define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %arg, i16 %arg1) {
+bb:
+ br label %bb3
+
+bb2: ; preds = %bb3
+ ret void
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i16 [ 0, %bb ], [ %tmp7, %bb3 ]
+ %tmp4 = srem i16 %tmp, %arg1
+ %tmp5 = zext i16 %tmp to i64
+ %tmp6 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %tmp5
+ store i16 %tmp4, i16 addrspace(1)* %tmp6, align 2
+ %tmp7 = add nuw nsw i16 %tmp, 1
+ %tmp8 = icmp eq i16 %tmp7, 1024
+ br i1 %tmp8, label %bb2, label %bb3
+}
Modified: llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/infinite-loop.ll Fri Jun 29 09:26:53 2018
@@ -12,7 +12,7 @@ entry:
br label %loop
loop:
- store i32 999, i32 addrspace(1)* %out, align 4
+ store volatile i32 999, i32 addrspace(1)* %out, align 4
br label %loop
}
@@ -21,7 +21,7 @@ loop:
; IR: br i1 %cond, label %loop, label %UnifiedReturnBlock
; IR: loop:
-; IR: store i32 999, i32 addrspace(1)* %out, align 4
+; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
; IR: br i1 true, label %loop, label %UnifiedReturnBlock
; IR: UnifiedReturnBlock:
@@ -47,7 +47,7 @@ entry:
br i1 %cond, label %loop, label %return
loop:
- store i32 999, i32 addrspace(1)* %out, align 4
+ store volatile i32 999, i32 addrspace(1)* %out, align 4
br label %loop
return:
@@ -59,11 +59,11 @@ return:
; IR: br i1 undef, label %loop1, label %loop2
; IR: loop1:
-; IR: store i32 999, i32 addrspace(1)* %out, align 4
+; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
; IR: br i1 true, label %loop1, label %DummyReturnBlock
; IR: loop2:
-; IR: store i32 888, i32 addrspace(1)* %out, align 4
+; IR: store volatile i32 888, i32 addrspace(1)* %out, align 4
; IR: br i1 true, label %loop2, label %DummyReturnBlock
; IR: DummyReturnBlock:
@@ -96,11 +96,11 @@ entry:
br i1 undef, label %loop1, label %loop2
loop1:
- store i32 999, i32 addrspace(1)* %out, align 4
+ store volatile i32 999, i32 addrspace(1)* %out, align 4
br label %loop1
loop2:
- store i32 888, i32 addrspace(1)* %out, align 4
+ store volatile i32 888, i32 addrspace(1)* %out, align 4
br label %loop2
}
@@ -113,7 +113,7 @@ loop2:
; IR: br label %inner_loop
; IR: inner_loop:
-; IR: store i32 999, i32 addrspace(1)* %out, align 4
+; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
; IR: %cond3 = icmp eq i32 %tmp, 3
; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock
@@ -132,7 +132,6 @@ loop2:
; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %inner_loop
; SI: s_waitcnt expcnt(0)
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
-; SI: v_cmp_ne_u32_e32
; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG]]
@@ -156,7 +155,7 @@ outer_loop:
br label %inner_loop
inner_loop: ; preds = %LeafBlock, %LeafBlock1
- store i32 999, i32 addrspace(1)* %out, align 4
+ store volatile i32 999, i32 addrspace(1)* %out, align 4
%cond3 = icmp eq i32 %tmp, 3
br i1 %cond3, label %inner_loop, label %outer_loop
Modified: llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll Fri Jun 29 09:26:53 2018
@@ -36,7 +36,7 @@
; GCN-NEXT: s_andn2_b64 exec, exec, [[OR_BREAK]]
; GCN-NEXT: s_cbranch_execnz [[INNER_LOOP]]
-; GCN: ; %bb.{{[0-9]+}}: ; %Flow1{{$}}
+; GCN: ; %bb.{{[0-9]+}}: ; %Flow2{{$}}
; GCN-NEXT: ; in Loop: Header=[[OUTER_LOOP]] Depth=1
; Ensure copy is eliminated
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf.ll Fri Jun 29 09:26:53 2018
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:
@@ -89,17 +89,24 @@ declare float @llvm.fabs.f32(float) noun
; This broke the old AMDIL cfg structurizer
; FUNC-LABEL: {{^}}loop_land_info_assert:
-; SI: s_cmp_lt_i32
-; SI-NEXT: s_cbranch_scc1 [[ENDPGM:BB[0-9]+_[0-9]+]]
+; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
+; SI: s_and_b64 vcc, exec, [[CMP4]]
+; SI-NEXT: s_cbranch_vccnz [[BR1:BB[0-9_]+]]
+; SI-NEXT: s_branch [[BR2:BB[0-9_]+]]
+; SI-NEXT: BB{{[0-9_]+}}:
+; SI-NEXT: buffer_store_dword
+
+; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]:
+
+; SI: [[BR1]]:
+; SI-NEXT: s_and_b64 vcc, exec,
+; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
+; SI: s_branch [[INFLOOP]]
+; SI-NEXT: [[BR2]]:
+; SI: s_cbranch_vccz [[ENDPGM]]
-; SI: s_cmpk_lt_i32
-; SI-NEXT: s_cbranch_scc0 [[ENDPGM]]
-
-; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
-; SI: s_cbranch_vccnz [[INFLOOP]]
-
-; SI: [[ENDPGM]]:
-; SI: s_endpgm
+; SI: [[ENDPGM]]:
+; SI-NEXT: s_endpgm
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
%cmp = icmp sgt i32 %c0, 0
@@ -144,7 +151,6 @@ return:
ret void
}
-
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
attributes #0 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smrd.ll?rev=335988&r1=335987&r2=335988&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll Fri Jun 29 09:26:53 2018
@@ -328,6 +328,7 @@ ret_block:
.inner_loop_body:
%descriptor = load <4 x i32>, <4 x i32> addrspace(4)* %descptr, align 16, !invariant.load !0
%load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
+ store float %load1result, float addrspace(1)* undef
%inner_br2 = icmp uge i32 %1, 10
br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
More information about the llvm-commits
mailing list