[llvm] aba1f15 - [AMDGPU] Precommit vgpr-liverange tests

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 21 06:25:13 PDT 2021


Author: Sebastian Neubauer
Date: 2021-07-21T15:24:59+02:00
New Revision: aba1f157ca4fb8ddd7295368a644a366853fb57a

URL: https://github.com/llvm/llvm-project/commit/aba1f157ca4fb8ddd7295368a644a366853fb57a
DIFF: https://github.com/llvm/llvm-project/commit/aba1f157ca4fb8ddd7295368a644a366853fb57a.diff

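LOG: [AMDGPU] Precommit vgpr-liverange tests

Switch both tests from -mcpu=tonga to -mcpu=gfx1010 and add two test
cases, @loop and @loop_with_use, in which each branch of an if-else
makes an indirect call.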

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
    llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index aa721f9fcabcd..7cef530399c93 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-opt-vgpr-liverange=true -stop-after=si-opt-vgpr-liverange -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -stop-after=si-opt-vgpr-liverange -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; a normal if-else
 define amdgpu_ps float @else1(i32 %z, float %v) #0 {
@@ -9,14 +9,14 @@ define amdgpu_ps float @else1(i32 %z, float %v) #0 {
   ; SI:   liveins: $vgpr0, $vgpr1
   ; SI:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec
-  ; SI:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec
+  ; SI:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.3
   ; SI: bb.1.Flow:
   ; SI:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
   ; SI:   [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %13:vgpr_32, %bb.0, %4, %bb.3
   ; SI:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, undef %15:vgpr_32, %bb.3
-  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.2
   ; SI: bb.2.if:
   ; SI:   successors: %bb.4(0x80000000)
@@ -57,13 +57,13 @@ define amdgpu_ps float @else2(i32 %z, float %v) #0 {
   ; SI:   liveins: $vgpr0, $vgpr1
   ; SI:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec
-  ; SI:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY1]], implicit $exec
+  ; SI:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.3
   ; SI: bb.1.Flow:
   ; SI:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
   ; SI:   [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %15:vgpr_32, %bb.0, %4, %bb.3
-  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.2
   ; SI: bb.2.if:
   ; SI:   successors: %bb.4(0x80000000)
@@ -109,25 +109,25 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
   ; SI:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY killed $sgpr0
   ; SI:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
   ; SI:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
-  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 6, killed [[COPY3]], implicit $exec
+  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY3]], implicit $exec
   ; SI:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
   ; SI: bb.1.for.body:
   ; SI:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
   ; SI:   [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %14, %bb.5
   ; SI:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %13, %bb.5
-  ; SI:   [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_GT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[V_CMP_GT_I32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.4
   ; SI: bb.2.Flow:
   ; SI:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
-  ; SI:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI undef %36:vgpr_32, %bb.1, %10, %bb.4
-  ; SI:   [[PHI3:%[0-9]+]]:vgpr_32 = PHI undef %37:vgpr_32, %bb.1, %9, %bb.4
-  ; SI:   [[PHI4:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, undef %40:vgpr_32, %bb.4
-  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_64 = SI_ELSE killed [[SI_IF]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI undef %34:vgpr_32, %bb.1, %10, %bb.4
+  ; SI:   [[PHI3:%[0-9]+]]:vgpr_32 = PHI undef %35:vgpr_32, %bb.1, %9, %bb.4
+  ; SI:   [[PHI4:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, undef %38:vgpr_32, %bb.4
+  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.3
   ; SI: bb.3.if:
   ; SI:   successors: %bb.5(0x80000000)
   ; SI:   %7:vgpr_32 = nofpexcept V_MUL_F32_e32 [[PHI]], [[COPY2]], implicit $mode, implicit $exec
-  ; SI:   %8:vgpr_32, dead %32:sreg_64 = V_ADD_CO_U32_e64 1, killed [[PHI4]], 0, implicit $exec
+  ; SI:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, killed [[PHI4]], implicit $exec
   ; SI:   S_BRANCH %bb.5
   ; SI: bb.4.else:
   ; SI:   successors: %bb.2(0x80000000)
@@ -138,16 +138,16 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
   ; SI: bb.5.if.end:
   ; SI:   successors: %bb.6(0x04000000), %bb.1(0x7c000000)
   ; SI:   [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI3]], %bb.2, %7, %bb.3
-  ; SI:   [[PHI6:%[0-9]+]]:vgpr_32 = PHI [[PHI2]], %bb.2, %8, %bb.3
+  ; SI:   [[PHI6:%[0-9]+]]:vgpr_32 = PHI [[PHI2]], %bb.2, [[V_ADD_U32_e32_]], %bb.3
   ; SI:   SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; SI:   %13:vgpr_32, dead %34:sreg_64 = V_ADD_CO_U32_e64 1, [[PHI6]], 0, implicit $exec
+  ; SI:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[PHI6]], implicit $exec
   ; SI:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[PHI]], 1, implicit-def dead $scc
   ; SI:   S_CMP_LT_I32 [[S_ADD_I32_]], [[COPY1]], implicit-def $scc
   ; SI:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
   ; SI:   S_BRANCH %bb.6
   ; SI: bb.6.for.end:
-  ; SI:   %35:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[PHI6]], killed [[PHI5]], implicit $mode, implicit $exec
-  ; SI:   $vgpr0 = COPY killed %35
+  ; SI:   %33:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[PHI6]], killed [[PHI5]], implicit $mode, implicit $exec
+  ; SI:   $vgpr0 = COPY killed %33
   ; SI:   SI_RETURN_TO_EPILOG killed $vgpr0
 entry:
 ;  %break = icmp sgt i32 %bound, 0
@@ -187,4 +187,199 @@ for.end:
   ret float %r
 }
 
+; a loop (the waterfall loop generated for each indirect call) inside an if-else
+define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
+  ; SI-LABEL: name: loop
+  ; SI: bb.0.main_body:
+  ; SI:   successors: %bb.5(0x40000000), %bb.1(0x40000000)
+  ; SI:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; SI:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr5
+  ; SI:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr4
+  ; SI:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr3
+  ; SI:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2
+  ; SI:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
+  ; SI:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec
+  ; SI:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   S_BRANCH %bb.5
+  ; SI: bb.1.Flow:
+  ; SI:   successors: %bb.2(0x40000000), %bb.8(0x40000000)
+  ; SI:   [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %29:vgpr_32, %bb.0, %4, %bb.7
+  ; SI:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %45:vgpr_32, %bb.7
+  ; SI:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %47:vgpr_32, %bb.7
+  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   S_BRANCH %bb.2
+  ; SI: bb.2.if:
+  ; SI:   successors: %bb.3(0x80000000)
+  ; SI:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, killed [[PHI2]], %subreg.sub1
+  ; SI:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+  ; SI: bb.3:
+  ; SI:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; SI:   [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %49:vreg_64, %bb.3, [[REG_SEQUENCE]], %bb.2
+  ; SI:   [[PHI4:%[0-9]+]]:vgpr_32 = PHI undef %51:vgpr_32, %bb.3, [[COPY4]], %bb.2
+  ; SI:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub0, implicit $exec
+  ; SI:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub1, implicit $exec
+  ; SI:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+  ; SI:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], killed [[PHI3]], implicit $exec
+  ; SI:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+  ; SI:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
+  ; SI:   $vgpr0 = COPY killed [[PHI4]]
+  ; SI:   dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
+  ; SI:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; SI:   $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc
+  ; SI:   SI_WATERFALL_LOOP %bb.3, implicit $exec
+  ; SI: bb.4:
+  ; SI:   successors: %bb.8(0x80000000)
+  ; SI:   $exec_lo = S_MOV_B32 killed [[S_MOV_B32_]]
+  ; SI:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]]
+  ; SI:   S_BRANCH %bb.8
+  ; SI: bb.5.else:
+  ; SI:   successors: %bb.6(0x80000000)
+  ; SI:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1
+  ; SI:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+  ; SI: bb.6:
+  ; SI:   successors: %bb.6(0x40000000), %bb.7(0x40000000)
+  ; SI:   [[PHI5:%[0-9]+]]:vreg_64 = PHI undef %53:vreg_64, %bb.6, [[REG_SEQUENCE2]], %bb.5
+  ; SI:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI5]].sub0, implicit $exec
+  ; SI:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI5]].sub1, implicit $exec
+  ; SI:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+  ; SI:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], killed [[PHI5]], implicit $exec
+  ; SI:   [[S_AND_SAVEEXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_1]], implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+  ; SI:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
+  ; SI:   $vgpr0 = COPY [[COPY4]]
+  ; SI:   dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
+  ; SI:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; SI:   $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc
+  ; SI:   SI_WATERFALL_LOOP %bb.6, implicit $exec
+  ; SI: bb.7:
+  ; SI:   successors: %bb.1(0x80000000)
+  ; SI:   $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]]
+  ; SI:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]]
+  ; SI:   S_BRANCH %bb.1
+  ; SI: bb.8.end:
+  ; SI:   [[PHI6:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.4
+  ; SI:   SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   $vgpr0 = COPY killed [[PHI6]]
+  ; SI:   SI_RETURN_TO_EPILOG killed $vgpr0
+main_body:
+  %cc = icmp sgt i32 %z, 5
+  br i1 %cc, label %if, label %else
+
+if:
+  %v.if = call amdgpu_gfx float %extern_func(float %v)
+  br label %end
+
+else:
+  %v.else = call amdgpu_gfx float %extern_func2(float %v)
+  br label %end
+
+end:
+  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
+  ret float %r
+}
+
+; a loop (the waterfall loop generated for each indirect call) inside an if-else, but %v is still used (by the fadd) after the if-else
+define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
+  ; SI-LABEL: name: loop_with_use
+  ; SI: bb.0.main_body:
+  ; SI:   successors: %bb.5(0x40000000), %bb.1(0x40000000)
+  ; SI:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; SI:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr5
+  ; SI:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr4
+  ; SI:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed $vgpr3
+  ; SI:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed $vgpr2
+  ; SI:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
+  ; SI:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; SI:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 6, killed [[COPY5]], implicit $exec
+  ; SI:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_GT_I32_e64_]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   S_BRANCH %bb.5
+  ; SI: bb.1.Flow:
+  ; SI:   successors: %bb.2(0x40000000), %bb.8(0x40000000)
+  ; SI:   [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %30:vgpr_32, %bb.0, %4, %bb.7
+  ; SI:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %46:vgpr_32, %bb.7
+  ; SI:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %48:vgpr_32, %bb.7
+  ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   S_BRANCH %bb.2
+  ; SI: bb.2.if:
+  ; SI:   successors: %bb.3(0x80000000)
+  ; SI:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, killed [[PHI2]], %subreg.sub1
+  ; SI:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+  ; SI: bb.3:
+  ; SI:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; SI:   [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %50:vreg_64, %bb.3, [[REG_SEQUENCE]], %bb.2
+  ; SI:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub0, implicit $exec
+  ; SI:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub1, implicit $exec
+  ; SI:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1
+  ; SI:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], killed [[PHI3]], implicit $exec
+  ; SI:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+  ; SI:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
+  ; SI:   $vgpr0 = COPY [[COPY4]]
+  ; SI:   dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
+  ; SI:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; SI:   $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc
+  ; SI:   SI_WATERFALL_LOOP %bb.3, implicit $exec
+  ; SI: bb.4:
+  ; SI:   successors: %bb.8(0x80000000)
+  ; SI:   $exec_lo = S_MOV_B32 killed [[S_MOV_B32_]]
+  ; SI:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]]
+  ; SI:   S_BRANCH %bb.8
+  ; SI: bb.5.else:
+  ; SI:   successors: %bb.6(0x80000000)
+  ; SI:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1
+  ; SI:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+  ; SI: bb.6:
+  ; SI:   successors: %bb.6(0x40000000), %bb.7(0x40000000)
+  ; SI:   [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %52:vreg_64, %bb.6, [[REG_SEQUENCE2]], %bb.5
+  ; SI:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec
+  ; SI:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec
+  ; SI:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1
+  ; SI:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], killed [[PHI4]], implicit $exec
+  ; SI:   [[S_AND_SAVEEXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_1]], implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+  ; SI:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
+  ; SI:   $vgpr0 = COPY [[COPY4]]
+  ; SI:   dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
+  ; SI:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+  ; SI:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; SI:   $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc
+  ; SI:   SI_WATERFALL_LOOP %bb.6, implicit $exec
+  ; SI: bb.7:
+  ; SI:   successors: %bb.1(0x80000000)
+  ; SI:   $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]]
+  ; SI:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]]
+  ; SI:   S_BRANCH %bb.1
+  ; SI: bb.8.end:
+  ; SI:   [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.4
+  ; SI:   SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; SI:   %27:vgpr_32 = nofpexcept V_ADD_F32_e32 killed [[PHI5]], killed [[COPY4]], implicit $mode, implicit $exec
+  ; SI:   $vgpr0 = COPY killed %27
+  ; SI:   SI_RETURN_TO_EPILOG killed $vgpr0
+main_body:
+  %cc = icmp sgt i32 %z, 5
+  br i1 %cc, label %if, label %else
+
+if:
+  %v.if = call amdgpu_gfx float %extern_func(float %v)
+  br label %end
+
+else:
+  %v.else = call amdgpu_gfx float %extern_func2(float %v)
+  br label %end
+
+end:
+  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
+  %r2 = fadd float %r, %v
+  ret float %r2
+}
+
 attributes #0 = { nounwind }

diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
index 0b4859eba68c7..502271ecefbf6 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
@@ -1,24 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-opt-vgpr-liverange=true -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; a normal if-else
 define amdgpu_ps float @else1(i32 %z, float %v) #0 {
 ; SI-LABEL: else1:
 ; SI:       ; %bb.0: ; %main_body
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 6, v0
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    ; implicit-def: $vgpr0
-; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; SI-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; SI-NEXT:  ; %bb.1: ; %else
 ; SI-NEXT:    v_mul_f32_e32 v0, 0x40400000, v1
 ; SI-NEXT:    ; implicit-def: $vgpr1
 ; SI-NEXT:  ; %bb.2: ; %Flow
-; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; SI-NEXT:    s_or_saveexec_b32 s0, s0
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; SI-NEXT:  ; %bb.3: ; %if
 ; SI-NEXT:    v_add_f32_e32 v0, v1, v1
 ; SI-NEXT:  ; %bb.4: ; %end
-; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
+; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; SI-NEXT:    ; return to shader part epilog
 main_body:
   %cc = icmp sgt i32 %z, 5
@@ -42,20 +42,20 @@ end:
 define amdgpu_ps float @else2(i32 %z, float %v) #0 {
 ; SI-LABEL: else2:
 ; SI:       ; %bb.0: ; %main_body
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 6, v0
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    ; implicit-def: $vgpr0
-; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; SI-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; SI-NEXT:    s_xor_b32 s0, exec_lo, s0
 ; SI-NEXT:  ; %bb.1: ; %else
 ; SI-NEXT:    v_mul_f32_e32 v0, 0x40400000, v1
 ; SI-NEXT:  ; %bb.2: ; %Flow
-; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; SI-NEXT:    s_or_saveexec_b32 s0, s0
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; SI-NEXT:  ; %bb.3: ; %if
 ; SI-NEXT:    v_add_f32_e32 v1, v1, v1
 ; SI-NEXT:    v_mov_b32_e32 v0, v1
 ; SI-NEXT:  ; %bb.4: ; %end
-; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
+; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; SI-NEXT:    v_add_f32_e32 v0, v1, v0
 ; SI-NEXT:    ; return to shader part epilog
 main_body:
@@ -81,22 +81,22 @@ end:
 define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
 ; SI-LABEL: else3:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 6, v0
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
 ; SI-NEXT:    s_mov_b32 s1, 0
 ; SI-NEXT:    s_branch BB2_2
 ; SI-NEXT:  BB2_1: ; %if.end
 ; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
-; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
+; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s2
+; SI-NEXT:    v_add_nc_u32_e32 v2, 1, v0
 ; SI-NEXT:    s_add_i32 s1, s1, 1
 ; SI-NEXT:    s_cmp_lt_i32 s1, s0
-; SI-NEXT:    v_add_u32_e64 v2, s[2:3], 1, v0
 ; SI-NEXT:    s_cbranch_scc0 BB2_6
 ; SI-NEXT:  BB2_2: ; %for.body
 ; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    ; implicit-def: $vgpr3
-; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; SI-NEXT:    s_and_saveexec_b32 s2, vcc_lo
+; SI-NEXT:    s_xor_b32 s2, exec_lo, s2
 ; SI-NEXT:  ; %bb.3: ; %else
 ; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
 ; SI-NEXT:    v_mul_lo_u32 v0, v2, 3
@@ -104,13 +104,13 @@ define amdgpu_ps float @else3(i32 %z, float %v, i32 inreg %bound, i32 %x0) #0 {
 ; SI-NEXT:    ; implicit-def: $vgpr2
 ; SI-NEXT:  ; %bb.4: ; %Flow
 ; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
-; SI-NEXT:    s_or_saveexec_b64 s[4:5], s[2:3]
-; SI-NEXT:    s_xor_b64 exec, exec, s[4:5]
+; SI-NEXT:    s_or_saveexec_b32 s2, s2
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s2
 ; SI-NEXT:    s_cbranch_execz BB2_1
 ; SI-NEXT:  ; %bb.5: ; %if
 ; SI-NEXT:    ; in Loop: Header=BB2_2 Depth=1
 ; SI-NEXT:    v_mul_f32_e32 v3, s1, v1
-; SI-NEXT:    v_add_u32_e64 v0, s[2:3], 1, v2
+; SI-NEXT:    v_add_nc_u32_e32 v0, 1, v2
 ; SI-NEXT:    s_branch BB2_1
 ; SI-NEXT:  BB2_6: ; %for.end
 ; SI-NEXT:    v_add_f32_e32 v0, v0, v3
@@ -153,4 +153,155 @@ for.end:
   ret float %r
 }
 
+; a loop (the waterfall loop generated for each indirect call) inside an if-else
+define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
+; SI-LABEL: loop:
+; SI:       ; %bb.0: ; %main_body
+; SI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s38, -1
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
+; SI-NEXT:    v_mov_b32_e32 v40, v1
+; SI-NEXT:    s_mov_b32 s39, 0x31c16000
+; SI-NEXT:    s_add_u32 s36, s36, s1
+; SI-NEXT:    s_addc_u32 s37, s37, 0
+; SI-NEXT:    ; implicit-def: $vgpr0
+; SI-NEXT:    s_mov_b32 s32, 0
+; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; SI-NEXT:    s_xor_b32 s33, exec_lo, s0
+; SI-NEXT:    s_cbranch_execz BB3_4
+; SI-NEXT:  ; %bb.1: ; %else
+; SI-NEXT:    s_mov_b32 s34, exec_lo
+; SI-NEXT:  BB3_2: ; =>This Inner Loop Header: Depth=1
+; SI-NEXT:    v_readfirstlane_b32 s4, v4
+; SI-NEXT:    v_readfirstlane_b32 s5, v5
+; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
+; SI-NEXT:    s_and_saveexec_b32 s35, vcc_lo
+; SI-NEXT:    v_mov_b32_e32 v0, v40
+; SI-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SI-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s35
+; SI-NEXT:    s_cbranch_execnz BB3_2
+; SI-NEXT:  ; %bb.3:
+; SI-NEXT:    s_mov_b32 exec_lo, s34
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:  BB3_4: ; %Flow
+; SI-NEXT:    s_or_saveexec_b32 s33, s33
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s33
+; SI-NEXT:    s_cbranch_execz BB3_8
+; SI-NEXT:  ; %bb.5: ; %if
+; SI-NEXT:    s_mov_b32 s34, exec_lo
+; SI-NEXT:  BB3_6: ; =>This Inner Loop Header: Depth=1
+; SI-NEXT:    v_readfirstlane_b32 s4, v2
+; SI-NEXT:    v_readfirstlane_b32 s5, v3
+; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
+; SI-NEXT:    s_and_saveexec_b32 s35, vcc_lo
+; SI-NEXT:    v_mov_b32_e32 v0, v40
+; SI-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SI-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr40
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s35
+; SI-NEXT:    s_cbranch_execnz BB3_6
+; SI-NEXT:  ; %bb.7:
+; SI-NEXT:    s_mov_b32 exec_lo, s34
+; SI-NEXT:  BB3_8: ; %end
+; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s33
+; SI-NEXT:    ; return to shader part epilog
+main_body:
+  %cc = icmp sgt i32 %z, 5
+  br i1 %cc, label %if, label %else
+
+if:
+  %v.if = call amdgpu_gfx float %extern_func(float %v)
+  br label %end
+
+else:
+  %v.else = call amdgpu_gfx float %extern_func2(float %v)
+  br label %end
+
+end:
+  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
+  ret float %r
+}
+
+; a loop (the waterfall loop generated for each indirect call) inside an if-else, but %v is still used (by the fadd) after the if-else
+define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
+; SI-LABEL: loop_with_use:
+; SI:       ; %bb.0: ; %main_body
+; SI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s38, -1
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
+; SI-NEXT:    v_mov_b32_e32 v40, v1
+; SI-NEXT:    s_mov_b32 s39, 0x31c16000
+; SI-NEXT:    s_add_u32 s36, s36, s1
+; SI-NEXT:    s_addc_u32 s37, s37, 0
+; SI-NEXT:    ; implicit-def: $vgpr0
+; SI-NEXT:    s_mov_b32 s32, 0
+; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; SI-NEXT:    s_xor_b32 s33, exec_lo, s0
+; SI-NEXT:    s_cbranch_execz BB4_4
+; SI-NEXT:  ; %bb.1: ; %else
+; SI-NEXT:    s_mov_b32 s34, exec_lo
+; SI-NEXT:  BB4_2: ; =>This Inner Loop Header: Depth=1
+; SI-NEXT:    v_readfirstlane_b32 s4, v4
+; SI-NEXT:    v_readfirstlane_b32 s5, v5
+; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
+; SI-NEXT:    s_and_saveexec_b32 s35, vcc_lo
+; SI-NEXT:    v_mov_b32_e32 v0, v40
+; SI-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SI-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s35
+; SI-NEXT:    s_cbranch_execnz BB4_2
+; SI-NEXT:  ; %bb.3:
+; SI-NEXT:    s_mov_b32 exec_lo, s34
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:  BB4_4: ; %Flow
+; SI-NEXT:    s_or_saveexec_b32 s33, s33
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s33
+; SI-NEXT:    s_cbranch_execz BB4_8
+; SI-NEXT:  ; %bb.5: ; %if
+; SI-NEXT:    s_mov_b32 s34, exec_lo
+; SI-NEXT:  BB4_6: ; =>This Inner Loop Header: Depth=1
+; SI-NEXT:    v_readfirstlane_b32 s4, v2
+; SI-NEXT:    v_readfirstlane_b32 s5, v3
+; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
+; SI-NEXT:    s_and_saveexec_b32 s35, vcc_lo
+; SI-NEXT:    v_mov_b32_e32 v0, v40
+; SI-NEXT:    s_mov_b64 s[0:1], s[36:37]
+; SI-NEXT:    s_mov_b64 s[2:3], s[38:39]
+; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s35
+; SI-NEXT:    s_cbranch_execnz BB4_6
+; SI-NEXT:  ; %bb.7:
+; SI-NEXT:    s_mov_b32 exec_lo, s34
+; SI-NEXT:  BB4_8: ; %end
+; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s33
+; SI-NEXT:    v_add_f32_e32 v0, v0, v40
+; SI-NEXT:    ; return to shader part epilog
+main_body:
+  %cc = icmp sgt i32 %z, 5
+  br i1 %cc, label %if, label %else
+
+if:
+  %v.if = call amdgpu_gfx float %extern_func(float %v)
+  br label %end
+
+else:
+  %v.else = call amdgpu_gfx float %extern_func2(float %v)
+  br label %end
+
+end:
+  %r = phi float [ %v.if, %if ], [ %v.else, %else ]
+  %r2 = fadd float %r, %v
+  ret float %r2
+}
+
 attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list