[llvm] 09d38dd - AMDGPU: Fix assert when trying to overextend liverange

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 4 15:14:55 PDT 2022


Author: Matt Arsenault
Date: 2022-11-04T15:14:43-07:00
New Revision: 09d38dd7704a52e8ad2d5f8f39aaeccf107f4c56

URL: https://github.com/llvm/llvm-project/commit/09d38dd7704a52e8ad2d5f8f39aaeccf107f4c56
DIFF: https://github.com/llvm/llvm-project/commit/09d38dd7704a52e8ad2d5f8f39aaeccf107f4c56.diff

LOG: AMDGPU: Fix assert when trying to overextend liverange

This was trying to add live range segments beyond the use in the new
and instruction, so skip those additional segments.

This would hit the assertion (S < E && "Cannot create empty or backwards segment").

Added: 
    llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
    llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index aed84437b890..85de3a548411 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -226,7 +226,7 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
       auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
       assert(DefSegment != SelLI->end() &&
              "No live interval segment covering definition?");
-      for (auto I = DefSegment; I != SelLI->end(); ++I) {
+      for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) {
         SlotIndex Start = I->start < SelIdx.getRegSlot() ?
                           SelIdx.getRegSlot() : I->start;
         SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?

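The interesting part of the change is the new bound on the loop, I->start <= AndIdx, which stops the segment walk once a segment begins past the and. Below is a minimal sketch of the failure mode, using plain ints in place of SlotIndex and a hypothetical segment list rather than the real LiveRange API: without the bound, a segment that starts after AndIdx is clamped into a backwards segment and trips the same assertion the commit message quotes.

// Minimal sketch, not the real LLVM API: plain ints stand in for SlotIndex,
// and Segment carries the same assertion as LiveRange::Segment.
#include <algorithm>
#include <cassert>
#include <vector>

struct Segment {
  int Start, End;
  Segment(int S, int E) : Start(S), End(E) {
    assert(S < E && "Cannot create empty or backwards segment");
  }
};

int main() {
  const int SelIdx = 4;  // hypothetical slot of the select definition
  const int AndIdx = 12; // hypothetical slot of the new and's use
  // Segments of the interval being extended; the second begins after the and.
  const std::vector<Segment> Segments = {{4, 8}, {16, 24}};

  for (const Segment &I : Segments) {
    // The fix: stop once a segment starts past the and. Without this check,
    // the {16, 24} segment reaches the clamp below with Start = 16, End = 12.
    if (I.Start > AndIdx)
      break;
    // Simplified form of the Start/End clamping in the patched loop; the
    // real code compares SlotIndexes and special-cases block boundaries.
    int Start = std::max(I.Start, SelIdx);
    int End = std::min(I.End, AndIdx);
    Segment Clamped(Start, End); // asserts Start < End
    (void)Clamped;
  }
  return 0;
}

Dropping the early break makes the second, hypothetical segment hit the assertion, which is the situation the tests added below exercise through the pass itself.
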
diff --git a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
new file mode 100644
index 000000000000..95d7cbb82fb7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck %s
+
+define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 %arg1, i1 %arg2, i1 %arg3, i1 %arg4, i1 %arg5) {
+; CHECK-LABEL: cannot_create_empty_or_backwards_segment:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_mov_b64 s[26:27], s[2:3]
+; CHECK-NEXT:    s_mov_b64 s[24:25], s[0:1]
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CHECK-NEXT:    s_add_u32 s24, s24, s7
+; CHECK-NEXT:    s_addc_u32 s25, s25, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_bitcmp1_b32 s0, 0
+; CHECK-NEXT:    s_cselect_b64 s[14:15], -1, 0
+; CHECK-NEXT:    s_bitcmp1_b32 s0, 8
+; CHECK-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; CHECK-NEXT:    s_bitcmp1_b32 s0, 16
+; CHECK-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; CHECK-NEXT:    s_bitcmp1_b32 s0, 24
+; CHECK-NEXT:    s_cselect_b64 s[6:7], -1, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[2:3]
+; CHECK-NEXT:    s_xor_b64 s[2:3], s[6:7], -1
+; CHECK-NEXT:    s_bitcmp1_b32 s1, 0
+; CHECK-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; CHECK-NEXT:    s_bitcmp1_b32 s1, 8
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[14:15]
+; CHECK-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
+; CHECK-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
+; CHECK-NEXT:    s_and_b64 s[4:5], exec, s[8:9]
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_branch .LBB0_3
+; CHECK-NEXT:  .LBB0_1: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[18:19], -1
+; CHECK-NEXT:    s_mov_b64 s[16:17], 0
+; CHECK-NEXT:    s_mov_b64 s[20:21], -1
+; CHECK-NEXT:    s_mov_b64 s[22:23], -1
+; CHECK-NEXT:  .LBB0_2: ; %Flow7
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[22:23]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_12
+; CHECK-NEXT:  .LBB0_3: ; %bb7
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[0:1]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.4: ; %bb8
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 vcc, s[2:3]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_6
+; CHECK-NEXT:  ; %bb.5: ; %bb9
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[18:19], 0
+; CHECK-NEXT:    s_mov_b64 s[16:17], -1
+; CHECK-NEXT:    s_mov_b64 s[22:23], s[8:9]
+; CHECK-NEXT:    s_cbranch_execz .LBB0_7
+; CHECK-NEXT:    s_branch .LBB0_8
+; CHECK-NEXT:  .LBB0_6: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[18:19], -1
+; CHECK-NEXT:    s_mov_b64 s[16:17], 0
+; CHECK-NEXT:    s_mov_b64 s[22:23], 0
+; CHECK-NEXT:  .LBB0_7: ; %bb10
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[18:19], 0
+; CHECK-NEXT:    s_mov_b64 s[16:17], -1
+; CHECK-NEXT:    s_mov_b64 s[22:23], s[12:13]
+; CHECK-NEXT:  .LBB0_8: ; %Flow9
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[20:21], -1
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[22:23]
+; CHECK-NEXT:    s_mov_b64 s[22:23], -1
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
+; CHECK-NEXT:  ; %bb.9: ; %bb13
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 vcc, s[4:5]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_11
+; CHECK-NEXT:  ; %bb.10: ; %bb16
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[16:17], 0
+; CHECK-NEXT:    s_mov_b64 s[20:21], -1
+; CHECK-NEXT:    s_mov_b64 s[22:23], s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[18:19], s[16:17]
+; CHECK-NEXT:    s_branch .LBB0_2
+; CHECK-NEXT:  .LBB0_11: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[22:23], -1
+; CHECK-NEXT:    s_mov_b64 s[20:21], 0
+; CHECK-NEXT:    ; implicit-def: $sgpr16_sgpr17
+; CHECK-NEXT:    s_mov_b64 s[18:19], s[16:17]
+; CHECK-NEXT:    s_branch .LBB0_2
+; CHECK-NEXT:  .LBB0_12: ; %loop.exit.guard6
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_xor_b64 s[14:15], s[20:21], -1
+; CHECK-NEXT:    s_mov_b64 s[20:21], -1
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[14:15]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_16
+; CHECK-NEXT:  ; %bb.13: ; %bb14
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[14:15]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_15
+; CHECK-NEXT:  ; %bb.14: ; %bb15
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    buffer_store_dword v1, off, s[24:27], 0 offset:4
+; CHECK-NEXT:    buffer_store_dword v1, off, s[24:27], 0
+; CHECK-NEXT:  .LBB0_15: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b64 s[20:21], 0
+; CHECK-NEXT:  .LBB0_16: ; %Flow13
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[20:21]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_3
+; CHECK-NEXT:  ; %bb.17: ; %loop.exit.guard
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[18:19]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_22
+; CHECK-NEXT:  ; %bb.18: ; %loop.exit.guard5
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[16:17]
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_22
+; CHECK-NEXT:  ; %bb.19: ; %bb17
+; CHECK-NEXT:    s_and_b64 vcc, exec, s[6:7]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_21
+; CHECK-NEXT:  ; %bb.20: ; %bb19
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 1, v0
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_22
+; CHECK-NEXT:  .LBB0_21: ; %bb21
+; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:  .LBB0_22: ; %UnifiedUnreachableBlock
+bb:
+  br label %bb6
+
+bb6:                                              ; preds = %bb15, %bb14, %bb
+  br label %bb7
+
+bb7:                                              ; preds = %bb16, %bb6
+  br i1 %arg2, label %bb8, label %bb20
+
+bb8:                                              ; preds = %bb7
+  br i1 %arg3, label %bb10, label %bb9
+
+bb9:                                              ; preds = %bb8
+  br i1 %arg1, label %bb13, label %bb12
+
+bb10:                                             ; preds = %bb8
+  br i1 %arg5, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb10
+  br label %bb13
+
+bb12:                                             ; preds = %bb10, %bb9
+  unreachable
+
+bb13:                                             ; preds = %bb11, %bb9
+  br i1 %arg1, label %bb16, label %bb14
+
+bb14:                                             ; preds = %bb13
+  br i1 %arg, label %bb15, label %bb6
+
+bb15:                                             ; preds = %bb14
+  store double 0.000000e+00, ptr addrspace(5) null, align 2147483648
+  br label %bb6
+
+bb16:                                             ; preds = %bb13
+  br i1 %arg4, label %bb17, label %bb7
+
+bb17:                                             ; preds = %bb16
+  br i1 %arg3, label %bb19, label %bb18
+
+bb18:                                             ; preds = %bb17
+  ret void
+
+bb19:                                             ; preds = %bb17
+  br i1 %arg, label %bb20, label %bb21
+
+bb20:                                             ; preds = %bb19, %bb7
+  unreachable
+
+bb21:                                             ; preds = %bb19
+  ret void
+}

diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
index 1403f9bd1cf0..ae2c77ca8703 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -686,3 +686,106 @@ body:             |
   bb.3:
 
 ...
+
+# This was trying to extend the liverange of %0 farther than needed,
+# following %1's segment to %bb3
+
+---
+name:            cannot_create_empty_or_backwards_segment
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    %0:sreg_64_xexec = COPY $sgpr4_sgpr5
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.3, implicit killed undef $vcc
+
+  bb.2:
+    S_ENDPGM 0
+
+  bb.3:
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name:            cannot_create_empty_or_backwards_segment_2
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit-def dead [[V_CNDMASK_B32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+  bb.1:
+    liveins: $sgpr4_sgpr5
+
+    %0:sreg_64_xexec = COPY $sgpr4_sgpr5
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
+
+  bb.2:
+    liveins: $sgpr4_sgpr5
+    S_NOP 0, implicit-def %1, implicit %1
+    S_CBRANCH_VCCNZ %bb.4, implicit killed undef $vcc
+    S_BRANCH %bb.1
+
+  bb.3:
+    S_ENDPGM 0
+
+  bb.4:
+    S_ENDPGM 0
+...


        

