[llvm] [AMDGPU] Ensure all PERMLANE instructions are marked as convergent (PR #182162)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 18 14:00:50 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (LU-JOHN)
<details>
<summary>Changes</summary>
With this change, all PERMLANE instructions in AMDGPUGenInstrInfo.inc have been verified to be marked as convergent. This is necessary to prevent machine-sink from incorrectly sinking PERMLANE instructions.
---
Full diff: https://github.com/llvm/llvm-project/pull/182162.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+7-3)
- (added) llvm/test/CodeGen/AMDGPU/permlane-convergent.mir (+307)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 56e7623496eea..6e00665eeb440 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -890,6 +890,7 @@ let SubtargetPredicate = isGFX11Plus in {
let True16Predicate = UseRealTrue16Insts;
}
// Restrict src0 to be VGPR
+ let isConvergent = 1 in
def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
[], /*VOP1Only=*/ 1>;
let isAsCheapAsAMove = 1, isMoveImm = 1 in
@@ -1202,6 +1203,7 @@ defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x03a,
defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x03b,
"V_FFBH_I32", "v_cls_i32">;
defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12_gfx13<0x066>;
+let isConvergent =1 in
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12_gfx13<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x01c, "v_mov_b16">;
defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x069>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index bdcf04f734291..ac3dd1de55fcc 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1568,7 +1568,7 @@ let True16Predicate = UseFakeTrue16Insts in {
} // End True16Predicate = UseFakeTrue16Insts
let SubtargetPredicate = isGFX12Plus in {
- let Constraints = "$vdst = $vdst_in" in {
+ let Constraints = "$vdst = $vdst_in", isConvergent = 1 in {
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
defm V_PERMLANEX16_VAR_B32 : VOP3Inst<"v_permlanex16_var_b32", VOP3_PERMLANE_VAR_Profile>;
} // End $vdst = $vdst_in
@@ -1579,11 +1579,13 @@ let SubtargetPredicate = isGFX12Plus in {
} // End SubtargetPredicate = isGFX12Plus
let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32 in {
+ let isConvergent = 1 in {
defm V_PERMLANE_BCAST_B32 : VOP3Inst<"v_permlane_bcast_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
defm V_PERMLANE_UP_B32 : VOP3Inst<"v_permlane_up_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
defm V_PERMLANE_DOWN_B32 : VOP3Inst<"v_permlane_down_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
defm V_PERMLANE_XOR_B32 : VOP3Inst<"v_permlane_xor_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
defm V_PERMLANE_IDX_GEN_B32 : VOP3Inst<"v_permlane_idx_gen_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32>>;
+ } // End isConvergent = 1
def : PermlaneNoDppPat3Src<int_amdgcn_permlane_bcast, V_PERMLANE_BCAST_B32_e64>;
def : PermlaneNoDppPat3Src<int_amdgcn_permlane_up, V_PERMLANE_UP_B32_e64>;
@@ -2104,8 +2106,10 @@ defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_maximum_f16">;
-defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
-defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+let isConvergent = 1 in {
+ defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
+ defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+}
defm V_BITOP3_B16_gfx1250 : VOP3_Real_BITOP3_t16_and_fake16_gfx1250<0x233, "v_bitop3_b16">;
defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx1250<0x234>;
diff --git a/llvm/test/CodeGen/AMDGPU/permlane-convergent.mir b/llvm/test/CodeGen/AMDGPU/permlane-convergent.mir
new file mode 100644
index 0000000000000..147069896ee8f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/permlane-convergent.mir
@@ -0,0 +1,307 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# machine-sink must not sink V_PERMLANE* instructions.
+# Ensure that V_PERMLANE* instructions are marked as convergent to prevent
+# machine-sink from sinking them.
+
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic -run-pass=machine-sink %s -o - | FileCheck %s
+
+---
+name: permlane_test_V_PERMLANE_XOR_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE_XOR_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_XOR_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_XOR_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_PERMLANE_XOR_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %5:vgpr_32 = COPY %3:vgpr_32
+
+ bb.2:
+ SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANE_UP_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE_UP_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE_UP_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_UP_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_UP_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_PERMLANE_UP_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %5:vgpr_32 = COPY %3:vgpr_32
+
+ bb.2:
+ SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANE_DOWN_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE_DOWN_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE_DOWN_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_DOWN_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_DOWN_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_PERMLANE_DOWN_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %5:vgpr_32 = COPY %3:vgpr_32
+
+ bb.2:
+ SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANE_BCAST_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE_BCAST_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE_BCAST_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_BCAST_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_BCAST_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_PERMLANE_BCAST_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+ %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %5:vgpr_32 = COPY %3:vgpr_32
+
+ bb.2:
+ SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANE_IDX_GEN_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE_IDX_GEN_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE_IDX_GEN_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_IDX_GEN_B32_e64 [[DEF]], [[DEF1]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_IDX_GEN_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:sreg_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_PERMLANE_IDX_GEN_B32_e64 %0:vgpr_32, %1:sreg_32, implicit $exec
+ %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %5:vgpr_32 = COPY %3:vgpr_32
+
+ bb.2:
+ SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANEX_VAR_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANEX_VAR_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANEX16_VAR_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_VAR_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF]], 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF1]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANEX16_VAR_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_PERMLANEX16_VAR_B32_e64 0, %0:vgpr_32, 0, %0:vgpr_32, %0:vgpr_32, 0, implicit $exec
+ %3:sreg_32 = SI_IF %1:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %4:vgpr_32 = COPY %2:vgpr_32
+
+ bb.2:
+ SI_END_CF %3:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANE64_B32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE64_B32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE64_B32_:%[0-9]+]]:vgpr_32 = V_PERMLANE64_B32 [[DEF]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF1]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE64_B32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_PERMLANE64_B32 %0:vgpr_32, implicit $exec
+ %3:sreg_32 = SI_IF %1:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %4:vgpr_32 = COPY %2:vgpr_32
+
+ bb.2:
+ SI_END_CF %3:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+---
+name: permlane_test_V_PERMLANE16_VAR_B32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: permlane_test_V_PERMLANE16_VAR_B32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PERMLANE16_VAR_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE16_VAR_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF]], 0, implicit $exec
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF1]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE16_VAR_B32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_PERMLANE16_VAR_B32_e64 0, %0:vgpr_32, 0, %0:vgpr_32, %0:vgpr_32, 0, implicit $exec
+ %3:sreg_32 = SI_IF %1:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ %4:vgpr_32 = COPY %2:vgpr_32
+
+ bb.2:
+ SI_END_CF %3:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}
``````````
</details>
https://github.com/llvm/llvm-project/pull/182162
More information about the llvm-commits
mailing list