[llvm] [AMDGPU] Ensure all PERMLANE instructions are marked as convergent (PR #182162)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 18 14:00:50 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: None (LU-JOHN)

<details>
<summary>Changes</summary>

With this change, all PERMLANE instructions in AMDGPUGenInstrInfo.inc have been verified to be marked as convergent. This is necessary to prevent the machine-sink pass from incorrectly sinking PERMLANE instructions.

---
Full diff: https://github.com/llvm/llvm-project/pull/182162.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2) 
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+7-3) 
- (added) llvm/test/CodeGen/AMDGPU/permlane-convergent.mir (+307) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 56e7623496eea..6e00665eeb440 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -890,6 +890,7 @@ let SubtargetPredicate = isGFX11Plus in {
     let True16Predicate = UseRealTrue16Insts;
   }
   // Restrict src0 to be VGPR
+  let isConvergent = 1 in
   def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                       [], /*VOP1Only=*/ 1>;
   let isAsCheapAsAMove = 1, isMoveImm = 1 in
@@ -1202,6 +1203,7 @@ defm V_CTZ_I32_B32         : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x03a,
 defm V_CLS_I32             : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x03b,
   "V_FFBH_I32", "v_cls_i32">;
 defm V_SWAP_B16              : VOP1Only_Real_gfx11_gfx12_gfx13<0x066>;
+let isConvergent =1 in
 defm V_PERMLANE64_B32        : VOP1Only_Real_gfx11_gfx12_gfx13<0x067>;
 defm V_MOV_B16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x01c, "v_mov_b16">;
 defm V_NOT_B16               : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x069>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index bdcf04f734291..ac3dd1de55fcc 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1568,7 +1568,7 @@ let True16Predicate = UseFakeTrue16Insts in {
 } // End True16Predicate = UseFakeTrue16Insts
 
 let SubtargetPredicate = isGFX12Plus in {
-  let Constraints = "$vdst = $vdst_in" in {
+  let Constraints = "$vdst = $vdst_in", isConvergent = 1 in {
     defm V_PERMLANE16_VAR_B32  : VOP3Inst<"v_permlane16_var_b32",  VOP3_PERMLANE_VAR_Profile>;
     defm V_PERMLANEX16_VAR_B32 : VOP3Inst<"v_permlanex16_var_b32", VOP3_PERMLANE_VAR_Profile>;
   } // End $vdst = $vdst_in
@@ -1579,11 +1579,13 @@ let SubtargetPredicate = isGFX12Plus in {
 } // End SubtargetPredicate = isGFX12Plus
 
 let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32 in {
+  let isConvergent = 1 in {
   defm V_PERMLANE_BCAST_B32   : VOP3Inst<"v_permlane_bcast_b32",   VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
   defm V_PERMLANE_UP_B32      : VOP3Inst<"v_permlane_up_b32",      VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
   defm V_PERMLANE_DOWN_B32    : VOP3Inst<"v_permlane_down_b32",    VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
   defm V_PERMLANE_XOR_B32     : VOP3Inst<"v_permlane_xor_b32",     VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>;
   defm V_PERMLANE_IDX_GEN_B32 : VOP3Inst<"v_permlane_idx_gen_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32>>;
+  } // End isConvergent = 1
 
   def : PermlaneNoDppPat3Src<int_amdgcn_permlane_bcast,   V_PERMLANE_BCAST_B32_e64>;
   def : PermlaneNoDppPat3Src<int_amdgcn_permlane_up,      V_PERMLANE_UP_B32_e64>;
@@ -2104,8 +2106,10 @@ defm V_MAXIMUM_F32        : VOP3Only_Realtriple_gfx12<0x366>;
 defm V_MINIMUM_F16        : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
 defm V_MAXIMUM_F16        : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_maximum_f16">;
 
-defm V_PERMLANE16_VAR_B32  : VOP3Only_Real_Base_gfx12<0x30f>;
-defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+let isConvergent = 1 in {
+  defm V_PERMLANE16_VAR_B32  : VOP3Only_Real_Base_gfx12<0x30f>;
+  defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
+}
 
 defm V_BITOP3_B16_gfx1250 : VOP3_Real_BITOP3_t16_and_fake16_gfx1250<0x233, "v_bitop3_b16">;
 defm V_BITOP3_B32         : VOP3_Real_BITOP3_gfx1250<0x234>;
diff --git a/llvm/test/CodeGen/AMDGPU/permlane-convergent.mir b/llvm/test/CodeGen/AMDGPU/permlane-convergent.mir
new file mode 100644
index 0000000000000..147069896ee8f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/permlane-convergent.mir
@@ -0,0 +1,307 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# machine-sink must not sink V_PERMLANE* instructions.
+# Ensure that V_PERMLANE* instructions are marked as convergent to prevent
+# machine-sink from sinking them.
+
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic -run-pass=machine-sink %s -o - | FileCheck %s
+
+---
+name:            permlane_test_V_PERMLANE_XOR_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE_XOR_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_XOR_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_XOR_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANE_XOR_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+    %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %5:vgpr_32 = COPY %3:vgpr_32
+
+  bb.2:
+    SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANE_UP_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE_UP_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE_UP_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_UP_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_UP_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANE_UP_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+    %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %5:vgpr_32 = COPY %3:vgpr_32
+
+  bb.2:
+    SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANE_DOWN_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE_DOWN_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE_DOWN_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_DOWN_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_DOWN_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANE_DOWN_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+    %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %5:vgpr_32 = COPY %3:vgpr_32
+
+  bb.2:
+    SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANE_BCAST_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE_BCAST_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE_BCAST_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_BCAST_B32_e64 [[DEF]], [[DEF1]], 0, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_BCAST_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANE_BCAST_B32_e64 %0:vgpr_32, %1:sreg_32, 0, implicit $exec
+    %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %5:vgpr_32 = COPY %3:vgpr_32
+
+  bb.2:
+    SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANE_IDX_GEN_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE_IDX_GEN_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE_IDX_GEN_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE_IDX_GEN_B32_e64 [[DEF]], [[DEF1]], implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF2]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE_IDX_GEN_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:sreg_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_PERMLANE_IDX_GEN_B32_e64 %0:vgpr_32, %1:sreg_32,  implicit $exec
+    %4:sreg_32 = SI_IF %2:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %5:vgpr_32 = COPY %3:vgpr_32
+
+  bb.2:
+    SI_END_CF %4:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANEX_VAR_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANEX_VAR_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANEX16_VAR_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_VAR_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF]], 0, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF1]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANEX16_VAR_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:vgpr_32 = V_PERMLANEX16_VAR_B32_e64 0, %0:vgpr_32, 0, %0:vgpr_32, %0:vgpr_32, 0, implicit $exec
+    %3:sreg_32 = SI_IF %1:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %4:vgpr_32 = COPY %2:vgpr_32
+
+  bb.2:
+    SI_END_CF %3:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANE64_B32
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE64_B32
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE64_B32_:%[0-9]+]]:vgpr_32 = V_PERMLANE64_B32 [[DEF]], implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF1]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE64_B32_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:vgpr_32 = V_PERMLANE64_B32 %0:vgpr_32, implicit $exec
+    %3:sreg_32 = SI_IF %1:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %4:vgpr_32 = COPY %2:vgpr_32
+
+  bb.2:
+    SI_END_CF %3:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+---
+name:            permlane_test_V_PERMLANE16_VAR_B32_e64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: permlane_test_V_PERMLANE16_VAR_B32_e64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_PERMLANE16_VAR_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANE16_VAR_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF]], 0, implicit $exec
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[DEF1]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_PERMLANE16_VAR_B32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_32 = IMPLICIT_DEF
+    %2:vgpr_32 = V_PERMLANE16_VAR_B32_e64 0, %0:vgpr_32, 0, %0:vgpr_32, %0:vgpr_32, 0, implicit $exec
+    %3:sreg_32 = SI_IF %1:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %4:vgpr_32 = COPY %2:vgpr_32
+
+  bb.2:
+    SI_END_CF %3:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK: {{.*}}

``````````

</details>


https://github.com/llvm/llvm-project/pull/182162


More information about the llvm-commits mailing list