[llvm] [TwoAddressInstruction] Propagate undef flags for partial defs (PR #79286)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 24 18:53:47 PST 2024


https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/79286

>From 27f8e2449999fcea72c83d6cba3b8ff14cbff39c Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Wed, 24 Jan 2024 22:02:09 +0900
Subject: [PATCH 1/2] [TwoAddressInstruction] Propagate undef flags for partial
 defs

If part of a register (lowered from a REG_SEQUENCE) is undefined, then
we should propagate undef flags to uses of those lanes.
This is only performed when live intervals are present, as live
intervals are required to correctly match uses to defs; the primary
goal is to allow precise computation of subrange intervals.
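
For reviewers skimming the patch, the second hunk boils down to the sketch
below. It is only a paraphrase, assuming the surrounding
eliminateRegSequence() state (DstReg, UndefLanes, DefVN) is set up as in the
first hunk; the free function propagateUndefLanes and its signature are
illustrative, not part of the change.

  // A minimal sketch of the propagation step, using the LLVM CodeGen APIs
  // that appear in the hunks below.
  #include "llvm/CodeGen/LiveIntervals.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/TargetRegisterInfo.h"

  using namespace llvm;

  // Mark uses of DstReg as undef when they read lanes that the lowered
  // REG_SEQUENCE left undefined, restricted to uses of the same value (DefVN).
  static void propagateUndefLanes(Register DstReg, LaneBitmask UndefLanes,
                                  VNInfo *DefVN, LiveIntervals *LIS,
                                  MachineRegisterInfo *MRI,
                                  const TargetRegisterInfo *TRI) {
    LiveInterval &LI = LIS->getInterval(DstReg);
    for (MachineOperand &UseOp : MRI->use_operands(DstReg)) {
      unsigned SubReg = UseOp.getSubReg();
      // Full-register reads and operands already flagged undef are left alone.
      if (UseOp.isUndef() || !SubReg)
        continue;
      // Only touch uses that read the value produced by this REG_SEQUENCE.
      VNInfo *VN = LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent()));
      if (VN != DefVN)
        continue;
      // If the subregister read overlaps an undefined lane, flag it undef so
      // subrange intervals are not extended over lanes that were never defined.
      if ((UndefLanes & TRI->getSubRegIndexLaneMask(SubReg)).any())
        UseOp.setIsUndef(true);
    }
  }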
---
 .../lib/CodeGen/TwoAddressInstructionPass.cpp | 32 +++++++++++++++----
 .../AMDGPU/GlobalISel/extractelement.ll       |  1 +
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 74d7904aee33a2..af042c972a2c8d 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1929,21 +1929,27 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
   Register DstReg = MI.getOperand(0).getReg();
 
   SmallVector<Register, 4> OrigRegs;
+  VNInfo *DefVN = nullptr;
   if (LIS) {
     OrigRegs.push_back(MI.getOperand(0).getReg());
     for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
       OrigRegs.push_back(MI.getOperand(i).getReg());
+    if (LIS->hasInterval(DstReg)) {
+      DefVN = LIS->getInterval(DstReg)
+                  .Query(LIS->getInstructionIndex(MI))
+                  .valueOut();
+    }
   }
 
+  LaneBitmask UndefLanes = LaneBitmask::getNone();
   bool DefEmitted = false;
-  bool DefIsPartial = false;
   for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
     MachineOperand &UseMO = MI.getOperand(i);
     Register SrcReg = UseMO.getReg();
     unsigned SubIdx = MI.getOperand(i+1).getImm();
     // Nothing needs to be inserted for undef operands.
     if (UseMO.isUndef()) {
-      DefIsPartial = true;
+      UndefLanes |= TRI->getSubRegIndexLaneMask(SubIdx);
       continue;
     }
 
@@ -1991,11 +1997,25 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
       MI.removeOperand(j);
   } else {
     if (LIS) {
-      // Force interval recomputation if we moved from full definition
-      // of register to partial.
-      if (DefIsPartial && LIS->hasInterval(DstReg) &&
-          MRI->shouldTrackSubRegLiveness(DstReg))
+      // Force live interval recomputation if we moved to a partial definition
+      // of the register.  Undef flags must be propagated to uses of the
+      // undefined subregisters for accurate interval computation.
+      if (UndefLanes.any() && DefVN && MRI->shouldTrackSubRegLiveness(DstReg)) {
+        auto &LI = LIS->getInterval(DstReg);
+        for (MachineOperand &UseOp : MRI->use_operands(DstReg)) {
+          unsigned SubReg = UseOp.getSubReg();
+          if (UseOp.isUndef() || !SubReg)
+            continue;
+          auto *VN =
+              LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent()));
+          if (DefVN != VN)
+            continue;
+          LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+          if ((UndefLanes & LaneMask).any())
+            UseOp.setIsUndef(true);
+        }
         LIS->removeInterval(DstReg);
+      }
       LIS->RemoveMachineInstrFromMaps(MI);
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index ac153183be642a..3f11c122a68146 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

>From 70640fa56cc8f7f6cd7f577854acea49c8118665 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Thu, 25 Jan 2024 11:52:26 +0900
Subject: [PATCH 2/2] Switch to MIR test.

---
 .../AMDGPU/GlobalISel/extractelement.ll       |   1 -
 .../GlobalISel/twoaddr-extract-dyn-v7f64.mir  | 134 ++++++++++++++++++
 2 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index 3f11c122a68146..ac153183be642a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GPRIDX %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
new file mode 100644
index 00000000000000..4eca1efedccb79
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -early-live-intervals -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck %s
+
+--- |
+  define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) #0 {
+  entry:
+    %ext = extractelement <7 x double> %vec, i32 %sel
+    ret double %ext
+  }
+
+  attributes #0 = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+...
+---
+name:            dyn_extract_v7f64_v_v
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+
+    ; CHECK-LABEL: name: dyn_extract_v7f64_v_v
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+    ; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]]
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]].sub1:vreg_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: undef [[COPY15:%[0-9]+]].sub0:vreg_64 = COPY [[COPY2]]
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]]
+    ; CHECK-NEXT: undef [[COPY16:%[0-9]+]].sub0:vreg_64 = COPY [[COPY4]]
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
+    ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_64 = COPY [[COPY6]]
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:vreg_64 = COPY [[COPY7]]
+    ; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub0:vreg_64 = COPY [[COPY8]]
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:vreg_64 = COPY [[COPY9]]
+    ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_64 = COPY [[COPY10]]
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]].sub1:vreg_64 = COPY [[COPY11]]
+    ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub0:vreg_64 = COPY [[COPY12]]
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub1:vreg_64 = COPY [[COPY13]]
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+    ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0_sub1:vreg_512 = COPY [[COPY14]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub2_sub3:vreg_512 = COPY [[COPY15]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub4_sub5:vreg_512 = COPY [[COPY16]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub6_sub7:vreg_512 = COPY [[COPY17]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub8_sub9:vreg_512 = COPY [[COPY18]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub10_sub11:vreg_512 = COPY [[COPY19]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub12_sub13:vreg_512 = COPY [[COPY20]]
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 1, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub0, 0, [[COPY22]].sub2, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub1, 0, [[COPY22]].sub3, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 2, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY22]].sub4, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_1]], 0, [[COPY22]].sub5, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 3, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_2]], 0, [[COPY22]].sub6, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_3]], 0, [[COPY22]].sub7, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 4, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_4]], 0, [[COPY22]].sub8, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_5]], 0, [[COPY22]].sub9, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 5, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_6]], 0, [[COPY22]].sub10, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_7]], 0, [[COPY22]].sub11, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 6, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_8]], 0, [[COPY22]].sub12, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_9]], 0, [[COPY22]].sub13, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_6:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 7, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_10]], 0, undef [[COPY22]].sub14, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_11]], 0, undef [[COPY22]].sub15, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_12]]
+    ; CHECK-NEXT: $vgpr1 = COPY [[V_CNDMASK_B32_e64_13]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %2:vgpr_32 = COPY $vgpr0
+    %3:vgpr_32 = COPY $vgpr1
+    %4:vgpr_32 = COPY $vgpr2
+    %5:vgpr_32 = COPY $vgpr3
+    %6:vgpr_32 = COPY $vgpr4
+    %7:vgpr_32 = COPY $vgpr5
+    %8:vgpr_32 = COPY $vgpr6
+    %9:vgpr_32 = COPY $vgpr7
+    %10:vgpr_32 = COPY $vgpr8
+    %11:vgpr_32 = COPY $vgpr9
+    %12:vgpr_32 = COPY $vgpr10
+    %13:vgpr_32 = COPY $vgpr11
+    %14:vgpr_32 = COPY $vgpr12
+    %15:vgpr_32 = COPY $vgpr13
+    %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %17:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
+    %18:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %7, %subreg.sub1
+    %19:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1
+    %20:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %11, %subreg.sub1
+    %21:vreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+    %22:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1
+    %1:vgpr_32 = COPY $vgpr14
+    %34:vreg_512 = REG_SEQUENCE %16, %subreg.sub0_sub1, %17, %subreg.sub2_sub3, %18, %subreg.sub4_sub5, %19, %subreg.sub6_sub7, %20, %subreg.sub8_sub9, %21, %subreg.sub10_sub11, %22, %subreg.sub12_sub13, undef %35:vreg_64, %subreg.sub14_sub15
+    %55:sreg_64_xexec = V_CMP_EQ_U32_e64 1, %1, implicit $exec
+    %56:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub0, 0, %34.sub2, %55, implicit $exec
+    %57:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub1, 0, %34.sub3, %55, implicit $exec
+    %59:sreg_64_xexec = V_CMP_EQ_U32_e64 2, %1, implicit $exec
+    %60:vgpr_32 = V_CNDMASK_B32_e64 0, %56, 0, %34.sub4, %59, implicit $exec
+    %61:vgpr_32 = V_CNDMASK_B32_e64 0, %57, 0, %34.sub5, %59, implicit $exec
+    %63:sreg_64_xexec = V_CMP_EQ_U32_e64 3, %1, implicit $exec
+    %64:vgpr_32 = V_CNDMASK_B32_e64 0, %60, 0, %34.sub6, %63, implicit $exec
+    %65:vgpr_32 = V_CNDMASK_B32_e64 0, %61, 0, %34.sub7, %63, implicit $exec
+    %67:sreg_64_xexec = V_CMP_EQ_U32_e64 4, %1, implicit $exec
+    %68:vgpr_32 = V_CNDMASK_B32_e64 0, %64, 0, %34.sub8, %67, implicit $exec
+    %69:vgpr_32 = V_CNDMASK_B32_e64 0, %65, 0, %34.sub9, %67, implicit $exec
+    %71:sreg_64_xexec = V_CMP_EQ_U32_e64 5, %1, implicit $exec
+    %72:vgpr_32 = V_CNDMASK_B32_e64 0, %68, 0, %34.sub10, %71, implicit $exec
+    %73:vgpr_32 = V_CNDMASK_B32_e64 0, %69, 0, %34.sub11, %71, implicit $exec
+    %75:sreg_64_xexec = V_CMP_EQ_U32_e64 6, %1, implicit $exec
+    %76:vgpr_32 = V_CNDMASK_B32_e64 0, %72, 0, %34.sub12, %75, implicit $exec
+    %77:vgpr_32 = V_CNDMASK_B32_e64 0, %73, 0, %34.sub13, %75, implicit $exec
+    %79:sreg_64_xexec = V_CMP_EQ_U32_e64 7, %1, implicit $exec
+    %80:vgpr_32 = V_CNDMASK_B32_e64 0, %76, 0, %34.sub14, %79, implicit $exec
+    %81:vgpr_32 = V_CNDMASK_B32_e64 0, %77, 0, %34.sub15, %79, implicit $exec
+    $vgpr0 = COPY %80
+    $vgpr1 = COPY %81
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...


