[llvm] [AMDGPU] Group multiple single use producers under one single use instruction. (PR #90713)

Scott Egerton via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 02:15:07 PDT 2024


https://github.com/ScottEgerton created https://github.com/llvm/llvm-project/pull/90713

Previously each single use producer would be marked with a
"S_SINGLEUSE_VDST 1" instruction. This patch adds support for
larger immediates that encode multiple single use producers into
one S_SINGLEUSE_VDST instruction.


>From 0ea0f55d90c9ae39a16b8258f3b8c8b9a1beab84 Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Tue, 30 Jan 2024 16:46:12 +0000
Subject: [PATCH] [AMDGPU] Group multiple single use producers under one single
 use instruction.

Previously each single use producer would be marked with a
"S_SINGLEUSE_VDST 1" instruction. This patch adds support for
larger immediates that encode multiple single use producers into
one S_SINGLEUSE_VDST instruction.
---
 .../AMDGPU/AMDGPUInsertSingleUseVDST.cpp      | 124 +++-
 .../CodeGen/AMDGPU/insert-singleuse-vdst.mir  | 546 +++++++++++++++++-
 2 files changed, 645 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index 9415bd3695f010..d3fbaa55b3bced 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -16,10 +16,11 @@
 
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -28,10 +29,11 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Register.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
+#include <array>
 
 using namespace llvm;
 
@@ -41,17 +43,110 @@ namespace {
 class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
 private:
   const SIInstrInfo *SII;
+  class SingleUseInstruction {
+  private:
+    static const unsigned MaxSkipRange = 0b111;
+    static const unsigned MaxNumberOfSkipRegions = 2;
+
+    unsigned LastEncodedPositionEnd;
+    MachineInstr *ProducerInstr;
+
+    std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
+    SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;
+
+    // Adds the skip region(s) needed to reach ProducerPosition.
+    void skip(const unsigned ProducerPosition) {
+      while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
+        SkipRegions.push_back(MaxSkipRange);
+        LastEncodedPositionEnd += MaxSkipRange;
+      }
+      SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
+      LastEncodedPositionEnd = ProducerPosition;
+    }
+
+    bool currentRegionHasSpace() {
+      const auto Region = SkipRegions.size();
+      // Only the final region is certain to be encoded in the 4-bit low field.
+      return SingleUseRegions[Region] <
+             ((Region == MaxNumberOfSkipRegions) ? 0b1111 : 0b111);
+    }
+
+    unsigned encodeImm() {
+      // The most recently added region goes in the low 4 bits; the remaining
+      // skip and single use regions follow in alternating 3-bit fields.
+      unsigned Imm = SingleUseRegions[SkipRegions.size()];
+      unsigned ShiftAmount = 4;
+      for (unsigned i = SkipRegions.size(); i > 0; i--) {
+        Imm |= SkipRegions[i - 1] << ShiftAmount;
+        ShiftAmount += 3;
+        Imm |= SingleUseRegions[i - 1] << ShiftAmount;
+        ShiftAmount += 3;
+      }
+      return Imm;
+    }
+
+  public:
+    SingleUseInstruction(const unsigned ProducerPosition,
+                         MachineInstr *Producer)
+        : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
+          SingleUseRegions({1, 0, 0}) {}
+
+    // Tries to fold another single use producer into this instruction.
+    // Returns false if that is impossible, either because there is no room
+    // left to encode another single use region or because the producer is
+    // too far away for the number of skipped instructions to be encoded.
+    bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
+      // The producer is too far away to reach with the remaining skip regions
+      // (there are none left once both have been used), so a new instruction
+      // is needed.
+      if (LastEncodedPositionEnd +
+              (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
+          ProducerPosition)
+        return false;
+
+      // A skip region is needed if there is a gap or the region is full.
+      if (LastEncodedPositionEnd != ProducerPosition ||
+          !currentRegionHasSpace()) {
+        // The current region is full, so a skip region would be needed, but
+        // there is no room for another skip region.
+        if (SkipRegions.size() == MaxNumberOfSkipRegions)
+          return false;
+        skip(ProducerPosition);
+      }
+
+      SingleUseRegions[SkipRegions.size()]++;
+      LastEncodedPositionEnd = ProducerPosition + 1;
+      ProducerInstr = MI;
+      return true;
+    }
+
+    auto emit(const SIInstrInfo *SII) {
+      return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
+                     SII->get(AMDGPU::S_SINGLEUSE_VDST))
+          .addImm(encodeImm());
+    }
+  };
 
 public:
   static char ID;
 
   AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
 
-  void emitSingleUseVDST(MachineInstr &MI) const {
-    // Mark the following instruction as a single-use producer:
-    //   s_singleuse_vdst { supr0: 1 }
-    BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
-        .addImm(0x1);
+  void insertSingleUseInstructions(
+      ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
+    SmallVector<SingleUseInstruction> Instructions;
+
+    for (auto &[Position, MI] : SingleUseProducers) {
+      // Fold this producer into the most recent instruction if it still fits.
+      if (Instructions.empty() ||
+          !Instructions.back().tryAddProducer(Position, MI)) {
+        // Otherwise start a new instruction for it.
+        Instructions.push_back(SingleUseInstruction(Position, MI));
+      }
+    }
+
+    for (auto &Instruction : Instructions)
+      Instruction.emit(SII);
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
@@ -78,6 +173,10 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
         }
       }
 
+      SmallVector<std::pair<unsigned, MachineInstr *>>
+          SingleUseProducerPositions;
+
+      unsigned VALUInstrCount = 0;
       for (MachineInstr &MI : reverse(MBB.instrs())) {
         // All registers in all operands need to be single use for an
         // instruction to be marked as a single use producer.
@@ -119,13 +218,16 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
           for (auto &UsedReg : RegisterUseCount)
             UsedReg.second = 2;
         }
-        if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
-          // TODO: Replace with candidate logging for instruction grouping
-          // later.
-          emitSingleUseVDST(MI);
+
+        if (!SIInstrInfo::isVALU(MI))
+          continue;
+        if (AllProducerOperandsAreSingleUse) {
+          SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
           InstructionEmitted = true;
         }
+        VALUInstrCount++;
       }
+      insertSingleUseInstructions(SingleUseProducerPositions);
     }
     return InstructionEmitted;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
index 513734388eb65b..859b19236f7b58 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
@@ -60,9 +60,8 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 2
   ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
   ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
   ; CHECK-NEXT: {{  $}}
@@ -87,13 +86,10 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 4
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
@@ -160,16 +156,13 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 274
   ; CHECK-NEXT:   $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
   ; CHECK-NEXT:   $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
   ; CHECK-NEXT:   $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
   ; CHECK-NEXT:   $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
   ; CHECK-NEXT:   $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
   ; CHECK-NEXT:   $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
@@ -229,9 +222,8 @@ body: |
   ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 2
   ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
   ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -686,9 +678,8 @@ body: |
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 2
   ; CHECK-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
   ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
   ; CHECK-NEXT: {{  $}}
@@ -726,3 +717,530 @@ body: |
   bb.1:
     liveins: $vgpr1, $vgpr2
 ...
+
+# Three single use producer instructions with non single use producer
+# instructions in between.
+---
+name: three_producers_with_two_skips
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: three_producers_with_two_skips
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 9361
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2, $vgpr4
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr2, $vgpr4
+...
+
+# Six single use producer instructions with non single use producer
+# instructions in between.
+---
+name: six_producers_with_four_skips
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: six_producers_with_four_skips
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 145
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 9362
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2, $vgpr4, $vgpr7, $vgpr9
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr2, $vgpr4, $vgpr7, $vgpr9
+...
+
+# Five single use producer instructions, followed by
+# four non single use producers, followed by
+# three single use producer instructions, followed by
+# two non single use producers, followed by
+# one single use producer instruction.
+---
+name: immediate_order
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: immediate_order
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 10693
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr13, $vgpr14
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr13, $vgpr14
+...
+
+# Maximum number of single use producers that can be encoded in a single
+# instruction.
+---
+name: maximum_producers_single_instruction
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: maximum_producers_single_instruction
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 58255
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+...
+
+# One more than the maximum number of single use producers that can be encoded
+# in a single instruction.
+---
+name: too_many_producers_single_instruction
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: too_many_producers_single_instruction
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 58255
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+
+
+
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+...
+
+# Maximum distance between single use producers that can be encoded in a single
+# instruction.
+---
+name: maximum_skips_single_instruction
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: maximum_skips_single_instruction
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 15473
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+...
+
+# One more than the maximum distance between single use producers that can be
+# encoded in a single instruction.
+---
+name: too_many_skips_single_instruction
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: too_many_skips_single_instruction
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16
+...
+
+
+# Maximum possible encoding value: every bit of the immediate is set (65535); bb.1's live-ins split bb.0's V_MOVs into runs of 15, 7, 7, 7 and 7.
+---
+name: all_immediate_bits_set
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: all_immediate_bits_set
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 65535
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr31 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr32 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr33 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr34 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr35 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr36 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr37 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr38 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr39 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr40 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr41 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr42 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr43 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr31 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr32 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr33 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr34 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr35 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr36 = V_MOV_B32_e32 $vgpr0, implicit $exec
+
+    $vgpr37 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr38 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr39 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr40 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr41 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr42 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr43 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36
+
+...



More information about the llvm-commits mailing list