[llvm] [RegisterCoalescer]: Try inflated RC for coalescing reg->subreg (PR #134438)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 4 12:15:02 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-regalloc

Author: Jeffrey Byrnes (jrbyrnes)

<details>
<summary>Changes</summary>

This is the first of a few patches designed to improve RegisterCoalescing when there are RegisterClass mismatches between the SrcReg and DstReg of a copy. This is from an effort to split up https://github.com/llvm/llvm-project/pull/130870 . This PR handles the case of Reg->SubReg copies. It is being cherry-picked on top of https://github.com/llvm/llvm-project/pull/132137 since it uses the test coverage.

This PR introduces and uses `getLargestConstrainedSuperClass` to try to find a matching super RegisterClass for coalescing when the Src and Dst RegisterClasses are incompatible for coalescing. `getLargestConstrainedSuperClass` checks use MIs of a given register for any RegClass constraints in order to produce legal code.

---

Patch is 307.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134438.diff


7 Files Affected:

- (modified) llvm/include/llvm/CodeGen/MachineRegisterInfo.h (+8-1) 
- (modified) llvm/lib/CodeGen/MachineRegisterInfo.cpp (+12-4) 
- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+8-1) 
- (modified) llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir (+2229-1) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll (+170-130) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+1941-1325) 
- (modified) llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll (+366-340) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 1c465741cb462..dc49733e2a59b 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -738,7 +738,14 @@ class MachineRegisterInfo {
 
   /// recomputeRegClass - Try to find a legal super-class of Reg's register
   /// class that still satisfies the constraints from the instructions using
-  /// Reg.  Returns true if Reg was upgraded.
+  /// \p Reg. \p return the super-class TargetRegisterClass if one was found,
+  /// otherwise \p return the original TargetRegisterClass.
+  const TargetRegisterClass *
+  getLargestConstrainedSuperClass(Register Reg) const;
+
+  /// recomputeRegClass - Try to find a legal super-class of Reg's register
+  /// class that still satisfies the constraints from the instructions using
+  /// \p Reg. \p return true if Reg was upgraded.
   ///
   /// This method can be used after constraints have been removed from a
   /// virtual register, for example after removing instructions or splitting
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 937f63f6c5e00..d483625212c55 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -118,8 +118,8 @@ MachineRegisterInfo::constrainRegAttrs(Register Reg,
   return true;
 }
 
-bool
-MachineRegisterInfo::recomputeRegClass(Register Reg) {
+const TargetRegisterClass *
+MachineRegisterInfo::getLargestConstrainedSuperClass(Register Reg) const {
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   const TargetRegisterClass *OldRC = getRegClass(Reg);
   const TargetRegisterInfo *TRI = getTargetRegisterInfo();
@@ -127,7 +127,7 @@ MachineRegisterInfo::recomputeRegClass(Register Reg) {
 
   // Stop early if there is no room to grow.
   if (NewRC == OldRC)
-    return false;
+    return NewRC;
 
   // Accumulate constraints from all uses.
   for (MachineOperand &MO : reg_nodbg_operands(Reg)) {
@@ -136,8 +136,16 @@ MachineRegisterInfo::recomputeRegClass(Register Reg) {
     unsigned OpNo = &MO - &MI->getOperand(0);
     NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII, TRI);
     if (!NewRC || NewRC == OldRC)
-      return false;
+      return OldRC;
   }
+  return NewRC;
+}
+
+bool MachineRegisterInfo::recomputeRegClass(Register Reg) {
+  const TargetRegisterClass *OldRC = getRegClass(Reg);
+  const TargetRegisterClass *NewRC = getLargestConstrainedSuperClass(Reg);
+  if (NewRC == OldRC)
+    return false;
   setRegClass(Reg, NewRC);
   return true;
 }
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index dbd354f2ca2c4..b2e83beae6f62 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -477,7 +477,9 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
     Flipped = true;
   }
 
-  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+  const MachineFunction *MF = MI->getMF();
+
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
   const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
 
   if (Dst.isPhysical()) {
@@ -515,6 +517,11 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
       // SrcReg will be merged with a sub-register of DstReg.
       SrcIdx = DstSub;
       NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+      if (!NewRC) {
+        auto SuperDstRC = MRI.getLargestConstrainedSuperClass(Dst);
+        if (SuperDstRC != DstRC)
+          NewRC = TRI.getMatchingSuperRegClass(SuperDstRC, SrcRC, DstSub);
+      }
     } else if (SrcSub) {
       // DstReg will be merged with a sub-register of SrcReg.
       DstIdx = SrcSub;
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
index 8ea5c3ea73e77..6786c6712ad3e 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
@@ -4,7 +4,6 @@
 # Test coalescing situations which can use av_* registers to handle
 # copies between VGPRs and AGPRs.
 
-
 # Should coalesce %0 and %1 into subregisters of the av_64 common
 # class
 ---
@@ -517,3 +516,2232 @@ body:             |
     SI_RETURN
 
 ...
+
+
+
+# Should coalesce %0 and %1 into subregisters of the av_64 common
+# class
+---
+name: copy_vgpr32_to_areg64_coalesce_with_av64_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: copy_vgpr32_to_areg64_coalesce_with_av64_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    %0.sub1:vreg_64 = COPY $vgpr1
+    undef %2.sub0:areg_64 = COPY %0.sub0
+    %2.sub1:areg_64 = COPY %0.sub1
+    INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, killed %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_to_areg64_align2_coalesce_with_av64_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: copy_vgpr32_to_areg64_align2_coalesce_with_av64_align2_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    %0.sub1:vreg_64 = COPY $vgpr1
+    undef %2.sub0:areg_64_align2 = COPY %0.sub0
+    %2.sub1:areg_64_align2 = COPY %0.sub1
+    INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_to_areg96_coalesce_with_av96_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: copy_vgpr32_to_areg96_coalesce_with_av96_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_96 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_96 =COPY $vgpr0
+    %0.sub1:vreg_96 = COPY $vgpr1
+    %0.sub2:vreg_96 = COPY $vgpr2
+    undef %3.sub0:areg_96 = COPY %0.sub0
+    %3.sub1:areg_96 = COPY %0.sub1
+    %3.sub2:areg_96 = COPY %0.sub2
+    INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %3
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: copy_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_96 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_96 =COPY $vgpr0
+    %0.sub1:vreg_96 = COPY $vgpr1
+    %0.sub2:vreg_96 = COPY $vgpr2
+    undef %3.sub0:areg_96_align2 = COPY %0.sub0
+    %3.sub1:areg_96_align2 = COPY %0.sub1
+    %3.sub2:areg_96_align2 = COPY %0.sub2
+    INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, %3
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr64_to_areg64_coalesce_with_av128_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: copy_vgpr64_to_areg64_coalesce_with_av128_sub
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:vreg_128 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]].sub0_sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY]].sub2_sub3
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AV_128_with_hi16_in_VGPR_16_Lo128 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0_sub1:vreg_128 =COPY $vgpr0_vgpr1
+    %0.sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3
+    undef %2.sub0_sub1:areg_128 = COPY %0.sub0_sub1
+    %2.sub2_sub3:areg_128 = COPY %0.sub2_sub3
+    INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %2
+    SI_RETURN
+
+...
+
+
+
+---
+name: copy_vgpr64_to_areg64_align2_coalesce_with_av128_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: copy_vgpr64_to_areg64_align2_coalesce_with_av128_align2_sub
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_128 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_128 = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_with_sub0_sub1_sub2_in_AReg_96_with_sub1_sub2_in_AReg_64_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_128 =COPY $vgpr0_vgpr1
+    %0.sub1:vreg_128 = COPY $vgpr2_vgpr3
+    undef %2.sub0_sub1:areg_128_align2 = COPY %0.sub0
+    %2.sub2_sub3:areg_128_align2 = COPY %0.sub1
+    INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_sgpr32_to_areg64_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8, $sgpr9
+
+    ; CHECK-LABEL: name: copy_sgpr32_to_areg64_align2_sub
+    ; CHECK: liveins: $sgpr8, $sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:sreg_64 = COPY $sgpr8
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:sreg_64 = COPY $sgpr9
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:sreg_64 = COPY $sgpr8
+    %0.sub1:sreg_64 = COPY $sgpr9
+    undef %2.sub0:areg_64_align2 = COPY %0.sub0
+    %2.sub1:areg_64_align2 = COPY %0.sub1
+    INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY]].sub1_sub2
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_96 =COPY $vgpr0
+    %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+    undef %2.sub0:areg_96 = COPY %0.sub0
+    %2.sub1_sub2:areg_96 = COPY %0.sub1_sub2
+    INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_align2_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96_align2 = COPY [[COPY]].sub1_sub2
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_96 =COPY $vgpr0
+    %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+    undef %2.sub0:areg_96_align2 = COPY %0.sub0
+    %2.sub1_sub2:areg_96_align2 = COPY %0.sub1_sub2
+    INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_sub
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]].sub0_sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+    %0.sub2:vreg_96 = COPY $vgpr2
+    undef %2.sub0_sub1:areg_96 = COPY %0.sub0_sub1
+    %2.sub2:areg_96 = COPY %0.sub2
+    INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+    ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96_align2 = COPY [[COPY]].sub0_sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+    %0.sub2:vreg_96 = COPY $vgpr2
+    undef %2.sub0_sub1:areg_96_align2 = COPY %0.sub0_sub1
+    %2.sub2:areg_96_align2 = COPY %0.sub2
+    INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x2_to_areg64_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: copy_vgpr32_x2_to_areg64_sub
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    undef %2.sub0:areg_64 = COPY %0.sub0
+    %2.sub1:areg_64 = COPY %0.sub0
+    INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, killed %2
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x3_to_areg96_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: copy_vgpr32_x3_to_areg96_sub
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    undef %1.sub0:areg_96 = COPY %0.sub0
+    %1.sub1:areg_96 = COPY %0.sub0
+    %1.sub2:areg_96 = COPY %0.sub0
+    INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %1
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x3_to_areg96_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: copy_vgpr32_x3_to_areg96_align2_sub
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    undef %1.sub0:areg_96_align2 = COPY %0.sub0
+    %1.sub1:areg_96_align2 = COPY %0.sub0
+    INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, %1
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x4_to_areg128_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: copy_vgpr32_x4_to_areg128_sub
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_128 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AV_128_with_hi16_in_VGPR_16_Lo128 */, [[COPY1]]
+    ; CHECK-NEXT: SI_RETURN
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    undef %1.sub0:areg_128 = COPY %0.sub0
+    %1.sub1:areg_128 = COPY %0.sub0
+    %1.sub2:areg_128 = COPY %0.sub0
+    %1.sub3:areg_128 = COPY %0.sub0
+    INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %1
+    SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x4_to_areg128_align2_sub
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: copy_vgpr32_x4_to_areg128_align2_sub
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_128_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]].sub0
+    ; CHECK-NEX...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/134438


More information about the llvm-commits mailing list