[llvm] [RegisterCoalescer]: Try inflated RC for coalescing reg->subreg (PR #134438)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 12:15:02 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
Author: Jeffrey Byrnes (jrbyrnes)
<details>
<summary>Changes</summary>
This is the first of a few patches designed to improve RegisterCoalescing when there are RegisterClass mismatches between the SrcReg and DstReg of a copy. Thie is from an effort to split up https://github.com/llvm/llvm-project/pull/130870 . This PR handles the case of Reg->SubReg copies. It is being cherry-picked on top of https://github.com/llvm/llvm-project/pull/132137 since it uses the test coverage.
This PR introduces and uses `getLargestConstrainedSuperClass` to try to find a matching super RegisterClass for coalescing when the Src and Dst RegisterClasses are incompatible for coalescing. `getLargestConstrainedSuperClass` checks use MIs of a given register for any RegClass constrains in order to produce legal code.
---
Patch is 307.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134438.diff
7 Files Affected:
- (modified) llvm/include/llvm/CodeGen/MachineRegisterInfo.h (+8-1)
- (modified) llvm/lib/CodeGen/MachineRegisterInfo.cpp (+12-4)
- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+8-1)
- (modified) llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir (+2229-1)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll (+170-130)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll (+1941-1325)
- (modified) llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll (+366-340)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 1c465741cb462..dc49733e2a59b 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -738,7 +738,14 @@ class MachineRegisterInfo {
/// recomputeRegClass - Try to find a legal super-class of Reg's register
/// class that still satisfies the constraints from the instructions using
- /// Reg. Returns true if Reg was upgraded.
+ /// \p Reg. \p return the super-class TargetRegisterClass if one was found,
+ /// otherwise \p return the original TargetRegisterClass.
+ const TargetRegisterClass *
+ getLargestConstrainedSuperClass(Register Reg) const;
+
+ /// recomputeRegClass - Try to find a legal super-class of Reg's register
+ /// class that still satisfies the constraints from the instructions using
+ /// \p Reg. \p return true if Reg was upgraded.
///
/// This method can be used after constraints have been removed from a
/// virtual register, for example after removing instructions or splitting
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 937f63f6c5e00..d483625212c55 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -118,8 +118,8 @@ MachineRegisterInfo::constrainRegAttrs(Register Reg,
return true;
}
-bool
-MachineRegisterInfo::recomputeRegClass(Register Reg) {
+const TargetRegisterClass *
+MachineRegisterInfo::getLargestConstrainedSuperClass(Register Reg) const {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
@@ -127,7 +127,7 @@ MachineRegisterInfo::recomputeRegClass(Register Reg) {
// Stop early if there is no room to grow.
if (NewRC == OldRC)
- return false;
+ return NewRC;
// Accumulate constraints from all uses.
for (MachineOperand &MO : reg_nodbg_operands(Reg)) {
@@ -136,8 +136,16 @@ MachineRegisterInfo::recomputeRegClass(Register Reg) {
unsigned OpNo = &MO - &MI->getOperand(0);
NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII, TRI);
if (!NewRC || NewRC == OldRC)
- return false;
+ return OldRC;
}
+ return NewRC;
+}
+
+bool MachineRegisterInfo::recomputeRegClass(Register Reg) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC = getLargestConstrainedSuperClass(Reg);
+ if (NewRC == OldRC)
+ return false;
setRegClass(Reg, NewRC);
return true;
}
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index dbd354f2ca2c4..b2e83beae6f62 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -477,7 +477,9 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
Flipped = true;
}
- const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+ const MachineFunction *MF = MI->getMF();
+
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
if (Dst.isPhysical()) {
@@ -515,6 +517,11 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
// SrcReg will be merged with a sub-register of DstReg.
SrcIdx = DstSub;
NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ if (!NewRC) {
+ auto SuperDstRC = MRI.getLargestConstrainedSuperClass(Dst);
+ if (SuperDstRC != DstRC)
+ NewRC = TRI.getMatchingSuperRegClass(SuperDstRC, SrcRC, DstSub);
+ }
} else if (SrcSub) {
// DstReg will be merged with a sub-register of SrcReg.
DstIdx = SrcSub;
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
index 8ea5c3ea73e77..6786c6712ad3e 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir
@@ -4,7 +4,6 @@
# Test coalescing situations which can use av_* registers to handle
# copies between VGPRs and AGPRs.
-
# Should coalesce %0 and %1 into subregisters of the av_64 common
# class
---
@@ -517,3 +516,2232 @@ body: |
SI_RETURN
...
+
+
+
+# Should coalesce %0 and %1 into subregisters of the av_64 common
+# class
+---
+name: copy_vgpr32_to_areg64_coalesce_with_av64_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: copy_vgpr32_to_areg64_coalesce_with_av64_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_64 = COPY $vgpr0
+ %0.sub1:vreg_64 = COPY $vgpr1
+ undef %2.sub0:areg_64 = COPY %0.sub0
+ %2.sub1:areg_64 = COPY %0.sub1
+ INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, killed %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_to_areg64_align2_coalesce_with_av64_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; CHECK-LABEL: name: copy_vgpr32_to_areg64_align2_coalesce_with_av64_align2_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_64 = COPY $vgpr1
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_64 = COPY $vgpr0
+ %0.sub1:vreg_64 = COPY $vgpr1
+ undef %2.sub0:areg_64_align2 = COPY %0.sub0
+ %2.sub1:areg_64_align2 = COPY %0.sub1
+ INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_to_areg96_coalesce_with_av96_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: copy_vgpr32_to_areg96_coalesce_with_av96_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_96 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_96 =COPY $vgpr0
+ %0.sub1:vreg_96 = COPY $vgpr1
+ %0.sub2:vreg_96 = COPY $vgpr2
+ undef %3.sub0:areg_96 = COPY %0.sub0
+ %3.sub1:areg_96 = COPY %0.sub1
+ %3.sub2:areg_96 = COPY %0.sub2
+ INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %3
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: copy_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_96 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_96 =COPY $vgpr0
+ %0.sub1:vreg_96 = COPY $vgpr1
+ %0.sub2:vreg_96 = COPY $vgpr2
+ undef %3.sub0:areg_96_align2 = COPY %0.sub0
+ %3.sub1:areg_96_align2 = COPY %0.sub1
+ %3.sub2:areg_96_align2 = COPY %0.sub2
+ INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, %3
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr64_to_areg64_coalesce_with_av128_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; CHECK-LABEL: name: copy_vgpr64_to_areg64_coalesce_with_av128_sub
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:vreg_128 = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128 = COPY [[COPY]].sub0_sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128 = COPY [[COPY]].sub2_sub3
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AV_128_with_hi16_in_VGPR_16_Lo128 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0_sub1:vreg_128 =COPY $vgpr0_vgpr1
+ %0.sub2_sub3:vreg_128 = COPY $vgpr2_vgpr3
+ undef %2.sub0_sub1:areg_128 = COPY %0.sub0_sub1
+ %2.sub2_sub3:areg_128 = COPY %0.sub2_sub3
+ INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %2
+ SI_RETURN
+
+...
+
+
+
+---
+name: copy_vgpr64_to_areg64_align2_coalesce_with_av128_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; CHECK-LABEL: name: copy_vgpr64_to_areg64_align2_coalesce_with_av128_align2_sub
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_128 = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:vreg_128 = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_128_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2_sub3:areg_128_align2 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_with_sub0_sub1_sub2_in_AReg_96_with_sub1_sub2_in_AReg_64_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_128 =COPY $vgpr0_vgpr1
+ %0.sub1:vreg_128 = COPY $vgpr2_vgpr3
+ undef %2.sub0_sub1:areg_128_align2 = COPY %0.sub0
+ %2.sub2_sub3:areg_128_align2 = COPY %0.sub1
+ INLINEASM &"; use $0", 0 /* attdialect */, 6488073 /* reguse:AReg_128_Align2 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_sgpr32_to_areg64_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr8, $sgpr9
+
+ ; CHECK-LABEL: name: copy_sgpr32_to_areg64_align2_sub
+ ; CHECK: liveins: $sgpr8, $sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:sreg_64 = COPY $sgpr8
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1:sreg_64 = COPY $sgpr9
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64_align2 = COPY [[COPY]].sub1
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:sreg_64 = COPY $sgpr8
+ %0.sub1:sreg_64 = COPY $sgpr9
+ undef %2.sub0:areg_64_align2 = COPY %0.sub0
+ %2.sub1:areg_64_align2 = COPY %0.sub1
+ INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1_vgpr2
+
+ ; CHECK-LABEL: name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96 = COPY [[COPY]].sub1_sub2
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_96 =COPY $vgpr0
+ %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+ undef %2.sub0:areg_96 = COPY %0.sub0
+ %2.sub1_sub2:areg_96 = COPY %0.sub1_sub2
+ INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1_vgpr2
+
+ ; CHECK-LABEL: name: copy_vgpr32_vgpr64_to_areg96_coalesce_with_av96_align2_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_96 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1_sub2:areg_96_align2 = COPY [[COPY]].sub1_sub2
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_96 =COPY $vgpr0
+ %0.sub1_sub2:vreg_96 = COPY $vgpr1_vgpr2
+ undef %2.sub0:areg_96_align2 = COPY %0.sub0
+ %2.sub1_sub2:areg_96_align2 = COPY %0.sub1_sub2
+ INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_sub
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96 = COPY [[COPY]].sub0_sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+ %0.sub2:vreg_96 = COPY $vgpr2
+ undef %2.sub0_sub1:areg_96 = COPY %0.sub0_sub1
+ %2.sub2:areg_96 = COPY %0.sub2
+ INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1_vgpr2
+
+ ; CHECK-LABEL: name: copy_vgpr64_vgpr32_to_areg96_coalesce_with_av96_align2_sub
+ ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]].sub2:vreg_96 = COPY $vgpr2
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0_sub1:areg_96_align2 = COPY [[COPY]].sub0_sub1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96_align2 = COPY [[COPY]].sub2
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0_sub1:vreg_96 = COPY $vgpr0_vgpr1
+ %0.sub2:vreg_96 = COPY $vgpr2
+ undef %2.sub0_sub1:areg_96_align2 = COPY %0.sub0_sub1
+ %2.sub2:areg_96_align2 = COPY %0.sub2
+ INLINEASM &"; use $0", 0 /* attdialect */, 3735561 /* reguse:AReg_64_Align2 */, %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x2_to_areg64_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: copy_vgpr32_x2_to_areg64_sub
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_64 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_64 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_64 = COPY $vgpr0
+ undef %2.sub0:areg_64 = COPY %0.sub0
+ %2.sub1:areg_64 = COPY %0.sub0
+ INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, killed %2
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x3_to_areg96_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: copy_vgpr32_x3_to_areg96_sub
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_96 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_64 = COPY $vgpr0
+ undef %1.sub0:areg_96 = COPY %0.sub0
+ %1.sub1:areg_96 = COPY %0.sub0
+ %1.sub2:areg_96 = COPY %0.sub0
+ INLINEASM &"; use $0", 0 /* attdialect */, 4587529 /* reguse:AReg_96 */, %1
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x3_to_areg96_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: copy_vgpr32_x3_to_areg96_align2_sub
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_96_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_96_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_64 = COPY $vgpr0
+ undef %1.sub0:areg_96_align2 = COPY %0.sub0
+ %1.sub1:areg_96_align2 = COPY %0.sub0
+ INLINEASM &"; use $0", 0 /* attdialect */, 4915209 /* reguse:AReg_96_Align2 */, %1
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x4_to_areg128_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: copy_vgpr32_x4_to_areg128_sub
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_128 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:areg_128 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AV_128_with_hi16_in_VGPR_16_Lo128 */, [[COPY1]]
+ ; CHECK-NEXT: SI_RETURN
+ undef %0.sub0:vreg_64 = COPY $vgpr0
+ undef %1.sub0:areg_128 = COPY %0.sub0
+ %1.sub1:areg_128 = COPY %0.sub0
+ %1.sub2:areg_128 = COPY %0.sub0
+ %1.sub3:areg_128 = COPY %0.sub0
+ INLINEASM &"; use $0", 0 /* attdialect */, 6225929 /* reguse:AReg_128 */, killed %1
+ SI_RETURN
+
+...
+
+---
+name: copy_vgpr32_x4_to_areg128_align2_sub
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: copy_vgpr32_x4_to_areg128_align2_sub
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY $vgpr0
+ ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:areg_128_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:areg_128_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:areg_128_align2 = COPY [[COPY]].sub0
+ ; CHECK-NEX...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/134438
More information about the llvm-commits
mailing list