[llvm] 50e345e - [AArch64] Use correct regclass for spills of ZPR2/ZPR4 (#148806)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 06:53:09 PDT 2025
Author: Sander de Smalen
Date: 2025-07-15T14:53:05+01:00
New Revision: 50e345ef95596f9d416700cf9bc8bc6d3ef25591
URL: https://github.com/llvm/llvm-project/commit/50e345ef95596f9d416700cf9bc8bc6d3ef25591
DIFF: https://github.com/llvm/llvm-project/commit/50e345ef95596f9d416700cf9bc8bc6d3ef25591.diff
LOG: [AArch64] Use correct regclass for spills of ZPR2/ZPR4 (#148806)
Commit a6293228fdd5aba8c04c63f02f3d017443feb3f2 forced the register
class of ZPR[24]StridedOrContiguous for spills/fills of ZPR2 and ZPR4,
but this may result in issues when the regclass for the fill is
ZPR2/ZPR4, which would allow the register allocator to pick `z1_z2`.
That register is not supported by ZPR2StridedOrContiguous, which only
supports tuples of the form (strided) `z0_z8`, `z1_z9` or (contiguous,
starting at a multiple of 2) `z0_z1`, `z2_z3`. For spills we could add a
new register class that supports any of the tuple forms, but I've
decided to use two pseudos, similar to the fills, for consistency.
Fixes https://github.com/llvm/llvm-project/issues/148655
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/spillfill-sve.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 36f3a670808d4..07b36d20b0c6d 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1591,18 +1591,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
"Non-writeback variants of STGloop / STZGloop should not "
"survive past PrologEpilogInserter.");
case AArch64::STR_ZZZZXI:
+ case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
case AArch64::STR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
case AArch64::STR_ZZXI:
+ case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
case AArch64::STR_PPXI:
return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
case AArch64::LDR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
case AArch64::LDR_ZZZXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
+ case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
case AArch64::LDR_PPXI:
return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c1474773faa76..5420545cc3cec 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2482,8 +2482,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LDR_PXI:
case AArch64::LDR_ZXI:
case AArch64::LDR_ZZXI:
+ case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDR_ZZZXI:
case AArch64::LDR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDRBBui:
case AArch64::LDRBui:
case AArch64::LDRDui:
@@ -2525,8 +2527,10 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STR_PXI:
case AArch64::STR_ZXI:
case AArch64::STR_ZZXI:
+ case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
case AArch64::STR_ZZZXI:
case AArch64::STR_ZZZZXI:
+ case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
case AArch64::STRBBui:
case AArch64::STRBui:
case AArch64::STRDui:
@@ -4318,7 +4322,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
break;
// SVE
case AArch64::STR_ZZZZXI:
+ case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDR_ZZZZXI:
+ case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
@@ -4332,7 +4338,9 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
+ case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
case AArch64::LDR_ZZXI:
+ case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
Scale = TypeSize::getScalable(16);
Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
@@ -5559,8 +5567,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
- } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
- AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected register store without SVE store instructions");
+ Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
+ StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZXI;
@@ -5584,8 +5596,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
- } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
- AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected register store without SVE store instructions");
+ Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
+ StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZZXI;
@@ -5736,8 +5752,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
- } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
- AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected register load without SVE load instructions");
+ Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
+ StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZXI;
@@ -5761,8 +5781,12 @@ void AArch64InstrInfo::loadRegFromStackSlot(
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
- } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
- AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected register load without SVE load instructions");
+ Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
+ StackID = TargetStackID::ScalableVector;
+ } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZZXI;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index eddb96979f7b8..0c4b4f4c3ed88 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2625,16 +2625,22 @@ let Predicates = [HasSVE_or_SME] in {
// These get expanded to individual LDR_ZXI/STR_ZXI instructions in
// AArch64ExpandPseudoInsts.
let mayLoad = 1, hasSideEffects = 0 in {
- def LDR_ZZXI : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+
+ def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
- def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b_strided_and_contiguous:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
- def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def LDR_PPXI : Pseudo<(outs PPR2:$pp), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let mayStore = 1, hasSideEffects = 0 in {
- def STR_ZZXI : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZZXI_STRIDED_CONTIGUOUS : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+
+ def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
- def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b_strided_and_contiguous:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
- def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
+ def STR_PPXI : Pseudo<(outs), (ins PPR2:$pp, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>;
}
let AddedComplexity = 1 in {
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index 83c9b73c57570..2b16dd0f29ecc 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -1,5 +1,5 @@
-# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s
-# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs -aarch64-stack-hazard-size=0 %s -o - | FileCheck %s --check-prefix=EXPAND
--- |
; ModuleID = '<stdin>'
source_filename = "<stdin>"
@@ -14,13 +14,14 @@
define aarch64_sve_vector_pcs void @spills_fills_stack_id_virtreg_ppr_to_pnr() #1 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable }
- define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #0 { entry: unreachable }
+ define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2strided() #2 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable }
define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable }
- define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #0 { entry: unreachable }
+ define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4strided() #2 { entry: unreachable }
attributes #0 = { nounwind "target-features"="+sve" }
attributes #1 = { nounwind "target-features"="+sve2p1" }
+ attributes #2 = { nounwind "target-features"="+sve,+sme2" "aarch64_pstate_sm_enabled" }
...
---
@@ -318,10 +319,10 @@ registers:
- { id: 0, class: zpr2 }
stack:
liveins:
- - { reg: '$z0_z1', virtual-reg: '%0' }
+ - { reg: '$z1_z2', virtual-reg: '%0' }
body: |
bb.0.entry:
- liveins: $z0_z1
+ liveins: $z1_z2
; CHECK-LABEL: name: spills_fills_stack_id_zpr2
; CHECK: stack:
@@ -329,12 +330,12 @@ body: |
; CHECK-NEXT: stack-id: scalable-vector
; EXPAND-LABEL: name: spills_fills_stack_id_zpr2
- ; EXPAND: STR_ZXI $z0, $sp, 0
- ; EXPAND: STR_ZXI $z1, $sp, 1
- ; EXPAND: $z0 = LDR_ZXI $sp, 0
- ; EXPAND: $z1 = LDR_ZXI $sp, 1
+ ; EXPAND: STR_ZXI $z1, $sp, 0
+ ; EXPAND: STR_ZXI $z2, $sp, 1
+ ; EXPAND: $z1 = LDR_ZXI $sp, 0
+ ; EXPAND: $z2 = LDR_ZXI $sp, 1
- %0:zpr2 = COPY $z0_z1
+ %0:zpr2 = COPY $z1_z2
$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
@@ -345,7 +346,7 @@ body: |
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
- $z0_z1 = COPY %0
+ $z1_z2 = COPY %0
RET_ReallyLR
...
---
@@ -439,10 +440,10 @@ registers:
- { id: 0, class: zpr4 }
stack:
liveins:
- - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' }
+ - { reg: '$z1_z2_z3_z4', virtual-reg: '%0' }
body: |
bb.0.entry:
- liveins: $z0_z1_z2_z3
+ liveins: $z1_z2_z3_z4
; CHECK-LABEL: name: spills_fills_stack_id_zpr4
; CHECK: stack:
@@ -450,16 +451,16 @@ body: |
; CHECK-NEXT: stack-id: scalable-vector
; EXPAND-LABEL: name: spills_fills_stack_id_zpr4
- ; EXPAND: STR_ZXI $z0, $sp, 0
- ; EXPAND: STR_ZXI $z1, $sp, 1
- ; EXPAND: STR_ZXI $z2, $sp, 2
- ; EXPAND: STR_ZXI $z3, $sp, 3
- ; EXPAND: $z0 = LDR_ZXI $sp, 0
- ; EXPAND: $z1 = LDR_ZXI $sp, 1
- ; EXPAND: $z2 = LDR_ZXI $sp, 2
- ; EXPAND: $z3 = LDR_ZXI $sp, 3
+ ; EXPAND: STR_ZXI $z1, $sp, 0
+ ; EXPAND: STR_ZXI $z2, $sp, 1
+ ; EXPAND: STR_ZXI $z3, $sp, 2
+ ; EXPAND: STR_ZXI $z4, $sp, 3
+ ; EXPAND: $z1 = LDR_ZXI $sp, 0
+ ; EXPAND: $z2 = LDR_ZXI $sp, 1
+ ; EXPAND: $z3 = LDR_ZXI $sp, 2
+ ; EXPAND: $z4 = LDR_ZXI $sp, 3
- %0:zpr4 = COPY $z0_z1_z2_z3
+ %0:zpr4 = COPY $z1_z2_z3_z4
$z0_z1_z2_z3 = IMPLICIT_DEF
$z4_z5_z6_z7 = IMPLICIT_DEF
@@ -470,7 +471,7 @@ body: |
$z24_z25_z26_z27 = IMPLICIT_DEF
$z28_z29_z30_z31 = IMPLICIT_DEF
- $z0_z1_z2_z3 = COPY %0
+ $z1_z2_z3_z4 = COPY %0
RET_ReallyLR
...
---
More information about the llvm-commits
mailing list