[llvm] [AArch64] Check for streaming mode in HasSME* features. (PR #96302)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 06:09:42 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

<details>
<summary>Changes</summary>

In addition to making the HasSME* feature predicates check for streaming mode, this patch fixes up some asserts in copyPhysReg, loadRegFromStackSlot and storeRegToStackSlot.

---

The patch is 89.40 KiB and has been truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/96302.diff


109 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-2) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+20-15) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+30-24) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+23-16) 
- (modified) llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtl.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas16.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-frint.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x2.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane-x4.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti2-lane.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x2.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvt.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-qcvtn.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-conversion.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-logical.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-pred-creation.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-pred-testing.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-reversal.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sel.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-imm.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-st1-addressing-mode-reg-reg.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-st1.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-uqdec.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-uqinc.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-add-sub.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-complex-dot.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-widening-mul-acc.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-int-mul-lane.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-polynomial-arithmetic.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-revd.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-unary-narrowing.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-complex-arith.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-complex-int-arith.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-dsp.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-cntp.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dots.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fp-reduce.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-int-reduce.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-unpk.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx2.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pn.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipx2.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipx4.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7f821715c910d..c8307d5b483e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -423,7 +423,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     addQRType(MVT::v8bf16);
   }
 
-  if (Subtarget->hasSVEorSME()) {
+  if (Subtarget->isSVEorStreamingSVEAvailable()) {
     // Add legal sve predicate types
     addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
     addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
@@ -1408,7 +1408,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // FIXME: Move lowering for more nodes here if those are common between
   // SVE and SME.
-  if (Subtarget->hasSVEorSME()) {
+  if (Subtarget->isSVEorStreamingSVEAvailable()) {
     for (auto VT :
          {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 949e7699d070d..0d8e846afc69f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4497,7 +4497,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Predicate register by ORRing with itself.
   if (AArch64::PPRRegClass.contains(DestReg) &&
       AArch64::PPRRegClass.contains(SrcReg)) {
-    assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+           "Unexpected SVE register.");
     BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
       .addReg(SrcReg) // Pg
       .addReg(SrcReg)
@@ -4532,7 +4533,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Z register by ORRing with itself.
   if (AArch64::ZPRRegClass.contains(DestReg) &&
       AArch64::ZPRRegClass.contains(SrcReg)) {
-    assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+           "Unexpected SVE register.");
     BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
       .addReg(SrcReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
@@ -4544,7 +4546,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
        AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
       (AArch64::ZPR2RegClass.contains(SrcReg) ||
        AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
-    assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+           "Unexpected SVE register.");
     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                      Indices);
@@ -4554,7 +4557,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Z register triple by copying the individual sub-registers.
   if (AArch64::ZPR3RegClass.contains(DestReg) &&
       AArch64::ZPR3RegClass.contains(SrcReg)) {
-    assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+           "Unexpected SVE register.");
     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                        AArch64::zsub2};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -4567,7 +4571,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
        AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
       (AArch64::ZPR4RegClass.contains(SrcReg) ||
        AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
-    assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+    assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+           "Unexpected SVE register.");
     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                        AArch64::zsub2, AArch64::zsub3};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -4834,7 +4839,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
       Opc = AArch64::STRHui;
     else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
              "Unexpected register store without SVE2p1 or SME2");
@@ -4886,7 +4891,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                               AArch64::sube64, AArch64::subo64, FI, MMO);
       return;
     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       Opc = AArch64::STR_ZXI;
       StackID = TargetStackID::ScalableVector;
@@ -4910,7 +4915,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       Offset = false;
     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
                AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       Opc = AArch64::STR_ZZXI;
       StackID = TargetStackID::ScalableVector;
@@ -4922,7 +4927,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       Opc = AArch64::ST1Threev2d;
       Offset = false;
     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       Opc = AArch64::STR_ZZZXI;
       StackID = TargetStackID::ScalableVector;
@@ -4935,7 +4940,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       Offset = false;
     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
                AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register store without SVE store instructions");
       Opc = AArch64::STR_ZZZZXI;
       StackID = TargetStackID::ScalableVector;
@@ -5008,7 +5013,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
       Opc = AArch64::LDRHui;
     else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register load without SVE load instructions");
       assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
              "Unexpected register load without SVE2p1 or SME2");
@@ -5062,7 +5067,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                AArch64::subo64, FI, MMO);
       return;
     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register load without SVE load instructions");
       Opc = AArch64::LDR_ZXI;
       StackID = TargetStackID::ScalableVector;
@@ -5086,7 +5091,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       Offset = false;
     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
                AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register load without SVE load instructions");
       Opc = AArch64::LDR_ZZXI;
       StackID = TargetStackID::ScalableVector;
@@ -5098,7 +5103,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       Opc = AArch64::LD1Threev2d;
       Offset = false;
     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register load without SVE load instructions");
       Opc = AArch64::LDR_ZZZXI;
       StackID = TargetStackID::ScalableVector;
@@ -5111,7 +5116,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       Offset = false;
     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
                AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
-      assert(Subtarget.hasSVEorSME() &&
+      assert(Subtarget.isSVEorStreamingSVEAvailable() &&
              "Unexpected register load without SVE load instructions");
       Opc = AArch64::LDR_ZZZZXI;
       StackID = TargetStackID::ScalableVector;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6afee9bd388a6..c1922b41a6de5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -141,35 +141,41 @@ def HasSPE           : Predicate<"Subtarget->hasSPE()">,
 def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                  AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                  "fuse-aes">;
-def HasSVE           : Predicate<"Subtarget->hasSVE()">,
+def HasSVE           : Predicate<"Subtarget->isSVEAvailable()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
-def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
+def HasSVE2          : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
-def HasSVE2p1        : Predicate<"Subtarget->hasSVE2p1()">,
+def HasSVE2p1        : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
-def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
+def HasSVE2AES       : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
-def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
+def HasSVE2SM4       : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
-def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
+def HasSVE2SHA3      : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
-def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
+def HasSVE2BitPerm   : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
 def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
                                  AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
-def HasSME           : Predicate<"Subtarget->hasSME()">,
+def HasSMEandIsNonStreamingSafe
+                     : Predicate<"Subtarget->hasSME()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
-def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
+def HasSME           : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
+def HasSMEF64F64     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
-def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
+def HasSMEF16F16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
-def HasSMEFA64       : Predicate<"Subtarget->hasSMEFA64()">,
+def HasSMEFA64       : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
-def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
+def HasSMEI16I64     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
-def HasSME2          : Predicate<"Subtarget->hasSME2()">,
+def HasSME2andIsNonStreamingSafe
+                     : Predicate<"Subtarget->hasSME2()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
+def HasSME2          : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
-def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
+def HasSME2p1        : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
 def HasFP8           : Predicate<"Subtarget->hasFP8()">,
                                  AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
@@ -198,39 +204,39 @@ def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                  "ssve-fp8dot4 or (sve2 and fp8dot4)">;
 def HasLUT          : Predicate<"Subtarget->hasLUT()">,
                                  AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
-def HasSME_LUTv2     : Predicate<"Subtarget->hasSME_LUTv2()">,
+def HasSME_LUTv2     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
-def HasSMEF8F16     : Predicate<"Subtarget->hasSMEF8F16()">,
+def HasSMEF8F16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
-def HasSMEF8F32     : Predicate<"Subtarget->hasSMEF8F32()">,
+def HasSMEF8F32     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
 
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
 def HasSVEorSME
-    : Predicate<"Subtarget->hasSVEorSME()">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable()">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                 "sve or sme">;
 def HasSVE2orSME
-    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2() || Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                 "sve2 or sme">;
 def HasSVE2orSME2
-    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2() || Subtarget->hasSME2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                 "sve2 or sme2">;
 def HasSVE2p1_or_HasSME
-    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p1() || Subtarget->hasSME())">,
                  AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
 def HasSVE2p1_or_HasSME2
-    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p1() || Subtarget->hasSME2())">,
                  AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
 def HasSVE2p1_or_HasSME2p1
-    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p1() || Subtarget->hasSME2p1())">,
                  AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
 
 def HasSMEF16F16orSMEF8F16
-    : Predicate<"Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16()">,
+    : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
                 AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
                 "sme-f16f16 or sme-f8f16">;
 
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index adc8e6d3ff877..054eca8ad7528 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -71,15 +71,17 @@ let usesCustomInserter = 1 in {
 def SDT_AArch64RDSVL  : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
 def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>;
 
-let Predicates = [HasSME] in {
+let Predicates = [HasSMEandIsNonStreamingSafe] in {
 def RDSVLI_XI  : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>;
 def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
 def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;
 
+def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
+}
+
+let Predicates = [HasSME] in {
 defm ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha", int_aarch64_sme_addha>;
 defm ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva", int_aarch64_sme_addva>;
-
-def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
 }
 
 let Predicates = [HasSMEI16I64] in {
@@ -138,18 +140,20 @@ defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
 defm ST1_MXIPXX : sme_mem_st_ss<"st1">;
 
 //===----------------------------------------------------------------------===//
-// Spill + fill
+// Move instructions
 //===----------------------------------------------------------------------===//
 
-defm LDR_ZA : sme_fill<"ldr">;
-defm STR_ZA : sme_spill<"str">;
+defm INSERT_MXIPZ  : sme_vector_to_tile<"mova">;
+defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
+} // End let Predicates = [HasSME]
 
+let Predicates = [HasSMEandIsNonStreamingSafe] in {
 //===----------------------------------------------------------------------===//
-// Move instructions
+// Spill + fill
 //===----------------------------------------------------------------------===//
 
-defm INSERT_MXIPZ  : sme_vector_to_tile<"mova">;
-defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
+defm LDR_ZA : sme_fill<"ldr">;
+defm STR_ZA : sme_spill<"str">;
 
 //===----------------------------------------------------------------------===//
 // Zero instruction
@@ -185,7 +189,7 @@ def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
 def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
           (MRS 0xde85)>;
 
-} // End let Predicates = [HasSME]
+} // End let Predicates = [HasSMEandIsNonStreamingSafe]
 
 multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
   def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
@@ -580,11 +584,6 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops
 defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
 defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;
 
-defm ZERO_T : sme2_zero_zt<"zero", 0b0001>;
-
-defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, AArch64_restore_zt>;
-defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, AArch64_save_zt>;
-
 def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
 def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;
 
@@ -710,7 +709,15 @@ def  STNT1D_2Z_STRI...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/96302


More information about the llvm-commits mailing list