[llvm] d4826cd - [AArch64] Observe Z-reg inline asm clobbers without SVE (#143742)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 13 01:07:12 PDT 2025


Author: Sander de Smalen
Date: 2025-06-13T09:07:09+01:00
New Revision: d4826cd324d9a10abdc67c973affa62d36dff4ee

URL: https://github.com/llvm/llvm-project/commit/d4826cd324d9a10abdc67c973affa62d36dff4ee
DIFF: https://github.com/llvm/llvm-project/commit/d4826cd324d9a10abdc67c973affa62d36dff4ee.diff

LOG: [AArch64] Observe Z-reg inline asm clobbers without SVE (#143742)

Inline asm that clobbers any of the Z-registers in a function that is not
in streaming mode should still be treated as clobbering the lower 128 bits
of those registers.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5b9e699eaa408..781a1281db402 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12281,13 +12281,14 @@ enum class PredicateConstraint { Uph, Upl, Upa };
 // not what we want. The code here pre-empts this by matching the register
 // explicitly.
 static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
-parsePredicateRegAsConstraint(StringRef Constraint) {
+parseSVERegAsConstraint(StringRef Constraint) {
   if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
-      Constraint[1] != 'p')
+      (Constraint[1] != 'p' && Constraint[1] != 'z'))
     return std::nullopt;
 
+  bool IsPredicate = Constraint[1] == 'p';
   Constraint = Constraint.substr(2, Constraint.size() - 3);
-  bool IsPredicateAsCount = Constraint.starts_with("n");
+  bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
   if (IsPredicateAsCount)
     Constraint = Constraint.drop_front(1);
 
@@ -12297,8 +12298,9 @@ parsePredicateRegAsConstraint(StringRef Constraint) {
 
   if (IsPredicateAsCount)
     return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
-  else
+  if (IsPredicate)
     return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
+  return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
 }
 
 static std::optional<PredicateConstraint>
@@ -12548,8 +12550,16 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       break;
     }
   } else {
-    if (const auto P = parsePredicateRegAsConstraint(Constraint))
+    if (const auto P = parseSVERegAsConstraint(Constraint)) {
+      // SME functions that are not in streaming mode should
+      // still observe clobbers of Z-registers by clobbering
+      // the lower 128 bits of those registers.
+      if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
+          !Subtarget->isSVEorStreamingSVEAvailable())
+        return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
+                              &AArch64::FPR128RegClass);
       return *P;
+    }
     if (const auto PC = parsePredicateConstraint(Constraint))
       if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
         return std::make_pair(0U, RegClass);

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
index 63cc061cb6188..b92a524036985 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sme2 -force-streaming -stop-after=finalize-isel | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu -stop-after=finalize-isel | FileCheck %s
 
-define void @UphPNR(target("aarch64.svcount") %predcnt) {
+define void @UphPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
 ; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -14,7 +14,7 @@ entry:
   ret void
 }
 
-define void @UpaPNR(target("aarch64.svcount") %predcnt) {
+define void @UpaPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
 ; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -28,7 +28,7 @@ entry:
   ret void
 }
 
-define void @UplPNR(target("aarch64.svcount") %predcnt) {
+define void @UplPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
 ; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -41,3 +41,86 @@ entry:
   call void asm sideeffect "fadd z0.h, $0/m, z0.h, #0.5", "@3Upl"(target("aarch64.svcount") %0)
   ret void
 }
+
+; Test that the z-register clobbers result in preserving %0 across the inline asm call.
+define <2 x float> @sme_nosve_nonstreaming(ptr %in) "target-features"="+sme,-sve" {
+entry:
+; CHECK-LABEL: name: sme_nosve_nonstreaming
+; CHECK:  INLINEASM &"smstart sm; smstop sm;"
+; CHECK-SAME: implicit-def early-clobber $q0
+; CHECK-SAME: implicit-def early-clobber $q1
+; CHECK-SAME: implicit-def early-clobber $q2
+; CHECK-SAME: implicit-def early-clobber $q3
+; CHECK-SAME: implicit-def early-clobber $q4
+; CHECK-SAME: implicit-def early-clobber $q5
+; CHECK-SAME: implicit-def early-clobber $q6
+; CHECK-SAME: implicit-def early-clobber $q7
+; CHECK-SAME: implicit-def early-clobber $q8
+; CHECK-SAME: implicit-def early-clobber $q9
+; CHECK-SAME: implicit-def early-clobber $q10
+; CHECK-SAME: implicit-def early-clobber $q11
+; CHECK-SAME: implicit-def early-clobber $q12
+; CHECK-SAME: implicit-def early-clobber $q13
+; CHECK-SAME: implicit-def early-clobber $q14
+; CHECK-SAME: implicit-def early-clobber $q15
+; CHECK-SAME: implicit-def early-clobber $q16
+; CHECK-SAME: implicit-def early-clobber $q17
+; CHECK-SAME: implicit-def early-clobber $q18
+; CHECK-SAME: implicit-def early-clobber $q19
+; CHECK-SAME: implicit-def early-clobber $q20
+; CHECK-SAME: implicit-def early-clobber $q21
+; CHECK-SAME: implicit-def early-clobber $q22
+; CHECK-SAME: implicit-def early-clobber $q23
+; CHECK-SAME: implicit-def early-clobber $q24
+; CHECK-SAME: implicit-def early-clobber $q25
+; CHECK-SAME: implicit-def early-clobber $q26
+; CHECK-SAME: implicit-def early-clobber $q27
+; CHECK-SAME: implicit-def early-clobber $q28
+; CHECK-SAME: implicit-def early-clobber $q29
+; CHECK-SAME: implicit-def early-clobber $q30
+; CHECK-SAME: implicit-def early-clobber $q31
+  %0 = load <2 x float>, ptr %in, align 8
+  call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
+  ret <2 x float> %0
+}
+
+define <2 x float> @sme_nosve_streaming(ptr %in) "target-features"="+sme,-sve" "aarch64_pstate_sm_enabled" {
+entry:
+; CHECK-LABEL: name: sme_nosve_streaming
+; CHECK:  INLINEASM &"smstart sm; smstop sm;"
+; CHECK-SAME: implicit-def early-clobber $z0
+; CHECK-SAME: implicit-def early-clobber $z1
+; CHECK-SAME: implicit-def early-clobber $z2
+; CHECK-SAME: implicit-def early-clobber $z3
+; CHECK-SAME: implicit-def early-clobber $z4
+; CHECK-SAME: implicit-def early-clobber $z5
+; CHECK-SAME: implicit-def early-clobber $z6
+; CHECK-SAME: implicit-def early-clobber $z7
+; CHECK-SAME: implicit-def early-clobber $z8
+; CHECK-SAME: implicit-def early-clobber $z9
+; CHECK-SAME: implicit-def early-clobber $z10
+; CHECK-SAME: implicit-def early-clobber $z11
+; CHECK-SAME: implicit-def early-clobber $z12
+; CHECK-SAME: implicit-def early-clobber $z13
+; CHECK-SAME: implicit-def early-clobber $z14
+; CHECK-SAME: implicit-def early-clobber $z15
+; CHECK-SAME: implicit-def early-clobber $z16
+; CHECK-SAME: implicit-def early-clobber $z17
+; CHECK-SAME: implicit-def early-clobber $z18
+; CHECK-SAME: implicit-def early-clobber $z19
+; CHECK-SAME: implicit-def early-clobber $z20
+; CHECK-SAME: implicit-def early-clobber $z21
+; CHECK-SAME: implicit-def early-clobber $z22
+; CHECK-SAME: implicit-def early-clobber $z23
+; CHECK-SAME: implicit-def early-clobber $z24
+; CHECK-SAME: implicit-def early-clobber $z25
+; CHECK-SAME: implicit-def early-clobber $z26
+; CHECK-SAME: implicit-def early-clobber $z27
+; CHECK-SAME: implicit-def early-clobber $z28
+; CHECK-SAME: implicit-def early-clobber $z29
+; CHECK-SAME: implicit-def early-clobber $z30
+; CHECK-SAME: implicit-def early-clobber $z31
+  %0 = load <2 x float>, ptr %in, align 8
+  call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
+  ret <2 x float> %0
+}


        


More information about the llvm-commits mailing list