[llvm] [AArch64] Observe Z-reg inline asm clobbers without SVE (PR #143742)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 11 09:22:04 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

<details>
<summary>Changes</summary>

inline asm that clobbers any of the z-registers when not in streaming mode, should still observe that the lower 128 bits of those registers are clobbered.

---
Full diff: https://github.com/llvm/llvm-project/pull/143742.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+19-6) 
- (modified) llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll (+87-4) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 766599d567efd..738d091134134 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12248,13 +12248,14 @@ enum class PredicateConstraint { Uph, Upl, Upa };
 // not what we want. The code here pre-empts this by matching the register
 // explicitly.
 static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
-parsePredicateRegAsConstraint(StringRef Constraint) {
+parseSVERegAsConstraint(StringRef Constraint) {
   if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
-      Constraint[1] != 'p')
+      (Constraint[1] != 'p' && Constraint[1] != 'z'))
     return std::nullopt;
 
+  bool IsPredicate = Constraint[1] == 'p';
   Constraint = Constraint.substr(2, Constraint.size() - 3);
-  bool IsPredicateAsCount = Constraint.starts_with("n");
+  bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
   if (IsPredicateAsCount)
     Constraint = Constraint.drop_front(1);
 
@@ -12262,7 +12263,9 @@ parsePredicateRegAsConstraint(StringRef Constraint) {
   if (Constraint.getAsInteger(10, V) || V > 31)
     return std::nullopt;
 
-  if (IsPredicateAsCount)
+  if (!IsPredicate)
+    return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
+  else if (IsPredicateAsCount)
     return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
   else
     return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
@@ -12515,8 +12518,18 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       break;
     }
   } else {
-    if (const auto P = parsePredicateRegAsConstraint(Constraint))
-      return *P;
+    if (const auto P = parseSVERegAsConstraint(Constraint)) {
+      // SME functions that are not in streaming mode, should
+      // still observe clobbers of Z-registers by clobbering
+      // the lower 128bits of those registers.
+      if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
+          !Subtarget->hasSVE() && Subtarget->hasSME() &&
+          !Subtarget->isStreaming())
+        return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
+                              &AArch64::FPR128RegClass);
+      else
+        return *P;
+    }
     if (const auto PC = parsePredicateConstraint(Constraint))
       if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
         return std::make_pair(0U, RegClass);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
index 63cc061cb6188..b92a524036985 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sme2 -force-streaming -stop-after=finalize-isel | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu -stop-after=finalize-isel | FileCheck %s
 
-define void @UphPNR(target("aarch64.svcount") %predcnt) {
+define void @UphPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
 ; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -14,7 +14,7 @@ entry:
   ret void
 }
 
-define void @UpaPNR(target("aarch64.svcount") %predcnt) {
+define void @UpaPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
 ; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -28,7 +28,7 @@ entry:
   ret void
 }
 
-define void @UplPNR(target("aarch64.svcount") %predcnt) {
+define void @UplPNR(target("aarch64.svcount") %predcnt) "target-features"="+sme2" "aarch64_pstate_sm_enabled" {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
 ; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
@@ -41,3 +41,86 @@ entry:
   call void asm sideeffect "fadd z0.h, $0/m, z0.h, #0.5", "@3Upl"(target("aarch64.svcount") %0)
   ret void
 }
+
+; Test that the z-register clobbers result in preserving %0 across the inline asm call.
+define <2 x float> @sme_nosve_nonstreaming(ptr %in) "target-features"="+sme,-sve" {
+entry:
+; CHECK-LABEL: name: sme_nosve_nonstreaming
+; CHECK:  INLINEASM &"smstart sm; smstop sm;"
+; CHECK-SAME: implicit-def early-clobber $q0
+; CHECK-SAME: implicit-def early-clobber $q1
+; CHECK-SAME: implicit-def early-clobber $q2
+; CHECK-SAME: implicit-def early-clobber $q3
+; CHECK-SAME: implicit-def early-clobber $q4
+; CHECK-SAME: implicit-def early-clobber $q5
+; CHECK-SAME: implicit-def early-clobber $q6
+; CHECK-SAME: implicit-def early-clobber $q7
+; CHECK-SAME: implicit-def early-clobber $q8
+; CHECK-SAME: implicit-def early-clobber $q9
+; CHECK-SAME: implicit-def early-clobber $q10
+; CHECK-SAME: implicit-def early-clobber $q11
+; CHECK-SAME: implicit-def early-clobber $q12
+; CHECK-SAME: implicit-def early-clobber $q13
+; CHECK-SAME: implicit-def early-clobber $q14
+; CHECK-SAME: implicit-def early-clobber $q15
+; CHECK-SAME: implicit-def early-clobber $q16
+; CHECK-SAME: implicit-def early-clobber $q17
+; CHECK-SAME: implicit-def early-clobber $q18
+; CHECK-SAME: implicit-def early-clobber $q19
+; CHECK-SAME: implicit-def early-clobber $q20
+; CHECK-SAME: implicit-def early-clobber $q21
+; CHECK-SAME: implicit-def early-clobber $q22
+; CHECK-SAME: implicit-def early-clobber $q23
+; CHECK-SAME: implicit-def early-clobber $q24
+; CHECK-SAME: implicit-def early-clobber $q25
+; CHECK-SAME: implicit-def early-clobber $q26
+; CHECK-SAME: implicit-def early-clobber $q27
+; CHECK-SAME: implicit-def early-clobber $q28
+; CHECK-SAME: implicit-def early-clobber $q29
+; CHECK-SAME: implicit-def early-clobber $q30
+; CHECK-SAME: implicit-def early-clobber $q31
+  %0 = load <2 x float>, ptr %in, align 8
+  call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
+  ret <2 x float> %0
+}
+
+define <2 x float> @sme_nosve_streaming(ptr %in) "target-features"="+sme,-sve" "aarch64_pstate_sm_enabled" {
+entry:
+; CHECK-LABEL: name: sme_nosve_streaming
+; CHECK:  INLINEASM &"smstart sm; smstop sm;"
+; CHECK-SAME: implicit-def early-clobber $z0
+; CHECK-SAME: implicit-def early-clobber $z1
+; CHECK-SAME: implicit-def early-clobber $z2
+; CHECK-SAME: implicit-def early-clobber $z3
+; CHECK-SAME: implicit-def early-clobber $z4
+; CHECK-SAME: implicit-def early-clobber $z5
+; CHECK-SAME: implicit-def early-clobber $z6
+; CHECK-SAME: implicit-def early-clobber $z7
+; CHECK-SAME: implicit-def early-clobber $z8
+; CHECK-SAME: implicit-def early-clobber $z9
+; CHECK-SAME: implicit-def early-clobber $z10
+; CHECK-SAME: implicit-def early-clobber $z11
+; CHECK-SAME: implicit-def early-clobber $z12
+; CHECK-SAME: implicit-def early-clobber $z13
+; CHECK-SAME: implicit-def early-clobber $z14
+; CHECK-SAME: implicit-def early-clobber $z15
+; CHECK-SAME: implicit-def early-clobber $z16
+; CHECK-SAME: implicit-def early-clobber $z17
+; CHECK-SAME: implicit-def early-clobber $z18
+; CHECK-SAME: implicit-def early-clobber $z19
+; CHECK-SAME: implicit-def early-clobber $z20
+; CHECK-SAME: implicit-def early-clobber $z21
+; CHECK-SAME: implicit-def early-clobber $z22
+; CHECK-SAME: implicit-def early-clobber $z23
+; CHECK-SAME: implicit-def early-clobber $z24
+; CHECK-SAME: implicit-def early-clobber $z25
+; CHECK-SAME: implicit-def early-clobber $z26
+; CHECK-SAME: implicit-def early-clobber $z27
+; CHECK-SAME: implicit-def early-clobber $z28
+; CHECK-SAME: implicit-def early-clobber $z29
+; CHECK-SAME: implicit-def early-clobber $z30
+; CHECK-SAME: implicit-def early-clobber $z31
+  %0 = load <2 x float>, ptr %in, align 8
+  call void asm sideeffect "smstart sm; smstop sm;", "~{z0},~{z1},~{z2},~{z3},~{z4},~{z5},~{z6},~{z7},~{z8},~{z9},~{z10},~{z11},~{z12},~{z13},~{z14},~{z15},~{z16},~{z17},~{z18},~{z19},~{z20},~{z21},~{z22},~{z23},~{z24},~{z25},~{z26},~{z27},~{z28},~{z29},~{z30},~{z31}"()
+  ret <2 x float> %0
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/143742


More information about the llvm-commits mailing list