[llvm] [AArch64] Don't emit Neon in streaming[-compatible] functions with -fzero-call-used-regs (PR #116995)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 08:07:00 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Benjamin Maxwell (MacDue)

<details>
<summary>Changes</summary>

Previously, with `-fzero-call-used-regs`, clang/LLVM would incorrectly emit Neon instructions in streaming functions and in streaming-compatible functions without SVE.

With this change:

* In streaming functions, Z/P registers will be zeroed
* In streaming-compatible functions without SVE, D registers will be zeroed
  - (Because Neon vector instructions, including `movi v..`, are illegal there)

---

Patch is 24.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116995.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+1-1) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+10-3) 
- (modified) llvm/test/CodeGen/AArch64/zero-call-used-regs.ll (+351-191) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 216244950ba9ee..d6673969aa3056 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1010,7 +1010,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
 
   BitVector GPRsToZero(TRI.getNumRegs());
   BitVector FPRsToZero(TRI.getNumRegs());
-  bool HasSVE = STI.hasSVE();
+  bool HasSVE = STI.isSVEorStreamingSVEAvailable();
   for (MCRegister Reg : RegsToZero.set_bits()) {
     if (TRI.isGeneralPurposeRegister(MF, Reg)) {
       // For GPRs, we only care to clear out the 64-bit register.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a470c03efd5eb4..41cfecb60561c9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9694,19 +9694,26 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator Iter,
                                           DebugLoc &DL,
                                           bool AllowSideEffects) const {
-  const MachineFunction &MF = *MBB.getParent();
+  MachineFunction &MF = *MBB.getParent();
   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
 
   if (TRI.isGeneralPurposeRegister(MF, Reg)) {
     BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
-  } else if (STI.hasSVE()) {
+  } else if (STI.isSVEorStreamingSVEAvailable()) {
     BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
       .addImm(0)
       .addImm(0);
-  } else {
+  } else if (STI.isNeonAvailable()) {
     BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
       .addImm(0);
+  } else {
+    // This is a streaming-compatible function without SVE. We don't have full
+    // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
+    // So given `movi v..` would be illegal use `fmov d..` instead.
+    assert(STI.hasNEON() && "Expected to have NEON.");
+    Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
+    BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
index 75a7c7f4a0511e..4799ea3bcd19f6 100644
--- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
+++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
+
+target triple = "aarch64-unknown-linux-gnu"
 
 @result = dso_local global i32 0, align 4
 
@@ -156,32 +160,55 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo
 ; DEFAULT-NEXT:    movi v7.2d, #0000000000000000
 ; DEFAULT-NEXT:    ret
 ;
-; SVE-LABEL: all_arg:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    mul w8, w1, w0
-; SVE-NEXT:    mov x1, #0 // =0x0
-; SVE-NEXT:    mov x3, #0 // =0x0
-; SVE-NEXT:    mov x4, #0 // =0x0
-; SVE-NEXT:    mov x5, #0 // =0x0
-; SVE-NEXT:    mov x6, #0 // =0x0
-; SVE-NEXT:    mov x7, #0 // =0x0
-; SVE-NEXT:    mov x18, #0 // =0x0
-; SVE-NEXT:    mov z0.d, #0 // =0x0
-; SVE-NEXT:    orr w0, w8, w2
-; SVE-NEXT:    mov x2, #0 // =0x0
-; SVE-NEXT:    mov x8, #0 // =0x0
-; SVE-NEXT:    mov z1.d, #0 // =0x0
-; SVE-NEXT:    mov z2.d, #0 // =0x0
-; SVE-NEXT:    mov z3.d, #0 // =0x0
-; SVE-NEXT:    mov z4.d, #0 // =0x0
-; SVE-NEXT:    mov z5.d, #0 // =0x0
-; SVE-NEXT:    mov z6.d, #0 // =0x0
-; SVE-NEXT:    mov z7.d, #0 // =0x0
-; SVE-NEXT:    pfalse p0.b
-; SVE-NEXT:    pfalse p1.b
-; SVE-NEXT:    pfalse p2.b
-; SVE-NEXT:    pfalse p3.b
-; SVE-NEXT:    ret
+; SVE-OR-SME-LABEL: all_arg:
+; SVE-OR-SME:       // %bb.0: // %entry
+; SVE-OR-SME-NEXT:    mul w8, w1, w0
+; SVE-OR-SME-NEXT:    mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z0.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    orr w0, w8, w2
+; SVE-OR-SME-NEXT:    mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    pfalse p0.b
+; SVE-OR-SME-NEXT:    pfalse p1.b
+; SVE-OR-SME-NEXT:    pfalse p2.b
+; SVE-OR-SME-NEXT:    pfalse p3.b
+; SVE-OR-SME-NEXT:    ret
+;
+; STREAMING-COMPAT-LABEL: all_arg:
+; STREAMING-COMPAT:       // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT:    mul w8, w1, w0
+; STREAMING-COMPAT-NEXT:    mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    fmov d0, xzr
+; STREAMING-COMPAT-NEXT:    orr w0, w8, w2
+; STREAMING-COMPAT-NEXT:    mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    fmov d1, xzr
+; STREAMING-COMPAT-NEXT:    fmov d2, xzr
+; STREAMING-COMPAT-NEXT:    fmov d3, xzr
+; STREAMING-COMPAT-NEXT:    fmov d4, xzr
+; STREAMING-COMPAT-NEXT:    fmov d5, xzr
+; STREAMING-COMPAT-NEXT:    fmov d6, xzr
+; STREAMING-COMPAT-NEXT:    fmov d7, xzr
+; STREAMING-COMPAT-NEXT:    ret
 
 entry:
   %mul = mul nsw i32 %b, %a
@@ -238,69 +265,117 @@ define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_
 ; DEFAULT-NEXT:    movi v31.2d, #0000000000000000
 ; DEFAULT-NEXT:    ret
 ;
-; SVE-LABEL: all:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    mul w8, w1, w0
-; SVE-NEXT:    mov x1, #0 // =0x0
-; SVE-NEXT:    mov x3, #0 // =0x0
-; SVE-NEXT:    mov x4, #0 // =0x0
-; SVE-NEXT:    mov x5, #0 // =0x0
-; SVE-NEXT:    mov x6, #0 // =0x0
-; SVE-NEXT:    mov x7, #0 // =0x0
-; SVE-NEXT:    mov x9, #0 // =0x0
-; SVE-NEXT:    mov x10, #0 // =0x0
-; SVE-NEXT:    orr w0, w8, w2
-; SVE-NEXT:    mov x2, #0 // =0x0
-; SVE-NEXT:    mov x8, #0 // =0x0
-; SVE-NEXT:    mov x11, #0 // =0x0
-; SVE-NEXT:    mov x12, #0 // =0x0
-; SVE-NEXT:    mov x13, #0 // =0x0
-; SVE-NEXT:    mov x14, #0 // =0x0
-; SVE-NEXT:    mov x15, #0 // =0x0
-; SVE-NEXT:    mov x16, #0 // =0x0
-; SVE-NEXT:    mov x17, #0 // =0x0
-; SVE-NEXT:    mov x18, #0 // =0x0
-; SVE-NEXT:    mov z0.d, #0 // =0x0
-; SVE-NEXT:    mov z1.d, #0 // =0x0
-; SVE-NEXT:    mov z2.d, #0 // =0x0
-; SVE-NEXT:    mov z3.d, #0 // =0x0
-; SVE-NEXT:    mov z4.d, #0 // =0x0
-; SVE-NEXT:    mov z5.d, #0 // =0x0
-; SVE-NEXT:    mov z6.d, #0 // =0x0
-; SVE-NEXT:    mov z7.d, #0 // =0x0
-; SVE-NEXT:    mov z16.d, #0 // =0x0
-; SVE-NEXT:    mov z17.d, #0 // =0x0
-; SVE-NEXT:    mov z18.d, #0 // =0x0
-; SVE-NEXT:    mov z19.d, #0 // =0x0
-; SVE-NEXT:    mov z20.d, #0 // =0x0
-; SVE-NEXT:    mov z21.d, #0 // =0x0
-; SVE-NEXT:    mov z22.d, #0 // =0x0
-; SVE-NEXT:    mov z23.d, #0 // =0x0
-; SVE-NEXT:    mov z24.d, #0 // =0x0
-; SVE-NEXT:    mov z25.d, #0 // =0x0
-; SVE-NEXT:    mov z26.d, #0 // =0x0
-; SVE-NEXT:    mov z27.d, #0 // =0x0
-; SVE-NEXT:    mov z28.d, #0 // =0x0
-; SVE-NEXT:    mov z29.d, #0 // =0x0
-; SVE-NEXT:    mov z30.d, #0 // =0x0
-; SVE-NEXT:    mov z31.d, #0 // =0x0
-; SVE-NEXT:    pfalse p0.b
-; SVE-NEXT:    pfalse p1.b
-; SVE-NEXT:    pfalse p2.b
-; SVE-NEXT:    pfalse p3.b
-; SVE-NEXT:    pfalse p4.b
-; SVE-NEXT:    pfalse p5.b
-; SVE-NEXT:    pfalse p6.b
-; SVE-NEXT:    pfalse p7.b
-; SVE-NEXT:    pfalse p8.b
-; SVE-NEXT:    pfalse p9.b
-; SVE-NEXT:    pfalse p10.b
-; SVE-NEXT:    pfalse p11.b
-; SVE-NEXT:    pfalse p12.b
-; SVE-NEXT:    pfalse p13.b
-; SVE-NEXT:    pfalse p14.b
-; SVE-NEXT:    pfalse p15.b
-; SVE-NEXT:    ret
+; SVE-OR-SME-LABEL: all:
+; SVE-OR-SME:       // %bb.0: // %entry
+; SVE-OR-SME-NEXT:    mul w8, w1, w0
+; SVE-OR-SME-NEXT:    mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x9, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x10, #0 // =0x0
+; SVE-OR-SME-NEXT:    orr w0, w8, w2
+; SVE-OR-SME-NEXT:    mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x11, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x12, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x13, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x14, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x15, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x16, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x17, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z0.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z16.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z17.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z18.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z19.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z20.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z21.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z22.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z23.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z24.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z25.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z26.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z27.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z28.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z29.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z30.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z31.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    pfalse p0.b
+; SVE-OR-SME-NEXT:    pfalse p1.b
+; SVE-OR-SME-NEXT:    pfalse p2.b
+; SVE-OR-SME-NEXT:    pfalse p3.b
+; SVE-OR-SME-NEXT:    pfalse p4.b
+; SVE-OR-SME-NEXT:    pfalse p5.b
+; SVE-OR-SME-NEXT:    pfalse p6.b
+; SVE-OR-SME-NEXT:    pfalse p7.b
+; SVE-OR-SME-NEXT:    pfalse p8.b
+; SVE-OR-SME-NEXT:    pfalse p9.b
+; SVE-OR-SME-NEXT:    pfalse p10.b
+; SVE-OR-SME-NEXT:    pfalse p11.b
+; SVE-OR-SME-NEXT:    pfalse p12.b
+; SVE-OR-SME-NEXT:    pfalse p13.b
+; SVE-OR-SME-NEXT:    pfalse p14.b
+; SVE-OR-SME-NEXT:    pfalse p15.b
+; SVE-OR-SME-NEXT:    ret
+;
+; STREAMING-COMPAT-LABEL: all:
+; STREAMING-COMPAT:       // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT:    mul w8, w1, w0
+; STREAMING-COMPAT-NEXT:    mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x9, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x10, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    orr w0, w8, w2
+; STREAMING-COMPAT-NEXT:    mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x11, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x12, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x13, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x14, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x15, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x16, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x17, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    fmov d0, xzr
+; STREAMING-COMPAT-NEXT:    fmov d1, xzr
+; STREAMING-COMPAT-NEXT:    fmov d2, xzr
+; STREAMING-COMPAT-NEXT:    fmov d3, xzr
+; STREAMING-COMPAT-NEXT:    fmov d4, xzr
+; STREAMING-COMPAT-NEXT:    fmov d5, xzr
+; STREAMING-COMPAT-NEXT:    fmov d6, xzr
+; STREAMING-COMPAT-NEXT:    fmov d7, xzr
+; STREAMING-COMPAT-NEXT:    fmov d16, xzr
+; STREAMING-COMPAT-NEXT:    fmov d17, xzr
+; STREAMING-COMPAT-NEXT:    fmov d18, xzr
+; STREAMING-COMPAT-NEXT:    fmov d19, xzr
+; STREAMING-COMPAT-NEXT:    fmov d20, xzr
+; STREAMING-COMPAT-NEXT:    fmov d21, xzr
+; STREAMING-COMPAT-NEXT:    fmov d22, xzr
+; STREAMING-COMPAT-NEXT:    fmov d23, xzr
+; STREAMING-COMPAT-NEXT:    fmov d24, xzr
+; STREAMING-COMPAT-NEXT:    fmov d25, xzr
+; STREAMING-COMPAT-NEXT:    fmov d26, xzr
+; STREAMING-COMPAT-NEXT:    fmov d27, xzr
+; STREAMING-COMPAT-NEXT:    fmov d28, xzr
+; STREAMING-COMPAT-NEXT:    fmov d29, xzr
+; STREAMING-COMPAT-NEXT:    fmov d30, xzr
+; STREAMING-COMPAT-NEXT:    fmov d31, xzr
+; STREAMING-COMPAT-NEXT:    ret
 
 entry:
   %mul = mul nsw i32 %b, %a
@@ -355,12 +430,19 @@ define dso_local double @used_arg_float(double noundef %a, float noundef %b) loc
 ; DEFAULT-NEXT:    movi v1.2d, #0000000000000000
 ; DEFAULT-NEXT:    ret
 ;
-; SVE-LABEL: used_arg_float:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    fcvt d1, s1
-; SVE-NEXT:    fmul d0, d1, d0
-; SVE-NEXT:    mov z1.d, #0 // =0x0
-; SVE-NEXT:    ret
+; SVE-OR-SME-LABEL: used_arg_float:
+; SVE-OR-SME:       // %bb.0: // %entry
+; SVE-OR-SME-NEXT:    fcvt d1, s1
+; SVE-OR-SME-NEXT:    fmul d0, d1, d0
+; SVE-OR-SME-NEXT:    mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    ret
+;
+; STREAMING-COMPAT-LABEL: used_arg_float:
+; STREAMING-COMPAT:       // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT:    fcvt d1, s1
+; STREAMING-COMPAT-NEXT:    fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT:    fmov d1, xzr
+; STREAMING-COMPAT-NEXT:    ret
 
 entry:
   %conv = fpext float %b to double
@@ -376,12 +458,19 @@ define dso_local double @used_float(double noundef %a, float noundef %b) local_u
 ; DEFAULT-NEXT:    movi v1.2d, #0000000000000000
 ; DEFAULT-NEXT:    ret
 ;
-; SVE-LABEL: used_float:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    fcvt d1, s1
-; SVE-NEXT:    fmul d0, d1, d0
-; SVE-NEXT:    mov z1.d, #0 // =0x0
-; SVE-NEXT:    ret
+; SVE-OR-SME-LABEL: used_float:
+; SVE-OR-SME:       // %bb.0: // %entry
+; SVE-OR-SME-NEXT:    fcvt d1, s1
+; SVE-OR-SME-NEXT:    fmul d0, d1, d0
+; SVE-OR-SME-NEXT:    mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    ret
+;
+; STREAMING-COMPAT-LABEL: used_float:
+; STREAMING-COMPAT:       // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT:    fcvt d1, s1
+; STREAMING-COMPAT-NEXT:    fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT:    fmov d1, xzr
+; STREAMING-COMPAT-NEXT:    ret
 
 entry:
   %conv = fpext float %b to double
@@ -468,32 +557,55 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca
 ; DEFAULT-NEXT:    movi v7.2d, #0000000000000000
 ; DEFAULT-NEXT:    ret
 ;
-; SVE-LABEL: all_arg_float:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    fcvt d1, s1
-; SVE-NEXT:    fmul d0, d1, d0
-; SVE-NEXT:    mov x0, #0 // =0x0
-; SVE-NEXT:    mov x1, #0 // =0x0
-; SVE-NEXT:    mov x2, #0 // =0x0
-; SVE-NEXT:    mov x3, #0 // =0x0
-; SVE-NEXT:    mov x4, #0 // =0x0
-; SVE-NEXT:    mov x5, #0 // =0x0
-; SVE-NEXT:    mov x6, #0 // =0x0
-; SVE-NEXT:    mov x7, #0 // =0x0
-; SVE-NEXT:    mov x8, #0 // =0x0
-; SVE-NEXT:    mov x18, #0 // =0x0
-; SVE-NEXT:    mov z1.d, #0 // =0x0
-; SVE-NEXT:    mov z2.d, #0 // =0x0
-; SVE-NEXT:    mov z3.d, #0 // =0x0
-; SVE-NEXT:    mov z4.d, #0 // =0x0
-; SVE-NEXT:    mov z5.d, #0 // =0x0
-; SVE-NEXT:    mov z6.d, #0 // =0x0
-; SVE-NEXT:    mov z7.d, #0 // =0x0
-; SVE-NEXT:    pfalse p0.b
-; SVE-NEXT:    pfalse p1.b
-; SVE-NEXT:    pfalse p2.b
-; SVE-NEXT:    pfalse p3.b
-; SVE-NEXT:    ret
+; SVE-OR-SME-LABEL: all_arg_float:
+; SVE-OR-SME:       // %bb.0: // %entry
+; SVE-OR-SME-NEXT:    fcvt d1, s1
+; SVE-OR-SME-NEXT:    fmul d0, d1, d0
+; SVE-OR-SME-NEXT:    mov x0, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT:    pfalse p0.b
+; SVE-OR-SME-NEXT:    pfalse p1.b
+; SVE-OR-SME-NEXT:    pfalse p2.b
+; SVE-OR-SME-NEXT:    pfalse p3.b
+; SVE-OR-SME-NEXT:    ret
+;
+; STREAMING-COMPAT-LABEL: all_arg_float:
+; STREAMING-COMPAT:       // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT:    fcvt d1, s1
+; STREAMING-COMPAT-NEXT:    fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT:    mov x0, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT:    fmov d1, xzr
+; STREAMING-COMPAT-NEXT:    fmov d2, xzr
+; STREAMING-COMPAT-NEXT:    fmov d3, xzr
+; STREAMING-COMPAT-NEXT:    fmov d4, xzr
+; STREAMING-COMPAT-NEXT:    fmov d5, xzr
+; STREAMING-COMPAT-NEXT:    fmov d6, xzr
+; STREAMING-COMPAT-NEXT:    fmov d7, xzr
+; STREAMING-COMPAT-NEXT:    ret
 
 entry:
   %conv = fpext float %b to double
@@ -550,69 +662,117 @@ define dso_local double @all_float(double noundef %a, float noundef %b) local_un
 ; DEFAULT-NEXT:    movi v31.2d, #0000000000000000
 ; DEFAULT-NEXT:    ret
 ;
-; SVE-LABEL: all_float:
-; SVE:       // %bb.0: // %entry
-; SVE-NEXT:    fcvt d1, s1
-; SVE-NEXT:    fmul d0, d1, d0
-; SVE-NEXT:    mov x0, #0 // =0x0
-; SVE-NEXT:    mov x1, #0 // =0x0
-; SVE-NEXT:    mov x2, #0 // =0x0
-; SVE-NEXT:    mov x3, #0 // =0x0
-; SVE-NEXT:    mov x4, #0 // =0x0
-; SVE-NEXT:    mov x5, #0 // =0x0
-; SVE-NEXT:    mov x6, #0 // =0x0
-; SVE-NEXT:    mov x7, #0 // =0x0
-; SVE-NEXT:    mov x8, #0 // =0x0
-; SVE-NEXT:    mov x9, #0 // =0x0
-; SVE-NEXT:    mov x10, #0 // =0x0
-; SVE-NEXT:    mov x11, #0 // =0x0
-; SVE-NEXT:    mov x12, #0 // =0x0
-; SVE-NEXT:    mov x13, #0 // =0x0
-; SVE-NEXT:    mov x14, #0 // =0x0
-; SVE-NEXT:    mov x15, #0 // =0x0
-; SVE-NEXT:    mov x16, #0 // =0x0
-; SVE-NEXT:    mov x17, #0 // =0x0
-; SVE-NEXT:    mov x18, #0 // =0x0
-; SVE-NEXT:    mov z1.d, #0 // =0x0
-; SVE-NEXT:    mov z2.d, #0 // =0x0
-; SVE-NEXT:    mov z3.d, #0 // =0x0
-; SVE-NEXT:    mov z4.d, #0 // =0x0
-; SVE-NEXT:    mov z5.d, #0 // =0x0
-; SVE-NEXT:    mov z6.d, #0 // =0x0
-; SVE-NEXT:    mov z7.d, #0 // =0x0
-; SVE-NEXT:    mov z16.d, #0 // =0x0
-; SVE-NEXT:    mov z17.d, #0 // =0x0
-; SVE-NEXT:    mov z18.d, #0 // =0x0
-; SVE-NEXT:    mov z19.d, #0 // =0x0
-; SVE-NEXT:    mov z20.d, #0 // =0x0
-; SVE-NEXT:    mov z21.d, #0 // =0x0
-; SVE-NEXT:    mov z22.d, #0 // =0x0
-; SVE-NEXT:    mov z23.d, #0 // =0x0
-; SVE-NEXT:    mov z24.d, #0 // =0x0
-; SVE-NEXT:    mov z25.d, #0 // =0x0
-; SVE-NEXT:    mov z26.d, #0 // =0x0
-; SVE-NEXT:    mov z27.d, #0 // =0x0
-; SVE-NEXT:    mov z28.d, #0 // =0x0
-; SVE-NEXT:    mov z29.d, #0 // =0x0
-; SVE-NEXT:    mov z30.d, #0 // =0x0
-; SVE-NEXT:    mov z31.d, #0 // =0x0
-; SVE-NEXT:    pfalse p0.b
-; SVE-NEXT:    pfalse p1.b
-; SVE-NEXT:    pfalse p2.b
-; SVE-NEXT:    pfalse p3.b
-; SVE-NEXT:    pfalse p4.b
-; SVE-NEXT:    pfalse p5.b
-; SVE-NEXT:    pfalse p6.b
-; SVE-NEXT:    pfalse p7.b
-; SVE-NEXT:    pfalse p8.b
-; SVE-NEXT:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/116995


More information about the llvm-commits mailing list