[llvm] [AArch64] Don't emit Neon in streaming[-compatible] functions with -fzero-call-used-regs (PR #116995)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 08:06:21 PST 2024
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/116995
Previously, with `-fzero-call-used-regs` clang/LLVM would incorrectly emit Neon instructions in streaming functions, and streaming-compatible functions without SVE.
With this change:
* In streaming functions, Z/P registers will be zeroed
* In streaming-compatible functions without SVE, D registers will be zeroed
- (as Neon vector instructions, including `movi v..`, are illegal there)
>From 5c8f18dd4b6ee5bc68112132cbe286f41a68006c Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 20 Nov 2024 15:20:08 +0000
Subject: [PATCH] [AArch64] Don't emit Neon in streaming[-compatible] functions
with -fzero-call-used-regs
Previously, with `-fzero-call-used-regs` clang/LLVM would incorrectly
emit Neon instructions in streaming functions, and streaming-compatible
functions without SVE.
With this change:
* In streaming functions, Z/P registers will be zeroed
* In streaming-compatible functions without SVE, D registers will be zeroed
- (as Neon vector instructions, including `movi v..`, are illegal there)
---
.../Target/AArch64/AArch64FrameLowering.cpp | 2 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 13 +-
.../CodeGen/AArch64/zero-call-used-regs.ll | 542 ++++++++++++------
3 files changed, 362 insertions(+), 195 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 216244950ba9ee..d6673969aa3056 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1010,7 +1010,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
BitVector GPRsToZero(TRI.getNumRegs());
BitVector FPRsToZero(TRI.getNumRegs());
- bool HasSVE = STI.hasSVE();
+ bool HasSVE = STI.isSVEorStreamingSVEAvailable();
for (MCRegister Reg : RegsToZero.set_bits()) {
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
// For GPRs, we only care to clear out the 64-bit register.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a470c03efd5eb4..41cfecb60561c9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -9694,19 +9694,26 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
MachineBasicBlock::iterator Iter,
DebugLoc &DL,
bool AllowSideEffects) const {
- const MachineFunction &MF = *MBB.getParent();
+ MachineFunction &MF = *MBB.getParent();
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
if (TRI.isGeneralPurposeRegister(MF, Reg)) {
BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
- } else if (STI.hasSVE()) {
+ } else if (STI.isSVEorStreamingSVEAvailable()) {
BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
.addImm(0)
.addImm(0);
- } else {
+ } else if (STI.isNeonAvailable()) {
BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
.addImm(0);
+ } else {
+ // This is a streaming-compatible function without SVE. We don't have full
+ // Neon (just FPRs), so we can at most zero the first 64-bit sub-register.
+ // Since `movi v..` would be illegal here, use `fmov d..` instead.
+ assert(STI.hasNEON() && "Expected to have NEON.");
+ Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
+ BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
}
}
diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
index 75a7c7f4a0511e..4799ea3bcd19f6 100644
--- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
+++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
+
+target triple = "aarch64-unknown-linux-gnu"
@result = dso_local global i32 0, align 4
@@ -156,32 +160,55 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo
; DEFAULT-NEXT: movi v7.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
-; SVE-LABEL: all_arg:
-; SVE: // %bb.0: // %entry
-; SVE-NEXT: mul w8, w1, w0
-; SVE-NEXT: mov x1, #0 // =0x0
-; SVE-NEXT: mov x3, #0 // =0x0
-; SVE-NEXT: mov x4, #0 // =0x0
-; SVE-NEXT: mov x5, #0 // =0x0
-; SVE-NEXT: mov x6, #0 // =0x0
-; SVE-NEXT: mov x7, #0 // =0x0
-; SVE-NEXT: mov x18, #0 // =0x0
-; SVE-NEXT: mov z0.d, #0 // =0x0
-; SVE-NEXT: orr w0, w8, w2
-; SVE-NEXT: mov x2, #0 // =0x0
-; SVE-NEXT: mov x8, #0 // =0x0
-; SVE-NEXT: mov z1.d, #0 // =0x0
-; SVE-NEXT: mov z2.d, #0 // =0x0
-; SVE-NEXT: mov z3.d, #0 // =0x0
-; SVE-NEXT: mov z4.d, #0 // =0x0
-; SVE-NEXT: mov z5.d, #0 // =0x0
-; SVE-NEXT: mov z6.d, #0 // =0x0
-; SVE-NEXT: mov z7.d, #0 // =0x0
-; SVE-NEXT: pfalse p0.b
-; SVE-NEXT: pfalse p1.b
-; SVE-NEXT: pfalse p2.b
-; SVE-NEXT: pfalse p3.b
-; SVE-NEXT: ret
+; SVE-OR-SME-LABEL: all_arg:
+; SVE-OR-SME: // %bb.0: // %entry
+; SVE-OR-SME-NEXT: mul w8, w1, w0
+; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0
+; SVE-OR-SME-NEXT: orr w0, w8, w2
+; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT: pfalse p0.b
+; SVE-OR-SME-NEXT: pfalse p1.b
+; SVE-OR-SME-NEXT: pfalse p2.b
+; SVE-OR-SME-NEXT: pfalse p3.b
+; SVE-OR-SME-NEXT: ret
+;
+; STREAMING-COMPAT-LABEL: all_arg:
+; STREAMING-COMPAT: // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT: mul w8, w1, w0
+; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT: fmov d0, xzr
+; STREAMING-COMPAT-NEXT: orr w0, w8, w2
+; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT: fmov d1, xzr
+; STREAMING-COMPAT-NEXT: fmov d2, xzr
+; STREAMING-COMPAT-NEXT: fmov d3, xzr
+; STREAMING-COMPAT-NEXT: fmov d4, xzr
+; STREAMING-COMPAT-NEXT: fmov d5, xzr
+; STREAMING-COMPAT-NEXT: fmov d6, xzr
+; STREAMING-COMPAT-NEXT: fmov d7, xzr
+; STREAMING-COMPAT-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
@@ -238,69 +265,117 @@ define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_
; DEFAULT-NEXT: movi v31.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
-; SVE-LABEL: all:
-; SVE: // %bb.0: // %entry
-; SVE-NEXT: mul w8, w1, w0
-; SVE-NEXT: mov x1, #0 // =0x0
-; SVE-NEXT: mov x3, #0 // =0x0
-; SVE-NEXT: mov x4, #0 // =0x0
-; SVE-NEXT: mov x5, #0 // =0x0
-; SVE-NEXT: mov x6, #0 // =0x0
-; SVE-NEXT: mov x7, #0 // =0x0
-; SVE-NEXT: mov x9, #0 // =0x0
-; SVE-NEXT: mov x10, #0 // =0x0
-; SVE-NEXT: orr w0, w8, w2
-; SVE-NEXT: mov x2, #0 // =0x0
-; SVE-NEXT: mov x8, #0 // =0x0
-; SVE-NEXT: mov x11, #0 // =0x0
-; SVE-NEXT: mov x12, #0 // =0x0
-; SVE-NEXT: mov x13, #0 // =0x0
-; SVE-NEXT: mov x14, #0 // =0x0
-; SVE-NEXT: mov x15, #0 // =0x0
-; SVE-NEXT: mov x16, #0 // =0x0
-; SVE-NEXT: mov x17, #0 // =0x0
-; SVE-NEXT: mov x18, #0 // =0x0
-; SVE-NEXT: mov z0.d, #0 // =0x0
-; SVE-NEXT: mov z1.d, #0 // =0x0
-; SVE-NEXT: mov z2.d, #0 // =0x0
-; SVE-NEXT: mov z3.d, #0 // =0x0
-; SVE-NEXT: mov z4.d, #0 // =0x0
-; SVE-NEXT: mov z5.d, #0 // =0x0
-; SVE-NEXT: mov z6.d, #0 // =0x0
-; SVE-NEXT: mov z7.d, #0 // =0x0
-; SVE-NEXT: mov z16.d, #0 // =0x0
-; SVE-NEXT: mov z17.d, #0 // =0x0
-; SVE-NEXT: mov z18.d, #0 // =0x0
-; SVE-NEXT: mov z19.d, #0 // =0x0
-; SVE-NEXT: mov z20.d, #0 // =0x0
-; SVE-NEXT: mov z21.d, #0 // =0x0
-; SVE-NEXT: mov z22.d, #0 // =0x0
-; SVE-NEXT: mov z23.d, #0 // =0x0
-; SVE-NEXT: mov z24.d, #0 // =0x0
-; SVE-NEXT: mov z25.d, #0 // =0x0
-; SVE-NEXT: mov z26.d, #0 // =0x0
-; SVE-NEXT: mov z27.d, #0 // =0x0
-; SVE-NEXT: mov z28.d, #0 // =0x0
-; SVE-NEXT: mov z29.d, #0 // =0x0
-; SVE-NEXT: mov z30.d, #0 // =0x0
-; SVE-NEXT: mov z31.d, #0 // =0x0
-; SVE-NEXT: pfalse p0.b
-; SVE-NEXT: pfalse p1.b
-; SVE-NEXT: pfalse p2.b
-; SVE-NEXT: pfalse p3.b
-; SVE-NEXT: pfalse p4.b
-; SVE-NEXT: pfalse p5.b
-; SVE-NEXT: pfalse p6.b
-; SVE-NEXT: pfalse p7.b
-; SVE-NEXT: pfalse p8.b
-; SVE-NEXT: pfalse p9.b
-; SVE-NEXT: pfalse p10.b
-; SVE-NEXT: pfalse p11.b
-; SVE-NEXT: pfalse p12.b
-; SVE-NEXT: pfalse p13.b
-; SVE-NEXT: pfalse p14.b
-; SVE-NEXT: pfalse p15.b
-; SVE-NEXT: ret
+; SVE-OR-SME-LABEL: all:
+; SVE-OR-SME: // %bb.0: // %entry
+; SVE-OR-SME-NEXT: mul w8, w1, w0
+; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x9, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x10, #0 // =0x0
+; SVE-OR-SME-NEXT: orr w0, w8, w2
+; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x11, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x12, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x13, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x14, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x15, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x16, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x17, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z16.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z17.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z18.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z19.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z20.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z21.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z22.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z23.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z24.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z25.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z26.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z27.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z28.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z29.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z30.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z31.d, #0 // =0x0
+; SVE-OR-SME-NEXT: pfalse p0.b
+; SVE-OR-SME-NEXT: pfalse p1.b
+; SVE-OR-SME-NEXT: pfalse p2.b
+; SVE-OR-SME-NEXT: pfalse p3.b
+; SVE-OR-SME-NEXT: pfalse p4.b
+; SVE-OR-SME-NEXT: pfalse p5.b
+; SVE-OR-SME-NEXT: pfalse p6.b
+; SVE-OR-SME-NEXT: pfalse p7.b
+; SVE-OR-SME-NEXT: pfalse p8.b
+; SVE-OR-SME-NEXT: pfalse p9.b
+; SVE-OR-SME-NEXT: pfalse p10.b
+; SVE-OR-SME-NEXT: pfalse p11.b
+; SVE-OR-SME-NEXT: pfalse p12.b
+; SVE-OR-SME-NEXT: pfalse p13.b
+; SVE-OR-SME-NEXT: pfalse p14.b
+; SVE-OR-SME-NEXT: pfalse p15.b
+; SVE-OR-SME-NEXT: ret
+;
+; STREAMING-COMPAT-LABEL: all:
+; STREAMING-COMPAT: // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT: mul w8, w1, w0
+; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x9, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x10, #0 // =0x0
+; STREAMING-COMPAT-NEXT: orr w0, w8, w2
+; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x11, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x12, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x13, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x14, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x16, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x17, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT: fmov d0, xzr
+; STREAMING-COMPAT-NEXT: fmov d1, xzr
+; STREAMING-COMPAT-NEXT: fmov d2, xzr
+; STREAMING-COMPAT-NEXT: fmov d3, xzr
+; STREAMING-COMPAT-NEXT: fmov d4, xzr
+; STREAMING-COMPAT-NEXT: fmov d5, xzr
+; STREAMING-COMPAT-NEXT: fmov d6, xzr
+; STREAMING-COMPAT-NEXT: fmov d7, xzr
+; STREAMING-COMPAT-NEXT: fmov d16, xzr
+; STREAMING-COMPAT-NEXT: fmov d17, xzr
+; STREAMING-COMPAT-NEXT: fmov d18, xzr
+; STREAMING-COMPAT-NEXT: fmov d19, xzr
+; STREAMING-COMPAT-NEXT: fmov d20, xzr
+; STREAMING-COMPAT-NEXT: fmov d21, xzr
+; STREAMING-COMPAT-NEXT: fmov d22, xzr
+; STREAMING-COMPAT-NEXT: fmov d23, xzr
+; STREAMING-COMPAT-NEXT: fmov d24, xzr
+; STREAMING-COMPAT-NEXT: fmov d25, xzr
+; STREAMING-COMPAT-NEXT: fmov d26, xzr
+; STREAMING-COMPAT-NEXT: fmov d27, xzr
+; STREAMING-COMPAT-NEXT: fmov d28, xzr
+; STREAMING-COMPAT-NEXT: fmov d29, xzr
+; STREAMING-COMPAT-NEXT: fmov d30, xzr
+; STREAMING-COMPAT-NEXT: fmov d31, xzr
+; STREAMING-COMPAT-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
@@ -355,12 +430,19 @@ define dso_local double @used_arg_float(double noundef %a, float noundef %b) loc
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
-; SVE-LABEL: used_arg_float:
-; SVE: // %bb.0: // %entry
-; SVE-NEXT: fcvt d1, s1
-; SVE-NEXT: fmul d0, d1, d0
-; SVE-NEXT: mov z1.d, #0 // =0x0
-; SVE-NEXT: ret
+; SVE-OR-SME-LABEL: used_arg_float:
+; SVE-OR-SME: // %bb.0: // %entry
+; SVE-OR-SME-NEXT: fcvt d1, s1
+; SVE-OR-SME-NEXT: fmul d0, d1, d0
+; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT: ret
+;
+; STREAMING-COMPAT-LABEL: used_arg_float:
+; STREAMING-COMPAT: // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT: fcvt d1, s1
+; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT: fmov d1, xzr
+; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
@@ -376,12 +458,19 @@ define dso_local double @used_float(double noundef %a, float noundef %b) local_u
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
-; SVE-LABEL: used_float:
-; SVE: // %bb.0: // %entry
-; SVE-NEXT: fcvt d1, s1
-; SVE-NEXT: fmul d0, d1, d0
-; SVE-NEXT: mov z1.d, #0 // =0x0
-; SVE-NEXT: ret
+; SVE-OR-SME-LABEL: used_float:
+; SVE-OR-SME: // %bb.0: // %entry
+; SVE-OR-SME-NEXT: fcvt d1, s1
+; SVE-OR-SME-NEXT: fmul d0, d1, d0
+; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT: ret
+;
+; STREAMING-COMPAT-LABEL: used_float:
+; STREAMING-COMPAT: // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT: fcvt d1, s1
+; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT: fmov d1, xzr
+; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
@@ -468,32 +557,55 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca
; DEFAULT-NEXT: movi v7.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
-; SVE-LABEL: all_arg_float:
-; SVE: // %bb.0: // %entry
-; SVE-NEXT: fcvt d1, s1
-; SVE-NEXT: fmul d0, d1, d0
-; SVE-NEXT: mov x0, #0 // =0x0
-; SVE-NEXT: mov x1, #0 // =0x0
-; SVE-NEXT: mov x2, #0 // =0x0
-; SVE-NEXT: mov x3, #0 // =0x0
-; SVE-NEXT: mov x4, #0 // =0x0
-; SVE-NEXT: mov x5, #0 // =0x0
-; SVE-NEXT: mov x6, #0 // =0x0
-; SVE-NEXT: mov x7, #0 // =0x0
-; SVE-NEXT: mov x8, #0 // =0x0
-; SVE-NEXT: mov x18, #0 // =0x0
-; SVE-NEXT: mov z1.d, #0 // =0x0
-; SVE-NEXT: mov z2.d, #0 // =0x0
-; SVE-NEXT: mov z3.d, #0 // =0x0
-; SVE-NEXT: mov z4.d, #0 // =0x0
-; SVE-NEXT: mov z5.d, #0 // =0x0
-; SVE-NEXT: mov z6.d, #0 // =0x0
-; SVE-NEXT: mov z7.d, #0 // =0x0
-; SVE-NEXT: pfalse p0.b
-; SVE-NEXT: pfalse p1.b
-; SVE-NEXT: pfalse p2.b
-; SVE-NEXT: pfalse p3.b
-; SVE-NEXT: ret
+; SVE-OR-SME-LABEL: all_arg_float:
+; SVE-OR-SME: // %bb.0: // %entry
+; SVE-OR-SME-NEXT: fcvt d1, s1
+; SVE-OR-SME-NEXT: fmul d0, d1, d0
+; SVE-OR-SME-NEXT: mov x0, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT: pfalse p0.b
+; SVE-OR-SME-NEXT: pfalse p1.b
+; SVE-OR-SME-NEXT: pfalse p2.b
+; SVE-OR-SME-NEXT: pfalse p3.b
+; SVE-OR-SME-NEXT: ret
+;
+; STREAMING-COMPAT-LABEL: all_arg_float:
+; STREAMING-COMPAT: // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT: fcvt d1, s1
+; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT: mov x0, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT: fmov d1, xzr
+; STREAMING-COMPAT-NEXT: fmov d2, xzr
+; STREAMING-COMPAT-NEXT: fmov d3, xzr
+; STREAMING-COMPAT-NEXT: fmov d4, xzr
+; STREAMING-COMPAT-NEXT: fmov d5, xzr
+; STREAMING-COMPAT-NEXT: fmov d6, xzr
+; STREAMING-COMPAT-NEXT: fmov d7, xzr
+; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
@@ -550,69 +662,117 @@ define dso_local double @all_float(double noundef %a, float noundef %b) local_un
; DEFAULT-NEXT: movi v31.2d, #0000000000000000
; DEFAULT-NEXT: ret
;
-; SVE-LABEL: all_float:
-; SVE: // %bb.0: // %entry
-; SVE-NEXT: fcvt d1, s1
-; SVE-NEXT: fmul d0, d1, d0
-; SVE-NEXT: mov x0, #0 // =0x0
-; SVE-NEXT: mov x1, #0 // =0x0
-; SVE-NEXT: mov x2, #0 // =0x0
-; SVE-NEXT: mov x3, #0 // =0x0
-; SVE-NEXT: mov x4, #0 // =0x0
-; SVE-NEXT: mov x5, #0 // =0x0
-; SVE-NEXT: mov x6, #0 // =0x0
-; SVE-NEXT: mov x7, #0 // =0x0
-; SVE-NEXT: mov x8, #0 // =0x0
-; SVE-NEXT: mov x9, #0 // =0x0
-; SVE-NEXT: mov x10, #0 // =0x0
-; SVE-NEXT: mov x11, #0 // =0x0
-; SVE-NEXT: mov x12, #0 // =0x0
-; SVE-NEXT: mov x13, #0 // =0x0
-; SVE-NEXT: mov x14, #0 // =0x0
-; SVE-NEXT: mov x15, #0 // =0x0
-; SVE-NEXT: mov x16, #0 // =0x0
-; SVE-NEXT: mov x17, #0 // =0x0
-; SVE-NEXT: mov x18, #0 // =0x0
-; SVE-NEXT: mov z1.d, #0 // =0x0
-; SVE-NEXT: mov z2.d, #0 // =0x0
-; SVE-NEXT: mov z3.d, #0 // =0x0
-; SVE-NEXT: mov z4.d, #0 // =0x0
-; SVE-NEXT: mov z5.d, #0 // =0x0
-; SVE-NEXT: mov z6.d, #0 // =0x0
-; SVE-NEXT: mov z7.d, #0 // =0x0
-; SVE-NEXT: mov z16.d, #0 // =0x0
-; SVE-NEXT: mov z17.d, #0 // =0x0
-; SVE-NEXT: mov z18.d, #0 // =0x0
-; SVE-NEXT: mov z19.d, #0 // =0x0
-; SVE-NEXT: mov z20.d, #0 // =0x0
-; SVE-NEXT: mov z21.d, #0 // =0x0
-; SVE-NEXT: mov z22.d, #0 // =0x0
-; SVE-NEXT: mov z23.d, #0 // =0x0
-; SVE-NEXT: mov z24.d, #0 // =0x0
-; SVE-NEXT: mov z25.d, #0 // =0x0
-; SVE-NEXT: mov z26.d, #0 // =0x0
-; SVE-NEXT: mov z27.d, #0 // =0x0
-; SVE-NEXT: mov z28.d, #0 // =0x0
-; SVE-NEXT: mov z29.d, #0 // =0x0
-; SVE-NEXT: mov z30.d, #0 // =0x0
-; SVE-NEXT: mov z31.d, #0 // =0x0
-; SVE-NEXT: pfalse p0.b
-; SVE-NEXT: pfalse p1.b
-; SVE-NEXT: pfalse p2.b
-; SVE-NEXT: pfalse p3.b
-; SVE-NEXT: pfalse p4.b
-; SVE-NEXT: pfalse p5.b
-; SVE-NEXT: pfalse p6.b
-; SVE-NEXT: pfalse p7.b
-; SVE-NEXT: pfalse p8.b
-; SVE-NEXT: pfalse p9.b
-; SVE-NEXT: pfalse p10.b
-; SVE-NEXT: pfalse p11.b
-; SVE-NEXT: pfalse p12.b
-; SVE-NEXT: pfalse p13.b
-; SVE-NEXT: pfalse p14.b
-; SVE-NEXT: pfalse p15.b
-; SVE-NEXT: ret
+; SVE-OR-SME-LABEL: all_float:
+; SVE-OR-SME: // %bb.0: // %entry
+; SVE-OR-SME-NEXT: fcvt d1, s1
+; SVE-OR-SME-NEXT: fmul d0, d1, d0
+; SVE-OR-SME-NEXT: mov x0, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x1, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x3, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x4, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x9, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x10, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x11, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x12, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x13, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x14, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x15, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x16, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x17, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z16.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z17.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z18.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z19.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z20.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z21.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z22.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z23.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z24.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z25.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z26.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z27.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z28.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z29.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z30.d, #0 // =0x0
+; SVE-OR-SME-NEXT: mov z31.d, #0 // =0x0
+; SVE-OR-SME-NEXT: pfalse p0.b
+; SVE-OR-SME-NEXT: pfalse p1.b
+; SVE-OR-SME-NEXT: pfalse p2.b
+; SVE-OR-SME-NEXT: pfalse p3.b
+; SVE-OR-SME-NEXT: pfalse p4.b
+; SVE-OR-SME-NEXT: pfalse p5.b
+; SVE-OR-SME-NEXT: pfalse p6.b
+; SVE-OR-SME-NEXT: pfalse p7.b
+; SVE-OR-SME-NEXT: pfalse p8.b
+; SVE-OR-SME-NEXT: pfalse p9.b
+; SVE-OR-SME-NEXT: pfalse p10.b
+; SVE-OR-SME-NEXT: pfalse p11.b
+; SVE-OR-SME-NEXT: pfalse p12.b
+; SVE-OR-SME-NEXT: pfalse p13.b
+; SVE-OR-SME-NEXT: pfalse p14.b
+; SVE-OR-SME-NEXT: pfalse p15.b
+; SVE-OR-SME-NEXT: ret
+;
+; STREAMING-COMPAT-LABEL: all_float:
+; STREAMING-COMPAT: // %bb.0: // %entry
+; STREAMING-COMPAT-NEXT: fcvt d1, s1
+; STREAMING-COMPAT-NEXT: fmul d0, d1, d0
+; STREAMING-COMPAT-NEXT: mov x0, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x9, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x10, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x11, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x12, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x13, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x14, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x16, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x17, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT: fmov d1, xzr
+; STREAMING-COMPAT-NEXT: fmov d2, xzr
+; STREAMING-COMPAT-NEXT: fmov d3, xzr
+; STREAMING-COMPAT-NEXT: fmov d4, xzr
+; STREAMING-COMPAT-NEXT: fmov d5, xzr
+; STREAMING-COMPAT-NEXT: fmov d6, xzr
+; STREAMING-COMPAT-NEXT: fmov d7, xzr
+; STREAMING-COMPAT-NEXT: fmov d16, xzr
+; STREAMING-COMPAT-NEXT: fmov d17, xzr
+; STREAMING-COMPAT-NEXT: fmov d18, xzr
+; STREAMING-COMPAT-NEXT: fmov d19, xzr
+; STREAMING-COMPAT-NEXT: fmov d20, xzr
+; STREAMING-COMPAT-NEXT: fmov d21, xzr
+; STREAMING-COMPAT-NEXT: fmov d22, xzr
+; STREAMING-COMPAT-NEXT: fmov d23, xzr
+; STREAMING-COMPAT-NEXT: fmov d24, xzr
+; STREAMING-COMPAT-NEXT: fmov d25, xzr
+; STREAMING-COMPAT-NEXT: fmov d26, xzr
+; STREAMING-COMPAT-NEXT: fmov d27, xzr
+; STREAMING-COMPAT-NEXT: fmov d28, xzr
+; STREAMING-COMPAT-NEXT: fmov d29, xzr
+; STREAMING-COMPAT-NEXT: fmov d30, xzr
+; STREAMING-COMPAT-NEXT: fmov d31, xzr
+; STREAMING-COMPAT-NEXT: ret
entry:
%conv = fpext float %b to double
More information about the llvm-commits
mailing list