[llvm] 5060f08 - [AArch64] Use pattern to select bf16 fpextend (#137212)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 04:55:21 PDT 2025
Author: John Brawn
Date: 2025-05-02T12:55:18+01:00
New Revision: 5060f08c3a98c2e4976d7ec380d9d8ea1888a68c
URL: https://github.com/llvm/llvm-project/commit/5060f08c3a98c2e4976d7ec380d9d8ea1888a68c
DIFF: https://github.com/llvm/llvm-project/commit/5060f08c3a98c2e4976d7ec380d9d8ea1888a68c.diff
LOG: [AArch64] Use pattern to select bf16 fpextend (#137212)
Currently bf16 fpextend is lowered to a vector shift. Instead leave it
as fpextend and have an instruction selection pattern which selects to a
shift later. Doing this means that DAGCombiner patterns for fpextend
will be applied, leading to better codegen. It also means that in some
situations we use a mov instruction where we previously had a dup
instruction, but I don't think this makes any difference.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
llvm/test/CodeGen/AArch64/bf16-instructions.ll
llvm/test/CodeGen/AArch64/bf16_fast_math.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0126b97c9fb9a..af4780e11e890 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -766,13 +766,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(Op, MVT::v8bf16, Expand);
}
- // For bf16, fpextend is custom lowered to be optionally expanded into shifts.
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
+ // fpextend from f16 or bf16 to f32 is legal
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f32, Legal);
+ // fpextend from bf16 to f64 needs to be split into two fpextends
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
- setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f32, Custom);
auto LegalizeNarrowFP = [this](MVT ScalarVT) {
for (auto Op : {
@@ -4559,33 +4560,6 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
return SDValue();
}
- if (VT.getScalarType() == MVT::f32) {
- // FP16->FP32 extends are legal for v32 and v4f32.
- if (Op0VT.getScalarType() == MVT::f16)
- return Op;
- if (Op0VT.getScalarType() == MVT::bf16) {
- SDLoc DL(Op);
- EVT IVT = VT.changeTypeToInteger();
- if (!Op0VT.isVector()) {
- Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4bf16, Op0);
- IVT = MVT::v4i32;
- }
-
- EVT Op0IVT = Op0.getValueType().changeTypeToInteger();
- SDValue Ext =
- DAG.getNode(ISD::ANY_EXTEND, DL, IVT, DAG.getBitcast(Op0IVT, Op0));
- SDValue Shift =
- DAG.getNode(ISD::SHL, DL, IVT, Ext, DAG.getConstant(16, DL, IVT));
- if (!Op0VT.isVector())
- Shift = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Shift,
- DAG.getConstant(0, DL, MVT::i64));
- Shift = DAG.getBitcast(VT, Shift);
- return IsStrict ? DAG.getMergeValues({Shift, Op.getOperand(0)}, DL)
- : Shift;
- }
- return SDValue();
- }
-
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bee86aa86ec37..a75091b853d21 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8513,6 +8513,26 @@ def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
(USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
}
+// fpextend from bf16 to f32 is just a shift left by 16
+let Predicates = [HasNEON] in {
+def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))),
+ (f32 (EXTRACT_SUBREG
+ (v4i32 (SHLLv4i16 (v4i16 (SUBREG_TO_REG (i64 0), (bf16 FPR16:$Rn), hsub)))),
+ ssub))>;
+def : Pat<(v4f32 (any_fpextend (v4bf16 V64:$Rn))),
+ (SHLLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (any_fpextend (extract_high_v8bf16 (v8bf16 V128:$Rn)))),
+ (SHLLv8i16 V128:$Rn)>;
+}
+// Fallback pattern for when we don't have NEON
+def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))),
+ (f32 (COPY_TO_REGCLASS
+ (i32 (UBFMWri (COPY_TO_REGCLASS
+ (f32 (SUBREG_TO_REG (i32 0), (bf16 FPR16:$Rn), hsub)),
+ GPR32),
+ (i64 16), (i64 15))),
+ FPR32))>;
+
def abs_f16 :
OutPatFrag<(ops node:$Rn),
(EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
index 9a1203f18243d..1d33545cb171a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -155,9 +155,7 @@ entry:
define i32 @fptosi_bf(bfloat %a) nounwind ssp {
; CHECK-LABEL: fptosi_bf:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, s0
-; CHECK-NEXT: // implicit-def: $d0
-; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: // kill: def $d0 killed $h0
; CHECK-NEXT: shll v0.4s, v0.4h, #16
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: fcvtzs w0, s0
@@ -171,9 +169,7 @@ entry:
define i32 @fptoui_sbf(bfloat %a) nounwind ssp {
; CHECK-LABEL: fptoui_sbf:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s1, s0
-; CHECK-NEXT: // implicit-def: $d0
-; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: // kill: def $d0 killed $h0
; CHECK-NEXT: shll v0.4s, v0.4h, #16
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: fcvtzu w0, s0
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index 9b5e48d2b4217..e3e18a1f91c6d 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -641,7 +641,7 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
; NOLSE: // %bb.0:
; NOLSE-NEXT: // kill: def $d0 killed $d0 def $q0
-; NOLSE-NEXT: dup v1.4h, v0.h[1]
+; NOLSE-NEXT: mov h1, v0.h[1]
; NOLSE-NEXT: mov w8, #32767 // =0x7fff
; NOLSE-NEXT: shll v0.4s, v0.4h, #16
; NOLSE-NEXT: shll v1.4s, v1.4h, #16
@@ -649,7 +649,7 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT: ldaxr w9, [x0]
; NOLSE-NEXT: fmov s2, w9
-; NOLSE-NEXT: dup v3.4h, v2.h[1]
+; NOLSE-NEXT: mov h3, v2.h[1]
; NOLSE-NEXT: shll v2.4s, v2.4h, #16
; NOLSE-NEXT: fmaxnm s2, s2, s0
; NOLSE-NEXT: shll v3.4s, v3.4h, #16
@@ -677,14 +677,14 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; LSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
; LSE: // %bb.0:
; LSE-NEXT: // kill: def $d0 killed $d0 def $q0
-; LSE-NEXT: dup v1.4h, v0.h[1]
+; LSE-NEXT: mov h1, v0.h[1]
; LSE-NEXT: shll v2.4s, v0.4h, #16
; LSE-NEXT: mov w8, #32767 // =0x7fff
; LSE-NEXT: ldr s0, [x0]
; LSE-NEXT: shll v1.4s, v1.4h, #16
; LSE-NEXT: .LBB7_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: dup v3.4h, v0.h[1]
+; LSE-NEXT: mov h3, v0.h[1]
; LSE-NEXT: shll v4.4s, v0.4h, #16
; LSE-NEXT: fmaxnm s4, s4, s2
; LSE-NEXT: shll v3.4s, v3.4h, #16
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index f6c542fe7d407..10de6777bd285 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -641,7 +641,7 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; NOLSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
; NOLSE: // %bb.0:
; NOLSE-NEXT: // kill: def $d0 killed $d0 def $q0
-; NOLSE-NEXT: dup v1.4h, v0.h[1]
+; NOLSE-NEXT: mov h1, v0.h[1]
; NOLSE-NEXT: mov w8, #32767 // =0x7fff
; NOLSE-NEXT: shll v0.4s, v0.4h, #16
; NOLSE-NEXT: shll v1.4s, v1.4h, #16
@@ -649,7 +649,7 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT: ldaxr w9, [x0]
; NOLSE-NEXT: fmov s2, w9
-; NOLSE-NEXT: dup v3.4h, v2.h[1]
+; NOLSE-NEXT: mov h3, v2.h[1]
; NOLSE-NEXT: shll v2.4s, v2.4h, #16
; NOLSE-NEXT: fminnm s2, s2, s0
; NOLSE-NEXT: shll v3.4s, v3.4h, #16
@@ -677,14 +677,14 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; LSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
; LSE: // %bb.0:
; LSE-NEXT: // kill: def $d0 killed $d0 def $q0
-; LSE-NEXT: dup v1.4h, v0.h[1]
+; LSE-NEXT: mov h1, v0.h[1]
; LSE-NEXT: shll v2.4s, v0.4h, #16
; LSE-NEXT: mov w8, #32767 // =0x7fff
; LSE-NEXT: ldr s0, [x0]
; LSE-NEXT: shll v1.4s, v1.4h, #16
; LSE-NEXT: .LBB7_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: dup v3.4h, v0.h[1]
+; LSE-NEXT: mov h3, v0.h[1]
; LSE-NEXT: shll v4.4s, v0.4h, #16
; LSE-NEXT: fminnm s4, s4, s2
; LSE-NEXT: shll v3.4s, v3.4h, #16
diff --git a/llvm/test/CodeGen/AArch64/bf16-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
index 2fc9c53112ab6..9f002b1e0da55 100644
--- a/llvm/test/CodeGen/AArch64/bf16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
@@ -202,16 +202,13 @@ define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
;
; CHECK-BF16-LABEL: test_fmadd:
; CHECK-BF16: // %bb.0:
+; CHECK-BF16-NEXT: // kill: def $h2 killed $h2 def $d2
; CHECK-BF16-NEXT: // kill: def $h1 killed $h1 def $d1
; CHECK-BF16-NEXT: // kill: def $h0 killed $h0 def $d0
-; CHECK-BF16-NEXT: // kill: def $h2 killed $h2 def $d2
; CHECK-BF16-NEXT: shll v1.4s, v1.4h, #16
; CHECK-BF16-NEXT: shll v0.4s, v0.4h, #16
-; CHECK-BF16-NEXT: fmul s0, s0, s1
-; CHECK-BF16-NEXT: shll v1.4s, v2.4h, #16
-; CHECK-BF16-NEXT: bfcvt h0, s0
-; CHECK-BF16-NEXT: shll v0.4s, v0.4h, #16
-; CHECK-BF16-NEXT: fadd s0, s0, s1
+; CHECK-BF16-NEXT: shll v2.4s, v2.4h, #16
+; CHECK-BF16-NEXT: fmadd s0, s0, s1, s2
; CHECK-BF16-NEXT: bfcvt h0, s0
; CHECK-BF16-NEXT: ret
%mul = fmul fast bfloat %a, %b
@@ -1996,13 +1993,11 @@ define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
; CHECK-CVT-LABEL: test_copysign_extended:
; CHECK-CVT: // %bb.0:
-; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $d0
-; CHECK-CVT-NEXT: movi v2.4s, #16
; CHECK-CVT-NEXT: // kill: def $h1 killed $h1 def $d1
-; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-CVT-NEXT: shll v1.4s, v1.4h, #16
-; CHECK-CVT-NEXT: ushl v0.4s, v0.4s, v2.4s
+; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-CVT-NEXT: mvni v2.4s, #128, lsl #24
+; CHECK-CVT-NEXT: shll v1.4s, v1.4h, #16
+; CHECK-CVT-NEXT: shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-CVT-NEXT: fmov w8, s0
; CHECK-CVT-NEXT: lsr w8, w8, #16
@@ -2013,16 +2008,12 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
;
; CHECK-SD-LABEL: test_copysign_extended:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $d0
-; CHECK-SD-NEXT: movi v2.4s, #16
; CHECK-SD-NEXT: // kill: def $h1 killed $h1 def $d1
-; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-SD-NEXT: shll v1.4s, v1.4h, #16
-; CHECK-SD-NEXT: ushl v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: bfcvt h0, s0
+; CHECK-SD-NEXT: shll v1.4s, v1.4h, #16
; CHECK-SD-NEXT: shll v0.4s, v0.4h, #16
+; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/bf16_fast_math.ll b/llvm/test/CodeGen/AArch64/bf16_fast_math.ll
index 7d7fb67ca2f77..871ca12c9de77 100644
--- a/llvm/test/CodeGen/AArch64/bf16_fast_math.ll
+++ b/llvm/test/CodeGen/AArch64/bf16_fast_math.ll
@@ -4,8 +4,6 @@
; Check that the output instructions have the same fast math flags as the input
; fadd, even when bf16 is legalized to f32.
-; FIXME: Conversion from float to bf16 is done via a vector type for some
-; reason, when we should just be using scalar instructions.
define bfloat @normal_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-LABEL: name: normal_fadd
@@ -14,13 +12,11 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: {{ $}}
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[FADDSrr]]
@@ -40,13 +36,11 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) {
; CHECK-BF16-NEXT: {{ $}}
; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr
@@ -64,13 +58,11 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: {{ $}}
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[FADDSrr]]
@@ -90,13 +82,11 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) {
; CHECK-BF16-NEXT: {{ $}}
; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nnan ninf nsz arcp contract afn reassoc nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr
@@ -114,13 +104,11 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: {{ $}}
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY [[FADDSrr]]
@@ -140,13 +128,11 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) {
; CHECK-BF16-NEXT: {{ $}}
; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY3]], killed [[COPY2]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = ninf nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr
@@ -159,8 +145,6 @@ entry:
; Check that when we have the right fast math flags the converts in between the
; two fadds are removed.
-; FIXME: The convert from float to bf16 being done by a shift prevents this from
-; happening.
define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-LABEL: name: normal_fadd_sequence
@@ -170,13 +154,11 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2
; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY5:%[0-9]+]]:gpr32 = COPY [[FADDSrr]]
@@ -187,13 +169,11 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri killed [[ADDWrr1]], 16, 31
; CHECK-NOBF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
; CHECK-NOBF16-NEXT: [[COPY7:%[0-9]+]]:fpr16 = COPY [[COPY6]].hsub
- ; CHECK-NOBF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[COPY7]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[COPY7]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]]
; CHECK-NOBF16-NEXT: [[COPY8:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]]
; CHECK-NOBF16-NEXT: [[COPY9:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY8]], killed [[COPY9]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY10:%[0-9]+]]:gpr32 = COPY [[FADDSrr1]]
@@ -213,23 +193,19 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2
; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-BF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr
- ; CHECK-BF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[BFCVT]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[BFCVT]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]]
; CHECK-BF16-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub
- ; CHECK-BF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]]
; CHECK-BF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub
; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[COPY5]], killed [[COPY6]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT1:%[0-9]+]]:fpr16 = nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr
@@ -249,13 +225,11 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2
; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY5:%[0-9]+]]:gpr32 = COPY [[FADDSrr]]
@@ -266,13 +240,11 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
; CHECK-NOBF16-NEXT: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri killed [[ADDWrr1]], 16, 31
; CHECK-NOBF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
; CHECK-NOBF16-NEXT: [[COPY7:%[0-9]+]]:fpr16 = COPY [[COPY6]].hsub
- ; CHECK-NOBF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[COPY7]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[COPY7]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]]
; CHECK-NOBF16-NEXT: [[COPY8:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]]
; CHECK-NOBF16-NEXT: [[COPY9:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY8]], killed [[COPY9]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY10:%[0-9]+]]:gpr32 = COPY [[FADDSrr1]]
@@ -292,27 +264,19 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2
; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-BF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr
- ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nnan ninf contract nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr
- ; CHECK-BF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[BFCVT]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]]
; CHECK-BF16-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub
- ; CHECK-BF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]]
- ; CHECK-BF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub
- ; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[COPY5]], killed [[COPY6]], implicit $fpcr
- ; CHECK-BF16-NEXT: [[BFCVT1:%[0-9]+]]:fpr16 = nnan ninf contract nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr
- ; CHECK-BF16-NEXT: $h0 = COPY [[BFCVT1]]
+ ; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = nnan ninf contract nofpexcept FADDSrr killed [[FADDSrr]], killed [[COPY5]], implicit $fpcr
+ ; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = nnan ninf contract nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr
+ ; CHECK-BF16-NEXT: $h0 = COPY [[BFCVT]]
; CHECK-BF16-NEXT: RET_ReallyLR implicit $h0
entry:
%add1 = fadd nnan ninf contract bfloat %x, %y
@@ -328,13 +292,11 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2
; CHECK-NOBF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-NOBF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-NOBF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-NOBF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-NOBF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY5:%[0-9]+]]:gpr32 = COPY [[FADDSrr]]
@@ -345,13 +307,11 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri killed [[ADDWrr1]], 16, 31
; CHECK-NOBF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
; CHECK-NOBF16-NEXT: [[COPY7:%[0-9]+]]:fpr16 = COPY [[COPY6]].hsub
- ; CHECK-NOBF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[COPY7]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[COPY7]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]]
; CHECK-NOBF16-NEXT: [[COPY8:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub
- ; CHECK-NOBF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-NOBF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub
- ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]]
+ ; CHECK-NOBF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-NOBF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]]
; CHECK-NOBF16-NEXT: [[COPY9:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub
; CHECK-NOBF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY8]], killed [[COPY9]], implicit $fpcr
; CHECK-NOBF16-NEXT: [[COPY10:%[0-9]+]]:gpr32 = COPY [[FADDSrr1]]
@@ -371,23 +331,19 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-BF16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h2
; CHECK-BF16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
; CHECK-BF16-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
- ; CHECK-BF16-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG]]
; CHECK-BF16-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_]].ssub
- ; CHECK-BF16-NEXT: [[DEF1:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF1]], [[COPY2]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG1]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY2]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_1:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG1]]
; CHECK-BF16-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_1]].ssub
; CHECK-BF16-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY4]], killed [[COPY3]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT:%[0-9]+]]:fpr16 = ninf nofpexcept BFCVT killed [[FADDSrr]], implicit $fpcr
- ; CHECK-BF16-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF2]], killed [[BFCVT]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG2]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, killed [[BFCVT]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_2:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG2]]
; CHECK-BF16-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_2]].ssub
- ; CHECK-BF16-NEXT: [[DEF3:%[0-9]+]]:fpr64 = IMPLICIT_DEF
- ; CHECK-BF16-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.hsub
- ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[INSERT_SUBREG3]]
+ ; CHECK-BF16-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub
+ ; CHECK-BF16-NEXT: [[SHLLv4i16_3:%[0-9]+]]:fpr128 = SHLLv4i16 killed [[SUBREG_TO_REG3]]
; CHECK-BF16-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[SHLLv4i16_3]].ssub
; CHECK-BF16-NEXT: [[FADDSrr1:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[COPY5]], killed [[COPY6]], implicit $fpcr
; CHECK-BF16-NEXT: [[BFCVT1:%[0-9]+]]:fpr16 = ninf nofpexcept BFCVT killed [[FADDSrr1]], implicit $fpcr
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 381c67c6d749e..da6b3bb99dbda 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -74,30 +74,16 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
;
; NONEON-NOSVE-LABEL: test_copysign_bf16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #80
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: sub sp, sp, #16
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [x0]
; NONEON-NOSVE-NEXT: ldr h1, [x1]
-; NONEON-NOSVE-NEXT: str h0, [sp, #40]
-; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
-; NONEON-NOSVE-NEXT: str h1, [sp, #76]
-; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NONEON-NOSVE-NEXT: str q0, [sp]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
-; NONEON-NOSVE-NEXT: lsl w9, w8, #16
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
-; NONEON-NOSVE-NEXT: lsl w8, w8, #16
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
-; NONEON-NOSVE-NEXT: lsl w9, w8, #16
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: str h1, [sp, #12]
; NONEON-NOSVE-NEXT: lsl w8, w8, #16
-; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16]
-; NONEON-NOSVE-NEXT: ldrb w8, [sp, #77]
-; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT: tst w8, #0x80
-; NONEON-NOSVE-NEXT: str q0, [sp, #48]
-; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
; NONEON-NOSVE-NEXT: fabs s0, s0
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
@@ -105,7 +91,7 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
; NONEON-NOSVE-NEXT: lsr w8, w8, #16
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [x0]
-; NONEON-NOSVE-NEXT: add sp, sp, #80
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%a = load bfloat, ptr %ap
%b = load bfloat, ptr %bp
More information about the llvm-commits
mailing list