[llvm] 21c4c15 - [LLVM][CodeGen][SVE] Only use unpredicated bfloat instructions when all lanes are in use. (#168387)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 04:01:08 PST 2025
Author: Paul Walker
Date: 2025-11-20T12:01:04Z
New Revision: 21c4c1502e3383988ba77eac75b13da7b9426957
URL: https://github.com/llvm/llvm-project/commit/21c4c1502e3383988ba77eac75b13da7b9426957
DIFF: https://github.com/llvm/llvm-project/commit/21c4c1502e3383988ba77eac75b13da7b9426957.diff
LOG: [LLVM][CodeGen][SVE] Only use unpredicated bfloat instructions when all lanes are in use. (#168387)
While SVE support for exception safe floating point code generation is
bare bones we try to ensure inactive lanes remain inert. I mistakenly
broke this rule when adding support for SVE-B16B16 by lowering some
bfloat operations of unpacked vectors to unpredicated instructions.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5c574b91e3ed0..7eb976558507f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1786,14 +1786,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->hasSVEB16B16() &&
Subtarget->isNonStreamingSVEorSME2Available()) {
- setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Legal);
- setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Custom);
+ setOperationAction(ISD::FSUB, VT, Custom);
}
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1e771e1fb9403..c63ae8660cad2 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2464,8 +2464,6 @@ multiclass sve_fp_3op_u_zd_bfloat<bits<3> opc, string asm, SDPatternOperator op>
def NAME : sve_fp_3op_u_zd<0b00, opc, asm, ZPR16>;
def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
- def : SVE_2_Op_Pat<nxv4bf16, op, nxv4bf16, nxv4bf16, !cast<Instruction>(NAME)>;
- def : SVE_2_Op_Pat<nxv2bf16, op, nxv2bf16, nxv2bf16, !cast<Instruction>(NAME)>;
}
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
index 582e8456c05b3..2103bc30b8381 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-arith.ll
@@ -56,7 +56,8 @@ define <vscale x 2 x bfloat> @fadd_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
;
; B16B16-LABEL: fadd_nxv2bf16:
; B16B16: // %bb.0:
-; B16B16-NEXT: bfadd z0.h, z0.h, z1.h
+; B16B16-NEXT: ptrue p0.d
+; B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
; B16B16-NEXT: ret
%res = fadd <vscale x 2 x bfloat> %a, %b
ret <vscale x 2 x bfloat> %res
@@ -74,7 +75,8 @@ define <vscale x 4 x bfloat> @fadd_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
;
; B16B16-LABEL: fadd_nxv4bf16:
; B16B16: // %bb.0:
-; B16B16-NEXT: bfadd z0.h, z0.h, z1.h
+; B16B16-NEXT: ptrue p0.s
+; B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
; B16B16-NEXT: ret
%res = fadd <vscale x 4 x bfloat> %a, %b
ret <vscale x 4 x bfloat> %res
@@ -525,7 +527,8 @@ define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
;
; B16B16-LABEL: fmul_nxv2bf16:
; B16B16: // %bb.0:
-; B16B16-NEXT: bfmul z0.h, z0.h, z1.h
+; B16B16-NEXT: ptrue p0.d
+; B16B16-NEXT: bfmul z0.h, p0/m, z0.h, z1.h
; B16B16-NEXT: ret
%res = fmul <vscale x 2 x bfloat> %a, %b
ret <vscale x 2 x bfloat> %res
@@ -543,7 +546,8 @@ define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
;
; B16B16-LABEL: fmul_nxv4bf16:
; B16B16: // %bb.0:
-; B16B16-NEXT: bfmul z0.h, z0.h, z1.h
+; B16B16-NEXT: ptrue p0.s
+; B16B16-NEXT: bfmul z0.h, p0/m, z0.h, z1.h
; B16B16-NEXT: ret
%res = fmul <vscale x 4 x bfloat> %a, %b
ret <vscale x 4 x bfloat> %res
@@ -672,7 +676,8 @@ define <vscale x 2 x bfloat> @fsub_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
;
; B16B16-LABEL: fsub_nxv2bf16:
; B16B16: // %bb.0:
-; B16B16-NEXT: bfsub z0.h, z0.h, z1.h
+; B16B16-NEXT: ptrue p0.d
+; B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
; B16B16-NEXT: ret
%res = fsub <vscale x 2 x bfloat> %a, %b
ret <vscale x 2 x bfloat> %res
@@ -690,7 +695,8 @@ define <vscale x 4 x bfloat> @fsub_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
;
; B16B16-LABEL: fsub_nxv4bf16:
; B16B16: // %bb.0:
-; B16B16-NEXT: bfsub z0.h, z0.h, z1.h
+; B16B16-NEXT: ptrue p0.s
+; B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
; B16B16-NEXT: ret
%res = fsub <vscale x 4 x bfloat> %a, %b
ret <vscale x 4 x bfloat> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
index fc3e018f2ec7a..be721930cf015 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
@@ -311,8 +311,7 @@ define <vscale x 8 x bfloat> @fadd_sel_nxv8bf16(<vscale x 8 x bfloat> %a, <vscal
;
; SVE-B16B16-LABEL: fadd_sel_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
; SVE-B16B16-NEXT: ret
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> zeroinitializer
%fadd = fadd nsz <vscale x 8 x bfloat> %a, %sel
@@ -341,8 +340,7 @@ define <vscale x 8 x bfloat> @fsub_sel_nxv8bf16(<vscale x 8 x bfloat> %a, <vscal
;
; SVE-B16B16-LABEL: fsub_sel_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
; SVE-B16B16-NEXT: ret
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> zeroinitializer
%fsub = fsub <vscale x 8 x bfloat> %a, %sel
@@ -371,8 +369,7 @@ define <vscale x 8 x bfloat> @fadd_sel_negzero_nxv8bf16(<vscale x 8 x bfloat> %a
;
; SVE-B16B16-LABEL: fadd_sel_negzero_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
; SVE-B16B16-NEXT: ret
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %nz
@@ -402,8 +399,7 @@ define <vscale x 8 x bfloat> @fsub_sel_negzero_nxv8bf16(<vscale x 8 x bfloat> %a
;
; SVE-B16B16-LABEL: fsub_sel_negzero_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
; SVE-B16B16-NEXT: ret
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %nz
@@ -490,9 +486,7 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <
;
; SVE-B16B16-LABEL: fsub_sel_fmul_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h
; SVE-B16B16-NEXT: ret
%fmul = fmul <vscale x 8 x bfloat> %b, %c
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %fmul, <vscale x 8 x bfloat> zeroinitializer
@@ -532,9 +526,7 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
;
; SVE-B16B16-LABEL: fadd_sel_fmul_nsz_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h
; SVE-B16B16-NEXT: ret
%fmul = fmul <vscale x 8 x bfloat> %b, %c
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %fmul, <vscale x 8 x bfloat> zeroinitializer
@@ -574,9 +566,7 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
;
; SVE-B16B16-LABEL: fsub_sel_fmul_nsz_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h
; SVE-B16B16-NEXT: ret
%fmul = fmul <vscale x 8 x bfloat> %b, %c
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %fmul, <vscale x 8 x bfloat> zeroinitializer
@@ -616,9 +606,7 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
;
; SVE-B16B16-LABEL: fadd_sel_fmul_negzero_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h
; SVE-B16B16-NEXT: ret
%fmul = fmul <vscale x 8 x bfloat> %b, %c
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
@@ -707,9 +695,7 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x b
;
; SVE-B16B16-LABEL: fadd_sel_fmul_negzero_nsz_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h
; SVE-B16B16-NEXT: ret
%fmul = fmul <vscale x 8 x bfloat> %b, %c
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
@@ -750,9 +736,7 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x b
;
; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nsz_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
-; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
+; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h
; SVE-B16B16-NEXT: ret
%fmul = fmul <vscale x 8 x bfloat> %b, %c
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
More information about the llvm-commits
mailing list