[llvm] [LLVM][SVE] Add isel for bfloat based select operations. (PR #128881)

Wed Feb 26 06:37:00 PST 2025

https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/128881

Patch also adds missing tests for unpacked half and float types.

>From 8d205fc89086198eb257c78bac470d7cfa0b6c47 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 26 Feb 2025 14:27:53 +0000
Subject: [PATCH] [LLVM][SVE] Add isel for bfloat based select operations.

Patch also adds missing tests for unpacked half and float types.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   2 +
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |   2 +
 llvm/test/CodeGen/AArch64/sve-select.ll       | 114 ++++++++++++++++++
 3 files changed, 118 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b00aa11f8499d..3be022b049243 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1701,6 +1701,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::FP_ROUND, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+      setOperationAction(ISD::SELECT, VT, Custom);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e443c5ab150bd..54fd4eb52a9fb 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1962,6 +1962,8 @@ multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
   def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1,  nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 
   def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1,  nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4bf16, op, nxv4i1,  nxv4bf16, nxv4bf16, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2bf16, op, nxv2i1,  nxv2bf16, nxv2bf16, !cast<Instruction>(NAME # _D)>;
 
   def : InstAlias<"mov $Zd, $Pg/m, $Zn",
                   (!cast<Instruction>(NAME # _B) ZPR8:$Zd, PPRAny:$Pg, ZPR8:$Zn, ZPR8:$Zd), 1>;
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index b1270165556e6..85451f0bf4246 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -109,6 +109,30 @@ define <vscale x  8 x half> @select_nxv8f16(i1 %cond, <vscale x  8 x half> %a, <
   ret <vscale x  8 x half> %res
 }
 
+define <vscale x  4 x half> @select_nxv4f16(i1 %cond, <vscale x  4 x half> %a, <vscale x  4 x half> %b) {
+; CHECK-LABEL: select_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = select i1 %cond, <vscale x  4 x half> %a, <vscale x  4 x half> %b
+  ret <vscale x  4 x half> %res
+}
+
+define <vscale x  2 x half> @select_nxv2f16(i1 %cond, <vscale x  2 x half> %a, <vscale x  2 x half> %b) {
+; CHECK-LABEL: select_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = select i1 %cond, <vscale x  2 x half> %a, <vscale x  2 x half> %b
+  ret <vscale x  2 x half> %res
+}
+
 define <vscale x  4 x float> @select_nxv4f32(i1 %cond, <vscale x  4 x float> %a, <vscale x  4 x float> %b) {
 ; CHECK-LABEL: select_nxv4f32:
 ; CHECK:       // %bb.0:
@@ -121,6 +145,18 @@ define <vscale x  4 x float> @select_nxv4f32(i1 %cond, <vscale x  4 x float> %a,
   ret <vscale x  4 x float> %res
 }
 
+define <vscale x  2 x float> @select_nxv2f32(i1 %cond, <vscale x  2 x float> %a, <vscale x  2 x float> %b) {
+; CHECK-LABEL: select_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = select i1 %cond, <vscale x  2 x float> %a, <vscale x  2 x float> %b
+  ret <vscale x  2 x float> %res
+}
+
 define <vscale x  2 x double> @select_nxv2f64(i1 %cond, <vscale x  2 x double> %a, <vscale x  2 x double> %b) {
 ; CHECK-LABEL: select_nxv2f64:
 ; CHECK:       // %bb.0:
@@ -133,6 +169,42 @@ define <vscale x  2 x double> @select_nxv2f64(i1 %cond, <vscale x  2 x double> %
   ret <vscale x  2 x double> %res
 }
 
+define <vscale x  8 x bfloat> @select_nxv8bf16(i1 %cond, <vscale x  8 x bfloat> %a, <vscale x  8 x bfloat> %b) {
+; CHECK-LABEL: select_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-NEXT:    whilelo p0.h, xzr, x8
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %res = select i1 %cond, <vscale x  8 x bfloat> %a, <vscale x  8 x bfloat> %b
+  ret <vscale x  8 x bfloat> %res
+}
+
+define <vscale x  4 x bfloat> @select_nxv4bf16(i1 %cond, <vscale x  4 x bfloat> %a, <vscale x  4 x bfloat> %b) {
+; CHECK-LABEL: select_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = select i1 %cond, <vscale x  4 x bfloat> %a, <vscale x  4 x bfloat> %b
+  ret <vscale x  4 x bfloat> %res
+}
+
+define <vscale x  2 x bfloat> @select_nxv2bf16(i1 %cond, <vscale x  2 x bfloat> %a, <vscale x  2 x bfloat> %b) {
+; CHECK-LABEL: select_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %res = select i1 %cond, <vscale x  2 x bfloat> %a, <vscale x  2 x bfloat> %b
+  ret <vscale x  2 x bfloat> %res
+}
+
 define <vscale x  16 x i1> @select_nxv16i1(i1 %cond, <vscale x  16 x i1> %a, <vscale x  16 x i1> %b) {
 ; CHECK-LABEL: select_nxv16i1:
 ; CHECK:       // %bb.0:
@@ -324,6 +396,20 @@ define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vsc
   ret <vscale x 2 x double> %sel
 }
 
+define <vscale x 2 x bfloat> @icmp_select_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %mask = icmp eq i64 %x0, 0
+  %sel = select i1 %mask, <vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b
+  ret <vscale x 2 x bfloat> %sel
+}
+
 define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i64 %x0) {
 ; CHECK-LABEL: icmp_select_nxv4f16:
 ; CHECK:       // %bb.0:
@@ -352,6 +438,20 @@ define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscal
   ret <vscale x 4 x float> %sel
 }
 
+define <vscale x 4 x bfloat> @icmp_select_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %mask = icmp eq i64 %x0, 0
+  %sel = select i1 %mask, <vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b
+  ret <vscale x 4 x bfloat> %sel
+}
+
 define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i64 %x0) {
 ; CHECK-LABEL: icmp_select_nxv8f16:
 ; CHECK:       // %bb.0:
@@ -366,6 +466,20 @@ define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale
   ret <vscale x 8 x half> %sel
 }
 
+define <vscale x 8 x bfloat> @icmp_select_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NEXT:    whilelo p0.h, xzr, x8
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %mask = icmp eq i64 %x0, 0
+  %sel = select i1 %mask, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b
+  ret <vscale x 8 x bfloat> %sel
+}
+
 define <vscale x 1 x i64> @icmp_select_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, i64 %x0) {
 ; CHECK-LABEL: icmp_select_nxv1i64:
 ; CHECK:       // %bb.0: