[llvm] 16ee102 - [SVE] Add missing splat patterns for bfloat vectors.

Fri Feb 25 08:58:01 PST 2022

Author: Paul Walker
Date: 2022-02-25T16:53:39Z
New Revision: 16ee102964b90a6fe126e1e8a8d17f8859693941

URL: https://github.com/llvm/llvm-project/commit/16ee102964b90a6fe126e1e8a8d17f8859693941
DIFF: https://github.com/llvm/llvm-project/commit/16ee102964b90a6fe126e1e8a8d17f8859693941.diff

LOG: [SVE] Add missing splat patterns for bfloat vectors.

Differential Revision: https://reviews.llvm.org/D120496

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-vector-splat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c181253897b0f..9754df1a6a641 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1304,10 +1304,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
     }
 
-    setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
-
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
 

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6f3883f83d152..6f926256e021e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -642,6 +642,10 @@ let Predicates = [HasSVEorStreamingSVE] in {
             (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
   def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+  def : Pat<(nxv4bf16 (AArch64dup (bf16 FPR16:$src))),
+            (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+  def : Pat<(nxv2bf16 (AArch64dup (bf16 FPR16:$src))),
+            (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
 
   // Duplicate +0.0 into all vector elements
   def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
@@ -651,6 +655,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
   def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
   def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
   def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+  def : Pat<(nxv4bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+  def : Pat<(nxv2bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
 
   // Duplicate Int immediate into all vector elements
   def : Pat<(nxv16i8 (AArch64dup (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))),

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 641ee2f308747..31d2404666247 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -276,6 +276,28 @@ define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
   ret <vscale x 8 x bfloat> %2
 }
 
+define <vscale x 4 x bfloat> @splat_nxv4bf16(bfloat %val) #0 {
+; CHECK-LABEL: splat_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ret
+  %1 = insertelement <vscale x 4 x bfloat> undef, bfloat %val, i32 0
+  %2 = shufflevector <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x bfloat> %2
+}
+
+define <vscale x 2 x bfloat> @splat_nxv2bf16(bfloat %val) #0 {
+; CHECK-LABEL: splat_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    ret
+  %1 = insertelement <vscale x 2 x bfloat> undef, bfloat %val, i32 0
+  %2 = shufflevector <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x bfloat> %2
+}
+
 define <vscale x 8 x half> @splat_nxv8f16(half %val) {
 ; CHECK-LABEL: splat_nxv8f16:
 ; CHECK:       // %bb.0:
@@ -342,20 +364,36 @@ define <vscale x 2 x double> @splat_nxv2f64(double %val) {
   ret <vscale x 2 x double> %2
 }
 
-define <vscale x 8 x half> @splat_nxv8f16_zero() {
-; CHECK-LABEL: splat_nxv8f16_zero:
+define <vscale x 8 x bfloat> @splat_nxv8bf16_zero() #0 {
+; CHECK-LABEL: splat_nxv8bf16_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, #0 // =0x0
 ; CHECK-NEXT:    ret
-  ret <vscale x 8 x half> zeroinitializer
+  ret <vscale x 8 x bfloat> zeroinitializer
 }
 
-define <vscale x 8 x bfloat> @splat_nxv8bf16_zero() #0 {
-; CHECK-LABEL: splat_nxv8bf16_zero:
+define <vscale x 4 x bfloat> @splat_nxv4bf16_zero() #0 {
+; CHECK-LABEL: splat_nxv4bf16_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, #0 // =0x0
 ; CHECK-NEXT:    ret
-  ret <vscale x 8 x bfloat> zeroinitializer
+  ret <vscale x 4 x bfloat> zeroinitializer
+}
+
+define <vscale x 2 x bfloat> @splat_nxv2bf16_zero() #0 {
+; CHECK-LABEL: splat_nxv2bf16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    ret
+  ret <vscale x 2 x bfloat> zeroinitializer
+}
+
+define <vscale x 8 x half> @splat_nxv8f16_zero() {
+; CHECK-LABEL: splat_nxv8f16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
+; CHECK-NEXT:    ret
+  ret <vscale x 8 x half> zeroinitializer
 }
 
 define <vscale x 4 x half> @splat_nxv4f16_zero() {
@@ -539,9 +577,9 @@ define <vscale x 4 x float> @splat_nxv4f32_imm_out_of_range() {
 define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI51_0
+; CHECK-NEXT:    adrp x8, .LCPI55_0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI51_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI55_0
 ; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0