[llvm] 2c958a5 - [AArch64] Add missing bf16 SVE insert vector patterns.

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 5 08:24:48 PST 2023


Author: David Green
Date: 2023-03-05T16:24:43Z
New Revision: 2c958a5aa9ff7746c4a5b53ae46a549967269aa2

URL: https://github.com/llvm/llvm-project/commit/2c958a5aa9ff7746c4a5b53ae46a549967269aa2
DIFF: https://github.com/llvm/llvm-project/commit/2c958a5aa9ff7746c4a5b53ae46a549967269aa2.diff

LOG: [AArch64] Add missing bf16 SVE insert vector patterns.

These should be identical to the fp16 SVE inserts but were missing from the
tests and tablegen patterns. They are always legal (not requiring +bf16),
although there are some other issues around bf16 legalization currently.

Added:
    (none)

Modified: 
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-insert-element.ll

Removed:
    (none)


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 8c4ad6ca1a20c..a2dcf9dde47b6 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2971,6 +2971,12 @@ let Predicates = [HasSVEorSME] in {
             (INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
   def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)),
             (INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+  def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 (undef)), (bf16 FPR16:$src), 0)),
+            (INSERT_SUBREG (nxv8bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+  def : Pat<(nxv4bf16 (vector_insert (nxv4bf16 (undef)), (bf16 FPR16:$src), 0)),
+            (INSERT_SUBREG (nxv4bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+  def : Pat<(nxv2bf16 (vector_insert (nxv2bf16 (undef)), (bf16 FPR16:$src), 0)),
+            (INSERT_SUBREG (nxv2bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
   def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)),
             (INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
   def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)),
@@ -2990,6 +2996,8 @@ let Predicates = [HasSVEorSME] in {
 
   def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)),
             (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
+  def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 ZPR:$vec), (bf16 FPR16:$src), 0)),
+            (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
   def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)),
             (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>;
   def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)),
@@ -3040,6 +3048,24 @@ let Predicates = [HasSVEorSME] in {
                                        (INDEX_II_H 0, 1),
                                        (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
                         $src)>;
+  def : Pat<(nxv2bf16 (vector_insert (nxv2bf16 ZPR:$vec), (bf16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D GPR64:$index)),
+                        $src)>;
+  def : Pat<(nxv4bf16 (vector_insert (nxv4bf16 ZPR:$vec), (bf16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_S (PTRUE_S 31),
+                                       (INDEX_II_S 0, 1),
+                                       (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        $src)>;
+  def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 ZPR:$vec), (bf16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_H (PTRUE_H 31),
+                                       (INDEX_II_H 0, 1),
+                                       (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        $src)>;
   def : Pat<(nxv2f32 (vector_insert (nxv2f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
             (CPY_ZPmV_S ZPR:$vec,
                         (CMPEQ_PPzZZ_D (PTRUE_D 31),

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index b67184eb67ec6..480c79f0ac691 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
 
 define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: test_lane0_16xi8:
@@ -78,6 +78,17 @@ define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
   ret <vscale x 8 x half> %b
 }
 
+define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
+; CHECK-LABEL: test_lane0_8xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h, vl1
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
+; CHECK-NEXT:    mov z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
+  ret <vscale x 8 x bfloat> %b
+}
+
 ; Undefined lane insert
 define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: test_lane4_2xi64:
@@ -110,6 +121,20 @@ define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
   ret <vscale x 8 x half> %b
 }
 
+define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
+; CHECK-LABEL: test_lane9_8xbf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    index z3.h, #0, #1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z3.h, z2.h
+; CHECK-NEXT:    mov z0.h, p0/m, h1
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
+  ret <vscale x 8 x bfloat> %b
+}
+
 define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
 ; CHECK-LABEL: test_lane1_16xi8:
 ; CHECK:       // %bb.0:
@@ -246,6 +271,33 @@ define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
   ret <vscale x 2 x half> %b
 }
 
+define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
+; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
+  ret <vscale x 8 x bfloat> %b
+}
+
+define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
+; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
+  ret <vscale x 4 x bfloat> %b
+}
+
+define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
+; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
+  ret <vscale x 2 x bfloat> %b
+}
+
 define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4f32:
 ; CHECK:       // %bb.0:
@@ -313,6 +365,45 @@ define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
   ret <vscale x 8 x half> %res
 }
 
+define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x bfloat> undef, bfloat %h, i64 %idx
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.s, #0, #1
+; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x bfloat> undef, bfloat %h, i64 %idx
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv8bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.h, #0, #1
+; CHECK-NEXT:    mov z2.h, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 8 x bfloat> undef, bfloat %h, i64 %idx
+  ret <vscale x 8 x bfloat> %res
+}
+
 define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv2f32:
 ; CHECK:       // %bb.0:


        


More information about the llvm-commits mailing list