[llvm] 2a936be - [SVE] Selection failure with scalable insertelements

Wed Jun 16 07:55:15 PDT 2021

Author: Dylan Fleming
Date: 2021-06-16T15:38:31+01:00
New Revision: 2a936be38864fea5f11adf68e611fabc37aca6c1

URL: https://github.com/llvm/llvm-project/commit/2a936be38864fea5f11adf68e611fabc37aca6c1
DIFF: https://github.com/llvm/llvm-project/commit/2a936be38864fea5f11adf68e611fabc37aca6c1.diff

LOG: [SVE] Selection failure with scalable insertelements

Reviewed By: efriedma, CarolineConcatto

Differential Revision: https://reviews.llvm.org/D104244

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-insert-element.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 198260d7c472..7e7ac6ddcc3e 100644

--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2258,12 +2258,30 @@ let Predicates = [HasSVE] in {
                         GPR64:$src)>;
 
   // Insert FP scalar into vector with scalar index
+  def : Pat<(nxv2f16 (vector_insert (nxv2f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D GPR64:$index)),
+                        $src)>;
+  def : Pat<(nxv4f16 (vector_insert (nxv4f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_S (PTRUE_S 31),
+                                       (INDEX_II_S 0, 1),
+                                       (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        $src)>;
   def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
             (CPY_ZPmV_H ZPR:$vec,
                         (CMPEQ_PPzZZ_H (PTRUE_H 31),
                                        (INDEX_II_H 0, 1),
                                        (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
                         $src)>;
+  def : Pat<(nxv2f32 (vector_insert (nxv2f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
+            (CPY_ZPmV_S ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D GPR64:$index)),
+                        $src)>;
   def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
             (CPY_ZPmV_S ZPR:$vec,
                         (CMPEQ_PPzZZ_S (PTRUE_S 31),

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index c6250ff5f16a..da56ae9ba027 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -273,3 +273,82 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
   %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
   ret <vscale x 2 x double> %b
 }
+
+; Insert FP scalar into a scalable vector at a variable (runtime) index
+define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, w0
+; CHECK-NEXT:    index z2.s, #0, #1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z2.s, z1.s
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, w0
+; CHECK-NEXT:    index z2.h, #0, #1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z1.h
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, w0
+; CHECK-NEXT:    index z2.s, #0, #1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z2.s, z1.s
+; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT:    mov z0.d, p0/m, d0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
+  ret <vscale x 2 x double> %res
+}