[llvm] 1da52ef - [ARM] Add VGETLANEu patterns for v4f16 and v8f16

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 19 06:25:58 PDT 2021


Author: David Green
Date: 2021-09-19T14:25:21+01:00
New Revision: 1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d

URL: https://github.com/llvm/llvm-project/commit/1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d
DIFF: https://github.com/llvm/llvm-project/commit/1da52ef2943b67c0ec1ccd3b8e459d0e57e67a6d.diff

LOG: [ARM] Add VGETLANEu patterns for v4f16 and v8f16

These patterns were apparently missing: there was no pattern that could
convert a VGETLANEu of a v4f16 or v8f16 vector to an i32. Equivalent
patterns for bf16 (v4bf16 and v8bf16) are added at the same time,
following the same structure.

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMInstrNEON.td
    llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll
    llvm/test/CodeGen/ARM/fp16-insert-extract.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 94e56413b0914..aaf3280ea1508 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6446,6 +6446,18 @@ def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
           (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                              (DSubReg_i16_reg imm:$lane))),
                      (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
+          (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
+                             (DSubReg_i16_reg imm:$lane))),
+                     (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
+          (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;
+def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
+          (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
+                             (DSubReg_i16_reg imm:$lane))),
+                     (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
+          (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
 }
 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
           (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,

diff  --git a/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll b/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll
index 55d01deb5cc16..3416e5032f88f 100644
--- a/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll
+++ b/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll
@@ -43,3 +43,55 @@ entry:
   %0 = extractelement <4 x bfloat> %v, i32 1
   ret bfloat %0
 }
+
+define i16 @bextract_v4i16(<4 x bfloat> %a) {
+; CHECK-LABEL: bextract_v4i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vmov.u16 r0, d16[0]
+; CHECK-NEXT:    bx lr
+entry:
+  %elt = extractelement <4 x bfloat> %a, i32 0
+  %t = bitcast bfloat %elt to i16
+  ret i16 %t
+}
+
+define i16 @bextract_v8i16(<8 x bfloat> %a) {
+; CHECK-LABEL: bextract_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vmov.u16 r0, d16[0]
+; CHECK-NEXT:    bx lr
+entry:
+  %elt = extractelement <8 x bfloat> %a, i32 0
+  %t = bitcast bfloat %elt to i16
+  ret i16 %t
+}
+
+define i32 @bextract_v4s32(<4 x bfloat> %a) {
+; CHECK-LABEL: bextract_v4s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vmov.u16 r0, d16[0]
+; CHECK-NEXT:    sxth r0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %elt = extractelement <4 x bfloat> %a, i32 0
+  %t = bitcast bfloat %elt to i16
+  %s = sext i16 %t to i32
+  ret i32 %s
+}
+
+define i32 @bextract_v8s32(<8 x bfloat> %a) {
+; CHECK-LABEL: bextract_v8s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vmov.u16 r0, d16[0]
+; CHECK-NEXT:    sxth r0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %elt = extractelement <8 x bfloat> %a, i32 0
+  %t = bitcast bfloat %elt to i16
+  %s = sext i16 %t to i32
+  ret i32 %s
+}

diff  --git a/llvm/test/CodeGen/ARM/fp16-insert-extract.ll b/llvm/test/CodeGen/ARM/fp16-insert-extract.ll
index c53090ec026fe..d95d908548b37 100644
--- a/llvm/test/CodeGen/ARM/fp16-insert-extract.ll
+++ b/llvm/test/CodeGen/ARM/fp16-insert-extract.ll
@@ -295,3 +295,76 @@ entry:
   ret <8 x half> %r
 }
 
+define i16 @extract_v4i16(<4 x half> %a) {
+; CHECKHARD-LABEL: extract_v4i16:
+; CHECKHARD:       @ %bb.0: @ %entry
+; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
+; CHECKHARD-NEXT:    bx lr
+;
+; CHECKSOFT-LABEL: extract_v4i16:
+; CHECKSOFT:       @ %bb.0: @ %entry
+; CHECKSOFT-NEXT:    vmov d16, r0, r1
+; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
+; CHECKSOFT-NEXT:    bx lr
+entry:
+  %elt = extractelement <4 x half> %a, i32 0
+  %t = bitcast half %elt to i16
+  ret i16 %t
+}
+
+define i16 @extract_v8i16(<8 x half> %a) {
+; CHECKHARD-LABEL: extract_v8i16:
+; CHECKHARD:       @ %bb.0: @ %entry
+; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
+; CHECKHARD-NEXT:    bx lr
+;
+; CHECKSOFT-LABEL: extract_v8i16:
+; CHECKSOFT:       @ %bb.0: @ %entry
+; CHECKSOFT-NEXT:    vmov d16, r0, r1
+; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
+; CHECKSOFT-NEXT:    bx lr
+entry:
+  %elt = extractelement <8 x half> %a, i32 0
+  %t = bitcast half %elt to i16
+  ret i16 %t
+}
+
+define i32 @extract_v4s32(<4 x half> %a) {
+; CHECKHARD-LABEL: extract_v4s32:
+; CHECKHARD:       @ %bb.0: @ %entry
+; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
+; CHECKHARD-NEXT:    sxth r0, r0
+; CHECKHARD-NEXT:    bx lr
+;
+; CHECKSOFT-LABEL: extract_v4s32:
+; CHECKSOFT:       @ %bb.0: @ %entry
+; CHECKSOFT-NEXT:    vmov d16, r0, r1
+; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
+; CHECKSOFT-NEXT:    sxth r0, r0
+; CHECKSOFT-NEXT:    bx lr
+entry:
+  %elt = extractelement <4 x half> %a, i32 0
+  %t = bitcast half %elt to i16
+  %s = sext i16 %t to i32
+  ret i32 %s
+}
+
+define i32 @extract_v8s32(<8 x half> %a) {
+; CHECKHARD-LABEL: extract_v8s32:
+; CHECKHARD:       @ %bb.0: @ %entry
+; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
+; CHECKHARD-NEXT:    sxth r0, r0
+; CHECKHARD-NEXT:    bx lr
+;
+; CHECKSOFT-LABEL: extract_v8s32:
+; CHECKSOFT:       @ %bb.0: @ %entry
+; CHECKSOFT-NEXT:    vmov d16, r0, r1
+; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
+; CHECKSOFT-NEXT:    sxth r0, r0
+; CHECKSOFT-NEXT:    bx lr
+entry:
+  %elt = extractelement <8 x half> %a, i32 0
+  %t = bitcast half %elt to i16
+  %s = sext i16 %t to i32
+  ret i32 %s
+}


        


More information about the llvm-commits mailing list