[PATCH] D78252: [AArch64] FMLA/FMLS patterns improvement.
Pavel Iliin via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 15 16:34:09 PDT 2020
ilinpv created this revision.
ilinpv added reviewers: samparker, dmgreen, SjoerdMeijer.
Herald added subscribers: cfe-commits, danielkiss, hiraditya, kristof.beyls.
Herald added a project: clang.
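Added indexed (by-element) FMLA/FMLS patterns for the 8H (v8f16) form whose multiplier is a DUPLANE of a 128-bit vector, so the separate DUP (and, for FMLS, the FNEG) is folded into a single instruction.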
Fixes https://bugs.llvm.org/show_bug.cgi?id=45467
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D78252
Files:
clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -29,8 +29,7 @@
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: dup v2.8h, v2.h[0]
-; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
%lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
@@ -57,8 +56,7 @@
; CHECK: .Lt_vfmaq_laneq_f16$local:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: dup v2.8h, v2.h[0]
-; CHECK-NEXT: fmla v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
%lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
@@ -148,9 +146,7 @@
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: fneg v1.8h, v1.8h
-; CHECK-NEXT: dup v2.8h, v2.h[0]
-; CHECK-NEXT: fmla v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
%sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
@@ -179,8 +175,7 @@
; CHECK: .Lt_vfmsq_laneq_f16$local:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: dup v2.8h, v2.h[0]
-; CHECK-NEXT: fmls v0.8h, v2.8h, v1.8h
+; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
%sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8052,6 +8052,15 @@
}
multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ // 1 variant for the .8h version: DUPLANE from 128-bit
+ def : Pat<(v8f16 (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn),
+ (v8f16 (AArch64duplane16 (v8f16 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (!cast<Instruction>(INST # "v8i16_indexed")
+ V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ } // Predicates = [HasNEON, HasFullFP16]
+
// 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64duplane32 (v4f32 V128:$Rm),
Index: clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
===================================================================
--- clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-constrained.c
@@ -105,7 +105,7 @@
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMLA]]
float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
return vfmaq_lane_f16(a, b, c, 3);
@@ -213,7 +213,6 @@
// COMMON-LABEL: test_vfmsq_lane_f16
// COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b
-// CHECK-ASM: fneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
// COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
// COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
@@ -223,7 +222,7 @@
// COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
// CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
-// CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
+// CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
// COMMONIR: ret <8 x half> [[FMLA]]
float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
return vfmsq_lane_f16(a, b, c, 3);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D78252.257881.patch
Type: text/x-patch
Size: 5055 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200415/5662d1cd/attachment-0001.bin>
More information about the cfe-commits mailing list