[PATCH] D123491: [AArch64] Add missing HasNEON predicate in scalar FABD patterns

Alexander Richardson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 11 03:19:58 PDT 2022


arichardson created this revision.
arichardson added reviewers: john.brawn, t.p.northover, samparker, SjoerdMeijer.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
arichardson requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

I was trying to compile with -march=+nosimd and hit the following assertion:
`Attempting to emit FABD64 instruction but the Feature_HasNEON predicate(s) are not met`.
This adds the HasNEON predicate to the scalar FABD patterns; it was omitted in
commit 21d9b33d62772c58267cc0aa725e35ac9a4661db for some reason.
The new code generation matches GCC with -mcpu=<cpu>+nosimd:
https://godbolt.org/z/n1Y7xh5jo


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D123491

Files:
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/fabd-no-neon.ll


Index: llvm/test/CodeGen/AArch64/fabd-no-neon.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/fabd-no-neon.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Disabling NEON used to incorrectly emit FABD instructions which resulted in
+; an assertion in verifyInstructionPredicates() while emitting the output.
+; RUN: llc -mtriple=aarch64 -mattr=+v8.2a,+fullfp16,+neon < %s | FileCheck %s --check-prefix NEON-ENABLED
+; RUN: llc -mtriple=aarch64 -mattr=+v8.2a,+fullfp16,-neon < %s | FileCheck %s --check-prefix NEON-DISABLED
+; Note: We need to use -filetype=obj to trigger verifyInstructionPredicates()
+; checks since it is not called when emitting assembly output.
+; RUN: llc -mtriple=aarch64 -mattr=+v8.2a,+fullfp16,-neon -o /dev/null %s -filetype=obj
+
+target triple = "aarch64-unknown-unknown-elf"
+
+define half @fabd16(half %f1, half %f2) local_unnamed_addr nounwind {
+; NEON-ENABLED-LABEL: fabd16:
+; NEON-ENABLED:       // %bb.0: // %bb
+; NEON-ENABLED-NEXT:    fabd h0, h0, h1
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: fabd16:
+; NEON-DISABLED:       // %bb.0: // %bb
+; NEON-DISABLED-NEXT:    fsub h0, h0, h1
+; NEON-DISABLED-NEXT:    fabs h0, h0
+; NEON-DISABLED-NEXT:    ret
+bb:
+  %sub = fsub half %f1, %f2
+  %abs = tail call half @llvm.fabs.f16(half %sub)
+  ret half %abs
+}
+
+define float @fabd32(float %f1, float %f2) local_unnamed_addr nounwind {
+; NEON-ENABLED-LABEL: fabd32:
+; NEON-ENABLED:       // %bb.0: // %bb
+; NEON-ENABLED-NEXT:    fabd s0, s0, s1
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: fabd32:
+; NEON-DISABLED:       // %bb.0: // %bb
+; NEON-DISABLED-NEXT:    fsub s0, s0, s1
+; NEON-DISABLED-NEXT:    fabs s0, s0
+; NEON-DISABLED-NEXT:    ret
+bb:
+  %sub = fsub float %f1, %f2
+  %abs = tail call float @llvm.fabs.f32(float %sub)
+  ret float %abs
+}
+
+define double @fabd64(double %f1, double %f2) local_unnamed_addr nounwind {
+; NEON-ENABLED-LABEL: fabd64:
+; NEON-ENABLED:       // %bb.0: // %bb
+; NEON-ENABLED-NEXT:    fabd d0, d0, d1
+; NEON-ENABLED-NEXT:    ret
+;
+; NEON-DISABLED-LABEL: fabd64:
+; NEON-DISABLED:       // %bb.0: // %bb
+; NEON-DISABLED-NEXT:    fsub d0, d0, d1
+; NEON-DISABLED-NEXT:    fabs d0, d0
+; NEON-DISABLED-NEXT:    ret
+bb:
+  %sub = fsub double %f1, %f2
+  %abs = tail call double @llvm.fabs.f64(double %sub)
+  ret double %abs
+}
+
+declare half @llvm.fabs.f16(half)
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4780,11 +4780,13 @@
 defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
 def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
           (FABD64 FPR64:$Rn, FPR64:$Rm)>;
-let Predicates = [HasFullFP16] in {
+let Predicates = [HasNEON, HasFullFP16] in {
 def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
 }
+let Predicates = [HasNEON] in {
 def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
 def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
+}
 defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                      int_aarch64_neon_facge>;
 defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D123491.421863.patch
Type: text/x-patch
Size: 3534 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220411/c682abdf/attachment.bin>


More information about the llvm-commits mailing list