[PATCH] D91354: [AArch64] Lower @llvm.complex.multiply using fcmla (WIP).

Mon Oct 4 03:08:36 PDT 2021

fhahn updated this revision to Diff 376843.
fhahn added a comment.

Rebased; still WIP.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91354/new/

https://reviews.llvm.org/D91354

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/complex-intrinsics.ll


Index: llvm/test/CodeGen/AArch64/complex-intrinsics.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/AArch64/complex-intrinsics.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr="+complxnum" | FileCheck %s
+
+declare <2 x float> @llvm.complex.multiply.v2f32(<2 x float>, <2 x float>)
+
+define <2 x float> @test_fcmla_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_fcmla_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    fcmla.2s v2, v0, v1, #0
+; CHECK-NEXT:    fcmla.2s v2, v0, v1, #90
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+  %res = call <2 x float> @llvm.complex.multiply.v2f32(<2 x float> %a, <2 x float> %b)
+  ret <2 x float> %res
+}
+
+declare <4 x float> @llvm.complex.multiply.v4f32(<4 x float>, <4 x float>)
+
+define <4 x float> @test_fcmla_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fcmla_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-NEXT:    fcmla.4s v2, v0, v1, #0
+; CHECK-NEXT:    fcmla.4s v2, v0, v1, #90
+; CHECK-NEXT:    mov.16b v0, v2
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.complex.multiply.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %res
+}
+
+; FIXME: The v8f32 case below is disabled because it crashes during type legalization.
+; declare <8 x float> @llvm.complex.multiply.v8f32(<8 x float>, <8 x float>)
+
+; define <8 x float> @test_fcmla_v8f32(<8 x float> %a, <8 x float> %b) {
+;  %res = call <8 x float> @llvm.complex.multiply.v8f32(<8 x float> %a, <8 x float> %b)
+;  ret <8 x float> %res
+;}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3933,6 +3933,16 @@
   SDLoc dl(Op);
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
+  case Intrinsic::complex_multiply: {
+    auto ResVT = Op.getValueType();
+    SDValue Part1 =
+        DAG.getNode(AArch64ISD::FCMLA, dl, Op.getValueType(),
+                    DAG.getConstantFP(0.0, dl, ResVT), Op.getOperand(1),
+                    Op.getOperand(2), DAG.getTargetConstant(0, dl, MVT::i32));
+    return DAG.getNode(AArch64ISD::FCMLA, dl, Op.getValueType(), Part1,
+                       Op.getOperand(1), Op.getOperand(2),
+                       DAG.getTargetConstant(90, dl, MVT::i32));
+  }
   case Intrinsic::thread_pointer: {
     EVT PtrVT = getPointerTy(DAG.getDataLayout());
     return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D91354.376843.patch
Type: text/x-patch
Size: 2756 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211004/bb38968e/attachment.bin>