[llvm] [AArch64] Avoid NEON fixed-point SCVTF in Streaming-SVE mode. (PR #91924)

Mon May 13 00:08:57 PDT 2024

https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/91924

The NEON fixed-point SCVTF instruction is not available in Streaming-SVE
mode. There is no equivalent SVE instruction, so we simply skip the
combine and leave the conversion expanded as a regular SCVTF followed by
an FDIV operation.

From ad09d20ca48d3fe89b936fd5413340f70004bc87 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 26 Apr 2024 12:07:15 +0100
Subject: [PATCH 1/2] [AArch64] NFC: Precursory test

---
 .../sve-streaming-mode-fixed-length-fp-arith.ll     | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index c2d6ed4e9ccf9..82689f66e8455 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -214,6 +214,19 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
   ret <2 x float> %res
 }
 
+; Test that we don't optimise this using a NEON instruction, when
+; NEON is not available.
+define <2 x float> @fdiv_v232_pow2(<2 x i32> %in) {
+; CHECK-LABEL: fdiv_v232_pow2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf v0.2s, v0.2s, #4
+; CHECK-NEXT:    ret
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+  ret <2 x float> %div.i
+}
+
 define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK:       // %bb.0:

From 9d4307af0a0d0cc41cab5d77b4fb608200ab7f65 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 29 Apr 2024 15:18:19 +0100
Subject: [PATCH 2/2] [AArch64] Avoid NEON fixed-point SCVTF in Streaming-SVE
 mode.

The NEON fixed-point SCVTF instruction is not available in Streaming-SVE
mode. There is no equivalent SVE instruction, so we simply skip the
combine and leave the conversion expanded as a regular SCVTF followed by
an FDIV operation.
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp            | 2 +-
 .../AArch64/sve-streaming-mode-fixed-length-fp-arith.ll    | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7344387ffe552..a76dc7b140b07 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17912,7 +17912,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
 static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const AArch64Subtarget *Subtarget) {
-  if (!Subtarget->hasNEON())
+  if (!Subtarget->isNeonAvailable())
     return SDValue();
 
   SDValue Op = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index 82689f66e8455..c20e8521375a9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -219,7 +219,12 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
 define <2 x float> @fdiv_v232_pow2(<2 x i32> %in) {
 ; CHECK-LABEL: fdiv_v232_pow2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf v0.2s, v0.2s, #4
+; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fmov z1.s, #16.00000000
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %vcvt.i = sitofp <2 x i32> %in to <2 x float>