[llvm] [LV][AArch64] LoopVectorizer allows scalable frem instructions (PR #76247)

Fri Dec 22 08:06:26 PST 2023

https://github.com/paschalis-mpeis created https://github.com/llvm/llvm-project/pull/76247

In AArch64, when an 'frem' instruction uses scalable vectors, it will be replaced with a vector library call. LoopVectorize is now aware of that so it no longer returns invalid costs.

When it is not scalable, it returns the default costs, which are delegated to the BaseT TTI Implementation.

---

# Dependencies:
- merged after: #76166
- (not stacked PR)

>From 8e85c511665a6acd2e30902520b6fa388f17ae7b Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Thu, 21 Dec 2023 09:29:42 +0000
Subject: [PATCH] [LV][AArch64] LoopVectorizer allows scalable frem
 instructions

In AArch64, when an 'frem' instruction uses scalable vectors, it will be
replaced with a vector library call. LoopVectorize is now aware of that
so it no longer returns invalid costs.

When it is not scalable, it returns the default costs, which are
delegated to the BaseT TTI Implementation.
---
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  1 -
 .../AArch64/AArch64TargetTransformInfo.cpp    | 15 ++++++++++++
 .../CostModel/AArch64/arith-fp-sve.ll         | 23 ++++++++++---------
 .../Analysis/CostModel/AArch64/arith-fp.ll    |  2 +-
 4 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3f76dfdaac317c..a50176d3ad7f7c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -17,7 +17,6 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include <optional>
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b68291786d..0a76c670c68a8b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2902,6 +2902,21 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     if (!Ty->getScalarType()->isFP128Ty())
       return LT.first;
     [[fallthrough]];
+  case ISD::FREM: {
+    // Scalable frem instructions will be replaced with Vector library calls.
+    if (Ty->isScalableTy()) {
+      SmallVector<Type *, 4> OpTypes;
+      for (auto &Op : CxtI->operands())
+        OpTypes.push_back(Op->getType());
+
+      InstructionCost ScalableCost =
+          getCallInstrCost(nullptr, Ty, OpTypes, CostKind);
+      return ScalableCost;
+    } else {
+      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                           Op2Info);
+    }
+  }
   case ISD::FMUL:
   case ISD::FDIV:
     // These nodes are marked as 'custom' just to lower them to SVE.
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
index 18a1c31c03f748..682bb5a58a7846 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
@@ -1,7 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 -mattr=+sve | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mattr=+sve -mattr=+fullfp16 -enable-no-nans-fp-math -disable-output -passes="print<cost-model>" %s 2>&1 | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
 
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @fadd() {
 ; CHECK-LABEL: 'fadd'
@@ -137,14 +138,14 @@ define void @fdiv() {
 
 define void @frem() {
 ; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F16 = frem <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F16 = frem <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V16F16 = frem <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F32 = frem <vscale x 2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F32 = frem <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F32 = frem <vscale x 8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F64 = frem <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F64 = frem <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F16 = frem <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F16 = frem <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F16 = frem <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = frem <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = frem <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V4F16 = frem <vscale x 4 x half> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index c352892354fc24..403ee8e861387e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
 
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
 
 define i32 @fadd(i32 %arg) {
 ; CHECK-LABEL: 'fadd'