[llvm] 7108b2d - [SVE] Fix LoopVectorizer test scalalable-call.ll

Wed Mar 31 07:02:18 PDT 2021

Author: Sander de Smalen
Date: 2021-03-31T14:52:49+01:00
New Revision: 7108b2dec17d895aec874477f62ccc6c663c2307

URL: https://github.com/llvm/llvm-project/commit/7108b2dec17d895aec874477f62ccc6c663c2307
DIFF: https://github.com/llvm/llvm-project/commit/7108b2dec17d895aec874477f62ccc6c663c2307.diff

LOG: [SVE] Fix LoopVectorizer test scalalable-call.ll

This marks FSIN and other operations to EXPAND for scalable
vectors, so that they are not assumed to be legal by the cost-model.

Depends on D97470

Reviewed By: dmgreen, paulwalker-arm

Differential Revision: https://reviews.llvm.org/D97471

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5d9d44fd86442..e74a16e4f80db 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -344,6 +344,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setCondCodeAction(ISD::SETUGT, VT, Expand);
       setCondCodeAction(ISD::SETUEQ, VT, Expand);
       setCondCodeAction(ISD::SETUNE, VT, Expand);
+
+      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::FPOW, VT, Expand);
+      setOperationAction(ISD::FPOWI, VT, Expand);
+      setOperationAction(ISD::FCOS, VT, Expand);
+      setOperationAction(ISD::FSIN, VT, Expand);
+      setOperationAction(ISD::FSINCOS, VT, Expand);
+      setOperationAction(ISD::FEXP, VT, Expand);
+      setOperationAction(ISD::FEXP2, VT, Expand);
+      setOperationAction(ISD::FLOG, VT, Expand);
+      setOperationAction(ISD::FLOG2, VT, Expand);
+      setOperationAction(ISD::FLOG10, VT, Expand);
     }
   }
 

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index be21f61956baa..e2ffb8d544ff6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4973,8 +4973,8 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
   bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost;
   assert((UseVectorIntrinsic || !NeedToScalarize) &&
          "Instruction should be scalarized elsewhere.");
-  assert(IntrinsicCost.isValid() && CallCost.isValid() &&
-         "Cannot have invalid costs while widening");
+  assert((IntrinsicCost.isValid() || CallCost.isValid()) &&
+         "Either the intrinsic cost or vector call cost must be valid");
 
   for (unsigned Part = 0; Part < UF; ++Part) {
     SmallVector<Value *, 4> Args;
@@ -8538,8 +8538,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range,
     InstructionCost CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
     InstructionCost IntrinsicCost = ID ? CM.getVectorIntrinsicCost(CI, VF) : 0;
     bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost;
-    assert(IntrinsicCost.isValid() && CallCost.isValid() &&
-           "Cannot have invalid costs while widening");
+    assert((IntrinsicCost.isValid() || CallCost.isValid()) &&
+           "Either the intrinsic cost or vector call cost must be valid");
     return UseVectorIntrinsic || !NeedToScalarize;
   };
 

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index b3b0d171bb149..1ed729bc81a69 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -181,4 +181,38 @@ declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x
 declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
 declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
 
+define void @unsupported_fp_ops(<vscale x 4 x float> %vec) {
+; CHECK-LABEL: 'unsupported_fp_ops'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %sin = call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %cos = call <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %vec, i32 42)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log = call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log2 = call <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %vec)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log10 = call <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %vec)
+
+  %sin = call <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %vec)
+  %cos = call <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %vec)
+  %pow = call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %vec, <vscale x 4 x float> %vec)
+  %powi = call <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float> %vec, i32 42)
+  %exp = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %vec)
+  %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %vec)
+  %log = call <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %vec)
+  %log2 = call <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %vec)
+  %log10 = call <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %vec)
+  ret void
+}
+
+declare <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.powi.nxv4f32(<vscale x 4 x float>, i32)
+declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float>)
+
 attributes #0 = { "target-features"="+sve,+bf16" }

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index bb70cbcfd4e96..8fa057f0f888b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -72,11 +72,10 @@ for.end:
 }
 
 define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {
-;; FIXME: Should be calling sin_vec, once the cost of scalarizing is handled.
 ; CHECK-LABEL: @vec_intrinsic
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
-; CHECK: call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %[[LOAD]])
+; CHECK: call fast <vscale x 2 x double> @sin_vec(<vscale x 2 x double> %[[LOAD]])
 entry:
   %cmp7 = icmp sgt i64 %N, 0
   br i1 %cmp7, label %for.body, label %for.end