[llvm] 6e68fa9 - [SLP]Fix PR106909: add a check for unsafe FP operations.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 1 07:13:14 PDT 2024


Author: Alexey Bataev
Date: 2024-09-01T07:10:09-07:00
New Revision: 6e68fa921bb7bf5ceb0b5036bb7d71399d5c7906

URL: https://github.com/llvm/llvm-project/commit/6e68fa921bb7bf5ceb0b5036bb7d71399d5c7906
DIFF: https://github.com/llvm/llvm-project/commit/6e68fa921bb7bf5ceb0b5036bb7d71399d5c7906.diff

LOG: [SLP]Fix PR106909: add a check for unsafe FP operations.

NEON has non-IEEE-compliant denormal flushing, so the compiler should
check whether it is safe to vectorize instructions for NEON in non-fast-math
mode.

Fixes https://github.com/llvm/llvm-project/issues/106909

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/ARM/invalid-fp-operations.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 993fd6ab1b0b41..58137cd6f543f2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6584,6 +6584,13 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
   assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
 
+  if (S.MainOp->getType()->isFloatingPointTy() &&
+      TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
+        auto *I = dyn_cast<Instruction>(V);
+        return I && (I->isBinaryOp() || isa<CallInst>(I)) && !I->isFast();
+      }))
+    return TreeEntry::NeedToGather;
+
   unsigned ShuffleOrOp =
       S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
   auto *VL0 = cast<Instruction>(S.OpValue);

diff  --git a/llvm/test/Transforms/SLPVectorizer/ARM/invalid-fp-operations.ll b/llvm/test/Transforms/SLPVectorizer/ARM/invalid-fp-operations.ll
index 22100c9ab88c6e..033e605b5a1a40 100644
--- a/llvm/test/Transforms/SLPVectorizer/ARM/invalid-fp-operations.ll
+++ b/llvm/test/Transforms/SLPVectorizer/ARM/invalid-fp-operations.ll
@@ -4,10 +4,31 @@
 define void @test(ptr %a, ptr %b, ptr %r) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[R:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[R]], align 4
+; CHECK-NEXT:    [[V_A_0:%.*]] = load float, ptr [[A]], align 4
+; CHECK-NEXT:    [[A_1:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[V_A_1:%.*]] = load float, ptr [[A_1]], align 4
+; CHECK-NEXT:    [[A_2:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[V_A_2:%.*]] = load float, ptr [[A_2]], align 4
+; CHECK-NEXT:    [[A_3:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT:    [[V_A_3:%.*]] = load float, ptr [[A_3]], align 4
+; CHECK-NEXT:    [[V_B_0:%.*]] = load float, ptr [[B]], align 4
+; CHECK-NEXT:    [[B_1:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT:    [[V_B_1:%.*]] = load float, ptr [[B_1]], align 4
+; CHECK-NEXT:    [[B_2:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT:    [[V_B_2:%.*]] = load float, ptr [[B_2]], align 4
+; CHECK-NEXT:    [[B_3:%.*]] = getelementptr i8, ptr [[B]], i64 12
+; CHECK-NEXT:    [[V_B_3:%.*]] = load float, ptr [[B_3]], align 4
+; CHECK-NEXT:    [[V_R_0:%.*]] = fadd float [[V_A_0]], [[V_B_0]]
+; CHECK-NEXT:    [[V_R_1:%.*]] = fadd float [[V_A_1]], [[V_B_1]]
+; CHECK-NEXT:    [[V_R_2:%.*]] = fadd float [[V_A_2]], [[V_B_2]]
+; CHECK-NEXT:    [[V_R_3:%.*]] = fadd float [[V_A_3]], [[V_B_3]]
+; CHECK-NEXT:    store float [[V_R_0]], ptr [[R]], align 4
+; CHECK-NEXT:    [[R_1:%.*]] = getelementptr i8, ptr [[R]], i64 4
+; CHECK-NEXT:    store float [[V_R_1]], ptr [[R_1]], align 4
+; CHECK-NEXT:    [[R_2:%.*]] = getelementptr i8, ptr [[R]], i64 8
+; CHECK-NEXT:    store float [[V_R_2]], ptr [[R_2]], align 4
+; CHECK-NEXT:    [[R_3:%.*]] = getelementptr i8, ptr [[R]], i64 12
+; CHECK-NEXT:    store float [[V_R_3]], ptr [[R_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %v.a.0 = load float, ptr %a


        


More information about the llvm-commits mailing list