[llvm] 3567908 - [SLP] add fadd reduction test to show broken FMF propagation; NFC

Wed Dec 30 08:33:07 PST 2020

Author: Sanjay Patel
Date: 2020-12-30T11:27:50-05:00
New Revision: 3567908d8ceb95afe50961c7a953c202131235c5

URL: https://github.com/llvm/llvm-project/commit/3567908d8ceb95afe50961c7a953c202131235c5
DIFF: https://github.com/llvm/llvm-project/commit/3567908d8ceb95afe50961c7a953c202131235c5.diff

LOG: [SLP] add fadd reduction test to show broken FMF propagation; NFC

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 5663c88b6366..8e175f1acda9 100644

--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1766,4 +1766,39 @@ bb.1:
   ret void
 }
 
+; FIXME: This is a miscompile.
+; The FMF on the reduction should match the incoming insts.
+
+define float @fadd_v4f32_fmf(float* %p) {
+; CHECK-LABEL: @fadd_v4f32_fmf(
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret float [[TMP3]]
+;
+; STORE-LABEL: @fadd_v4f32_fmf(
+; STORE-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; STORE-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; STORE-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; STORE-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:    ret float [[TMP3]]
+;
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  %p3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %p1, align 4
+  %t2 = load float, float* %p2, align 4
+  %t3 = load float, float* %p3, align 4
+  %add1 = fadd reassoc nsz float %t1, %t0
+  %add2 = fadd reassoc nsz float %t2, %add1
+  %add3 = fadd reassoc nsz float %t3, %add2
+  ret float %add3
+}
+
 declare i32 @__gxx_personality_v0(...)