[llvm] 4456805 - [X86] Don't fold (fneg (fma (fneg X), Y, (fneg Z))) to (fma X, Y, Z)

Jim Lin via llvm-commits llvm-commits at lists.llvm.org
Fri May 21 07:59:44 PDT 2021


Author: Jim Lin
Date: 2021-05-21T23:02:19+08:00
New Revision: 445680593889199667d60207e302bc870f650fa5

URL: https://github.com/llvm/llvm-project/commit/445680593889199667d60207e302bc870f650fa5
DIFF: https://github.com/llvm/llvm-project/commit/445680593889199667d60207e302bc870f650fa5.diff

LOG: [X86] Don't fold (fneg (fma (fneg X), Y, (fneg Z))) to (fma X, Y, Z)

Check if it has no signed zeros flag (nsz) in getNegatedExpression for x86.
This patch fixed miscompilation: https://alive2.llvm.org/ce/z/XxwBAJ

Reviewed By: RKSimon, spatel

Differential Revision: https://reviews.llvm.org/D90901

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
    llvm/test/CodeGen/X86/fma-fneg-combine.ll
    llvm/test/CodeGen/X86/fma-signed-zero.ll
    llvm/test/CodeGen/X86/fma_patterns.ll
    llvm/test/CodeGen/X86/fma_patterns_wide.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 893c7ab20782..b212ae61fa66 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47045,6 +47045,7 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
   EVT VT = Op.getValueType();
   EVT SVT = VT.getScalarType();
   unsigned Opc = Op.getOpcode();
+  SDNodeFlags Flags = Op.getNode()->getFlags();
   switch (Opc) {
   case ISD::FMA:
   case X86ISD::FMSUB:
@@ -47059,6 +47060,11 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
         !isOperationLegal(ISD::FMA, VT))
       break;
 
+    // Don't fold (fneg (fma (fneg x), y, (fneg z))) to (fma x, y, z)
+    // if it may have signed zeros.
+    if (!Flags.hasNoSignedZeros())
+      break;
+
     // This is always negatible for free but we might be able to remove some
     // extra operand negations as well.
     SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());

diff  --git a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
index 3d814b81dcfc..33fd6abecd5b 100644
--- a/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll
@@ -20,7 +20,7 @@ define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c)  {
 ; X64-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
 ; X64-NEXT:    retq
   %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
-  %r = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
+  %r = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
   ret <8 x float> %r
 }
 
@@ -34,7 +34,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; X64-NEXT:    retq
-  %t0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
+  %t0 = tail call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
   %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %t0
   ret <4 x float> %sub.i
 }
@@ -57,7 +57,7 @@ define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c)  {
   %b0 = extractelement <4 x float> %b, i64 0
   %c0 = extractelement <4 x float> %c, i64 0
   %negb0 = fneg float %b0
-  %t0 = tail call float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
+  %t0 = tail call nsz float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
   %i = insertelement <4 x float> %a, float %t0, i64 0
   %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %i
   ret <4 x float> %sub.i
@@ -74,7 +74,7 @@ define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 ; X64-NEXT:    retq
   %negc = fneg <8 x float> %c
-  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
+  %t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
   %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t0
   ret <8 x float> %sub.i
 }
@@ -91,7 +91,7 @@ define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X64-NEXT:    retq
   %sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
   %negsubc = fneg <8 x float> %sub.c
-  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
+  %t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
   ret <8 x float> %t0
 }
 
@@ -105,7 +105,7 @@ define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; X64-NEXT:    retq
-  %t0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
+  %t0 = tail call nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
   %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %t0
   ret <2 x double> %sub.i
 }
@@ -125,7 +125,7 @@ define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c)  {
   %t0 = insertelement <8 x float> undef, float %a, i32 0
   %t1 = fsub <8 x float> <float -0.0, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %t0
   %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
-  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
+  %t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
   ret <8 x float> %t3
 
 }
@@ -145,6 +145,6 @@ define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c)  {
   %t0 = fsub float -0.0, %a
   %t1 = insertelement <8 x float> undef, float %t0, i32 0
   %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
-  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
+  %t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
   ret <8 x float> %t3
 }

diff  --git a/llvm/test/CodeGen/X86/fma-fneg-combine.ll b/llvm/test/CodeGen/X86/fma-fneg-combine.ll
index b9c3789d7875..f906c2887d0a 100644
--- a/llvm/test/CodeGen/X86/fma-fneg-combine.ll
+++ b/llvm/test/CodeGen/X86/fma-fneg-combine.ll
@@ -29,10 +29,17 @@ define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
 }
 
 define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
-; CHECK-NEXT:    retq
+; SKX-LABEL: test2:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test2:
+; KNL:       # %bb.0:
+; KNL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT:    retq
   %fma = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
   %neg = fneg <16 x float> %fma
   ret <16 x float> %neg
@@ -49,10 +56,17 @@ define <16 x float> @test2_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
 }
 
 define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
-; CHECK-LABEL: test3:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
-; CHECK-NEXT:    retq
+; SKX-LABEL: test3:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
+; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test3:
+; KNL:       # %bb.0:
+; KNL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
+; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT:    retq
   %t0 = fneg <16 x float> %b
   %t1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %c)
   %sub.i = fneg <16 x float> %t1
@@ -71,10 +85,17 @@ define <16 x float> @test3_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
 }
 
 define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test4:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
-; CHECK-NEXT:    retq
+; SKX-LABEL: test4:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
+; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test4:
+; KNL:       # %bb.0:
+; KNL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
+; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT:    retq
   %t0 = fneg <16 x float> %b
   %t1 = fneg <16 x float> %c
   %t2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1)
@@ -106,10 +127,17 @@ entry:
 }
 
 define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test6:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
-; CHECK-NEXT:    retq
+; SKX-LABEL: test6:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
+; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test6:
+; KNL:       # %bb.0:
+; KNL-NEXT:    vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
+; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT:    retq
   %t0 = fneg <16 x float> %b
   %t1 = fneg <16 x float> %c
   %t2 = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10)
@@ -130,10 +158,18 @@ define <16 x float> @test6_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
 }
 
 define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
-; CHECK-LABEL: test7:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
-; CHECK-NEXT:    retq
+; SKX-LABEL: test7:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
+; SKX-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test7:
+; KNL:       # %bb.0:
+; KNL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
+; KNL-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; KNL-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    retq
   %t0 = fneg <8 x float> %c
   %t1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0)
   %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t1
@@ -163,10 +199,17 @@ entry:
 }
 
 define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
-; CHECK-LABEL: test9:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
-; CHECK-NEXT:    retq
+; SKX-LABEL: test9:
+; SKX:       # %bb.0:
+; SKX-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; SKX-NEXT:    vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test9:
+; KNL:       # %bb.0:
+; KNL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; KNL-NEXT:    vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; KNL-NEXT:    retq
   %t0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4)
   %sub.i = fneg <8 x double> %t0
   ret <8 x double> %sub.i

diff  --git a/llvm/test/CodeGen/X86/fma-signed-zero.ll b/llvm/test/CodeGen/X86/fma-signed-zero.ll
index c9cfe6eb0525..6f7d29b9d87f 100644
--- a/llvm/test/CodeGen/X86/fma-signed-zero.ll
+++ b/llvm/test/CodeGen/X86/fma-signed-zero.ll
@@ -9,7 +9,8 @@ declare float @llvm.fma.f32(float, float, float)
 define float @fneg_fma32(float %x, float %y, float %z) {
 ; CHECK-LABEL: fneg_fma32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; CHECK-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
+; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %negx = fneg float %x
   %negz = fneg float %z
@@ -35,7 +36,8 @@ declare double @llvm.fma.f64(double, double, double)
 define double @fneg_fma64(double %x, double %y, double %z) {
 ; CHECK-LABEL: fneg_fma64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; CHECK-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
+; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %negx = fneg double %x
   %negz = fneg double %z

diff  --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index f0748b48f852..f2bf812d9fef 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -1308,10 +1308,10 @@ define float @test_f32_interp(float %x, float %y, float %t) {
 ; AVX512-NOINFS-NEXT:    vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub float 1.0, %t
-  %tx = fmul float %x, %t
-  %ty = fmul float %y, %t1
-  %r = fadd float %tx, %ty
+  %t1 = fsub nsz float 1.0, %t
+  %tx = fmul nsz float %x, %t
+  %ty = fmul nsz float %y, %t1
+  %r = fadd nsz float %tx, %ty
   ret float %r
 }
 
@@ -1357,10 +1357,10 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
 ; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
-  %tx = fmul <4 x float> %x, %t
-  %ty = fmul <4 x float> %y, %t1
-  %r = fadd <4 x float> %tx, %ty
+  %t1 = fsub nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
+  %tx = fmul nsz <4 x float> %x, %t
+  %ty = fmul nsz <4 x float> %y, %t1
+  %r = fadd nsz <4 x float> %tx, %ty
   ret <4 x float> %r
 }
 
@@ -1406,10 +1406,10 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
 ; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
 ; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
-  %tx = fmul <8 x float> %x, %t
-  %ty = fmul <8 x float> %y, %t1
-  %r = fadd <8 x float> %tx, %ty
+  %t1 = fsub nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+  %tx = fmul nsz <8 x float> %x, %t
+  %ty = fmul nsz <8 x float> %y, %t1
+  %r = fadd nsz <8 x float> %tx, %ty
   ret <8 x float> %r
 }
 
@@ -1455,10 +1455,10 @@ define double @test_f64_interp(double %x, double %y, double %t) {
 ; AVX512-NOINFS-NEXT:    vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub double 1.0, %t
-  %tx = fmul double %x, %t
-  %ty = fmul double %y, %t1
-  %r = fadd double %tx, %ty
+  %t1 = fsub nsz double 1.0, %t
+  %tx = fmul nsz double %x, %t
+  %ty = fmul nsz double %y, %t1
+  %r = fadd nsz double %tx, %ty
   ret double %r
 }
 
@@ -1504,10 +1504,10 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
 ; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
-  %tx = fmul <2 x double> %x, %t
-  %ty = fmul <2 x double> %y, %t1
-  %r = fadd <2 x double> %tx, %ty
+  %t1 = fsub nsz <2 x double> <double 1.0, double 1.0>, %t
+  %tx = fmul nsz <2 x double> %x, %t
+  %ty = fmul nsz <2 x double> %y, %t1
+  %r = fadd nsz <2 x double> %tx, %ty
   ret <2 x double> %r
 }
 
@@ -1553,10 +1553,10 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
 ; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
 ; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
-  %tx = fmul <4 x double> %x, %t
-  %ty = fmul <4 x double> %y, %t1
-  %r = fadd <4 x double> %tx, %ty
+  %t1 = fsub nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
+  %tx = fmul nsz <4 x double> %x, %t
+  %ty = fmul nsz <4 x double> %y, %t1
+  %r = fadd nsz <4 x double> %tx, %ty
   ret <4 x double> %r
 }
 
@@ -1579,9 +1579,9 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <4 x float> %a0, %a1
-  %add = fadd <4 x float> %mul, %a2
-  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <4 x float> %a0, %a1
+  %add = fadd nsz <4 x float> %mul, %a2
+  %neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <4 x float> %neg
 }
 
@@ -1600,9 +1600,9 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <4 x double> %a0, %a1
-  %sub = fsub <4 x double> %mul, %a2
-  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <4 x double> %a0, %a1
+  %sub = fsub nsz <4 x double> %mul, %a2
+  %neg = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <4 x double> %neg
 }
 
@@ -1621,10 +1621,10 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <4 x float> %a0, %a1
-  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
-  %add = fadd <4 x float> %neg0, %a2
-  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <4 x float> %a0, %a1
+  %neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd nsz <4 x float> %neg0, %a2
+  %neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <4 x float> %neg1
 }
 
@@ -1643,10 +1643,10 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <4 x double> %a0, %a1
-  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
-  %sub = fsub <4 x double> %neg0, %a2
-  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz  <4 x double> %a0, %a1
+  %neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub nsz <4 x double> %neg0, %a2
+  %neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <4 x double> %neg1
 }
 

diff  --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
index d2cb64f87f06..a96435562bb7 100644
--- a/llvm/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -868,10 +868,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
 ; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
 ; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
-  %tx = fmul <16 x float> %x, %t
-  %ty = fmul <16 x float> %y, %t1
-  %r = fadd <16 x float> %tx, %ty
+  %t1 = fsub nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+  %tx = fmul nsz <16 x float> %x, %t
+  %ty = fmul nsz <16 x float> %y, %t1
+  %r = fadd nsz <16 x float> %tx, %ty
   ret <16 x float> %r
 }
 
@@ -927,10 +927,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
 ; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
 ; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT:    retq
-  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
-  %tx = fmul <8 x double> %x, %t
-  %ty = fmul <8 x double> %y, %t1
-  %r = fadd <8 x double> %tx, %ty
+  %t1 = fsub nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
+  %tx = fmul nsz <8 x double> %x, %t
+  %ty = fmul nsz <8 x double> %y, %t1
+  %r = fadd nsz <8 x double> %tx, %ty
   ret <8 x double> %r
 }
 
@@ -955,9 +955,9 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <16 x float> %a0, %a1
-  %add = fadd <16 x float> %mul, %a2
-  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <16 x float> %a0, %a1
+  %add = fadd nsz <16 x float> %mul, %a2
+  %neg = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <16 x float> %neg
 }
 
@@ -978,9 +978,9 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <8 x double> %a0, %a1
-  %sub = fsub <8 x double> %mul, %a2
-  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <8 x double> %a0, %a1
+  %sub = fsub nsz <8 x double> %mul, %a2
+  %neg = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <8 x double> %neg
 }
 
@@ -1001,10 +1001,10 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <16 x float> %a0, %a1
-  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
-  %add = fadd <16 x float> %neg0, %a2
-  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <16 x float> %a0, %a1
+  %neg0 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd nsz <16 x float> %neg0, %a2
+  %neg1 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <16 x float> %neg1
 }
 
@@ -1025,10 +1025,10 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT:    retq
-  %mul = fmul <8 x double> %a0, %a1
-  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
-  %sub = fsub <8 x double> %neg0, %a2
-  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <8 x double> %a0, %a1
+  %neg0 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub nsz <8 x double> %neg0, %a2
+  %neg1 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <8 x double> %neg1
 }
 


        


More information about the llvm-commits mailing list