[llvm] [X86] Don't rely on global -fp-contract=fast on X86 CodeGen tests (PR #158026)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 11 03:20:34 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Mikołaj Piróg (mikolaj-pirog)

<details>
<summary>Changes</summary>

IR has the `contract` fast-math flag to indicate that contraction is allowed. Testing shouldn't rely on the global flag to perform contraction. This is a prerequisite before making backends rely only on the IR to perform contraction. See more here: https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract/80909/5

---

Patch is 100.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158026.diff


11 Files Affected:

- (modified) llvm/test/CodeGen/X86/avx512-fma.ll (+31-31) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll (+26-26) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/dag-combiner-fma-folding.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/fma-do-not-commute.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/fma_patterns.ll (+195-195) 
- (modified) llvm/test/CodeGen/X86/fma_patterns_wide.ll (+82-82) 
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+3-11) 
- (modified) llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll (+25-25) 


``````````diff
diff --git a/llvm/test/CodeGen/X86/avx512-fma.ll b/llvm/test/CodeGen/X86/avx512-fma.ll
index 97f8e5f4ea16c..54343ee771ff7 100644
--- a/llvm/test/CodeGen/X86/avx512-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f  | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx  | FileCheck %s --check-prefix=ALL --check-prefix=SKX
 
 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
 ; ALL-LABEL: test_x86_fmadd_ps_z:
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %res = fadd <16 x float> %x, %a2
+  %x = fmul contract <16 x float> %a0, %a1
+  %res = fadd contract <16 x float> %x, %a2
   ret <16 x float> %res
 }
 
@@ -17,8 +17,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %res = fsub <16 x float> %x, %a2
+  %x = fmul contract <16 x float> %a0, %a1
+  %res = fsub contract <16 x float> %x, %a2
   ret <16 x float> %res
 }
 
@@ -27,8 +27,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %res = fsub <16 x float> %a2, %x
+  %x = fmul contract <16 x float> %a0, %a1
+  %res = fsub contract <16 x float> %a2, %x
   ret <16 x float> %res
 }
 
@@ -37,12 +37,12 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <16 x float> %a0, %a1
-  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+  %x = fmul contract <16 x float> %a0, %a1
+  %y = fsub contract <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
                           float -0.000000e+00>, %x
-  %res = fsub <16 x float> %y, %a2
+  %res = fsub contract <16 x float> %y, %a2
   ret <16 x float> %res
 }
 
@@ -51,8 +51,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <8 x double> %a0, %a1
-  %res = fadd <8 x double> %x, %a2
+  %x = fmul contract <8 x double> %a0, %a1
+  %res = fadd contract <8 x double> %x, %a2
   ret <8 x double> %res
 }
 
@@ -61,8 +61,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; ALL-NEXT:    retq
-  %x = fmul <8 x double> %a0, %a1
-  %res = fsub <8 x double> %x, %a2
+  %x = fmul contract <8 x double> %a0, %a1
+  %res = fsub contract <8 x double> %x, %a2
   ret <8 x double> %res
 }
 
@@ -71,8 +71,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; ALL-NEXT:    retq
-  %x = fmul double %a0, %a1
-  %res = fsub double %x, %a2
+  %x = fmul contract double %a0, %a1
+  %res = fsub contract double %x, %a2
   ret double %res
 }
 
@@ -82,8 +82,8 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, ptr %a2_ptr) {
 ; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
 ; ALL-NEXT:    retq
   %a2 = load double , ptr%a2_ptr
-  %x = fmul double %a0, %a1
-  %res = fsub double %x, %a2
+  %x = fmul contract double %a0, %a1
+  %res = fsub contract double %x, %a2
   ret double %res
 }
 
@@ -93,8 +93,8 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, ptr %a2_ptr) {
 ; ALL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
 ; ALL-NEXT:    retq
   %a2 = load double , ptr%a2_ptr
-  %x = fmul double %a0, %a2
-  %res = fsub double %x, %a1
+  %x = fmul contract double %a0, %a2
+  %res = fsub contract double %x, %a1
   ret double %res
 }
 
@@ -103,8 +103,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 ; ALL-NEXT:    retq
-  %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
-  %b2 = fadd <16 x float> %b1, %a2
+  %b1 = fmul contract <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  %b2 = fadd contract <16 x float> %b1, %a2
   ret <16 x float> %b2
 }
 
@@ -113,8 +113,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
 ; ALL-NEXT:    retq
-  %b1 = fmul <16 x float> %a1, %a2
-  %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  %b1 = fmul contract <16 x float> %a1, %a2
+  %b2 = fadd contract <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   ret <16 x float> %b2
 }
 
@@ -135,8 +135,8 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, pt
 ; SKX-NEXT:    vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
 ; SKX-NEXT:    retq
   %a2   = load <16 x float>,ptr%a2_ptrt,align 1
-  %x = fmul <16 x float> %a0, %a2
-  %y = fadd <16 x float> %x, %a1
+  %x = fmul contract <16 x float> %a0, %a2
+  %y = fadd contract <16 x float> %x, %a1
   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
   ret <16 x float> %res
 }
@@ -160,8 +160,8 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, pt
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %a2   = load <16 x float>,ptr%a2_ptrt,align 1
-  %x = fmul <16 x float> %a0, %a2
-  %y = fadd <16 x float> %x, %a1
+  %x = fmul contract <16 x float> %a0, %a2
+  %y = fadd contract <16 x float> %x, %a1
   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
   ret <16 x float> %res
 }
@@ -185,8 +185,8 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, pt
 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 ; SKX-NEXT:    retq
   %a2   = load <16 x float>,ptr%a2_ptrt,align 1
-  %x = fmul <16 x float> %a1, %a0
-  %y = fadd <16 x float> %x, %a2
+  %x = fmul contract <16 x float> %a1, %a0
+  %y = fadd contract <16 x float> %x, %a2
   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
   ret <16 x float> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
index 36b95e744ba14..52e9507d43a1f 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown  --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown  -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
 
 ; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set.
 define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
@@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+  %add.i = fadd contract <8 x half> %3, %acc
   ret <8 x half> %add.i
 }
 
@@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+  %add.i = fadd contract <8 x half> %3, %acc
   ret <8 x half> %add.i
 }
 
@@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal
 entry:
   %0 = bitcast <32 x half> %a to <16 x float>
   %1 = bitcast <32 x half> %b to <16 x float>
-  %2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
+  %2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
   %3 = bitcast <16 x float> %2 to <32 x half>
-  %add.i = fadd <32 x half> %3, %acc
+  %add.i = fadd contract <32 x half> %3, %acc
   ret <32 x half> %add.i
 }
 
@@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal
 entry:
   %0 = bitcast <16 x half> %a to <8 x float>
   %1 = bitcast <16 x half> %b to <8 x float>
-  %2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <8 x float> %2 to <16 x half>
-  %add.i = fadd <16 x half> %3, %acc
+  %add.i = fadd contract <16 x half> %3, %acc
   ret <16 x half> %add.i
 }
 
@@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+  %add.i = fadd contract <8 x half> %3, %acc
   ret <8 x half> %add.i
 }
 
@@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %
 entry:
   %0 = bitcast <8 x half> %a to <4 x float>
   %1 = bitcast <8 x half> %b to <4 x float>
-  %2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
+  %2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
   %3 = bitcast <4 x float> %2 to <8 x half>
-  %add.i = fadd <8 x half> %3, %acc
+ ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/158026


More information about the llvm-commits mailing list