[clang] [llvm] Clang: emit llvm.minnum and llvm.maxnum with nsz always (PR #113133)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 2 16:05:33 PST 2024
================
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -vectorize-loops -vectorize-slp -O3 -triple x86_64 %s -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -vectorize-loops -vectorize-slp -O3 -ffp-exception-behavior=strict -DENSTRICT=1 -triple x86_64 %s -emit-llvm -o - 2>&1 | FileCheck %s --check-prefix=CHECK-STRICT
+
+float fminf (float, float);
+double fmin (double, double);
+long double fminl (long double, long double);
+float fmaxf (float, float);
+double fmax (double, double);
+long double fmaxl (long double, long double);
+
+// CHECK: call nsz float @llvm.minnum.f32
+// CHECK-STRICT: call nsz float @llvm.experimental.constrained.minnum.f32{{.*}} #2
+float fmin32(float a, float b) {
+ return fminf(a, b);
+}
+// CHECK: call nsz float @llvm.minnum.f32
+// CHECK-STRICT: call nsz float @llvm.experimental.constrained.minnum.f32{{.*}} #2
+float fmin32b(float a, float b) {
+ return __builtin_fminf(a, b);
+}
+#if !defined(ENSTRICT)
+// CHECK: call nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %0, <4 x float> %1)
+float *pfmin32(float* a, float* b, float* restrict c) {
+ for (int i=0; i<4; i++)
+ c[i] = fminf(a[i], b[i]);
+ return c;
----------------
arsenm wrote:
Don't rely on vectorization here. You can check the behavior of the elementwise builtins instead.
https://github.com/llvm/llvm-project/pull/113133
More information about the llvm-commits mailing list