[clang] [llvm] Fix/172104 clang cl simd intrinsics (PR #172116)
Priyanshu Singh via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 13 01:16:09 PST 2025
https://github.com/dev-priyanshu15 updated https://github.com/llvm/llvm-project/pull/172116
>From 4d15af1f63bf72dcedd4ff18b9c9a63cf3e7c7e6 Mon Sep 17 00:00:00 2001
From: spriyanshucoder <s.priyanshu.coder at gmail.com>
Date: Sat, 13 Dec 2025 03:50:18 +0530
Subject: [PATCH 1/2] [LLVM][LoopVectorize] Fix SIGFPE crash in
getPredBlockCostDivisor
getPredBlockCostDivisor divides HeaderFreq by BBFreq. If BBFreq is 0,
this is a division by zero, which caused a floating-point exception
(SIGFPE) crash when processing blocks whose block frequency is zero.
Add a guard check to return 1 when BBFreq is 0, avoiding the undefined
behavior while maintaining correct cost model behavior. A divisor of 1
indicates the block is executed at the same frequency as the header,
which is a safe default when frequency information is unavailable.
Fixes issue #172049
---
.../Transforms/Vectorize/LoopVectorize.cpp | 4 ++
.../LoopVectorize/crash-sigfpe-zero-freq.ll | 39 +++++++++++++++++++
2 files changed, 43 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/crash-sigfpe-zero-freq.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0108351f821f4..ed83aa3447bf9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2907,6 +2907,10 @@ uint64_t LoopVectorizationCostModel::getPredBlockCostDivisor(
uint64_t BBFreq = getBFI().getBlockFreq(BB).getFrequency();
assert(HeaderFreq >= BBFreq &&
"Header has smaller block freq than dominated BB?");
+ // Guard against division by zero when BBFreq is 0.
+ // In such cases, return 1 to avoid undefined behavior.
+ if (BBFreq == 0)
+ return 1;
return std::round((double)HeaderFreq / BBFreq);
}
diff --git a/llvm/test/Transforms/LoopVectorize/crash-sigfpe-zero-freq.ll b/llvm/test/Transforms/LoopVectorize/crash-sigfpe-zero-freq.ll
new file mode 100644
index 0000000000000..3b6f3b67d23c5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/crash-sigfpe-zero-freq.ll
@@ -0,0 +1,39 @@
+; Test case for a floating-point exception crash in the loop-vectorize pass.
+; This test verifies that the loop vectorizer does not crash with SIGFPE
+; when processing blocks with zero block frequency.
+; See issue #172049
+
+; RUN: opt -passes=loop-vectorize -S %s
+
+; ModuleID = 'reduced.ll'
+source_filename = "reduced.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr addrspace(1) @wombat() gc "statepoint-example" {
+bb:
+ br label %bb2
+
+bb1:
+ ret ptr addrspace(1) null
+
+bb2:
+ %phi = phi i64 [ %add, %bb6 ], [ 0, %bb ]
+ br i1 false, label %bb3, label %bb6
+
+bb3:
+ br i1 false, label %bb4, label %bb5, !prof !0
+
+bb4:
+ br label %bb6
+
+bb5:
+ br label %bb6
+
+bb6:
+ %add = add i64 %phi, 1
+ %icmp = icmp eq i64 %phi, 0
+ br i1 %icmp, label %bb2, label %bb1
+}
+
+!0 = !{!"branch_weights", i32 1, i32 0}
>From b42e14eb03dcdd440037a0b03efd5b72652954fd Mon Sep 17 00:00:00 2001
From: spriyanshucoder <s.priyanshu.coder at gmail.com>
Date: Sat, 13 Dec 2025 06:09:51 +0530
Subject: [PATCH 2/2] [clang] Enable SSE4.1 intrinsics for Windows MSVC targets
- Issue #172104
---
clang/lib/Basic/Targets/X86.cpp | 8 ++++++++
clang/test/Sema/simd-intrinsic-sse41-default.c | 16 ++++++++++++++++
2 files changed, 24 insertions(+)
create mode 100644 clang/test/Sema/simd-intrinsic-sse41-default.c
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index f00d435937b92..157ddd8aca8ad 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -157,6 +157,14 @@ bool X86TargetInfo::initFeatureMap(
if (getTriple().getArch() == llvm::Triple::x86_64)
setFeatureEnabled(Features, "sse2", true);
+ // Enable SSE4.1 for Windows MSVC targets to support SIMD intrinsics like
+ // _mm_mullo_epi32 without requiring explicit /arch: flags.
+ if ((getTriple().getArch() == llvm::Triple::x86_64 ||
+ getTriple().getArch() == llvm::Triple::x86) &&
+ getTriple().isWindowsMSVCEnvironment()) {
+ setFeatureEnabled(Features, "sse4.1", true);
+ }
+
using namespace llvm::X86;
SmallVector<StringRef, 16> CPUFeatures;
diff --git a/clang/test/Sema/simd-intrinsic-sse41-default.c b/clang/test/Sema/simd-intrinsic-sse41-default.c
new file mode 100644
index 0000000000000..aa5f4729da8d0
--- /dev/null
+++ b/clang/test/Sema/simd-intrinsic-sse41-default.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -x c -triple i386-pc-windows-msvc -fsyntax-only %s
+// RUN: %clang_cc1 -x c -triple x86_64-pc-windows-msvc -fsyntax-only %s
+// RUN: %clang_cc1 -x c -triple i386-unknown-linux-gnu -fsyntax-only %s
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fsyntax-only %s
+
+// This test checks that a call to the SSE4.1 intrinsic _mm_mullo_epi32 is
+// accepted for each triple above without explicit /arch: or -msse4.1 flags.
+
+#include <immintrin.h>
+
+void test_sse41_intrinsics(void) {
+ __m128i a = _mm_set1_epi32(1);
+ __m128i b = _mm_set1_epi32(2);
+ __m128i result = _mm_mullo_epi32(a, b);
+ (void)result;
+}
More information about the llvm-commits
mailing list