[clang] [Clang] [Lexer] Detect SSE4.2 availability at runtime in fastParseASCIIIdentifier (PR #171914)
Thibault Monnier via cfe-commits
cfe-commits at lists.llvm.org
Sun Dec 14 02:32:45 PST 2025
https://github.com/Thibault-Monnier updated https://github.com/llvm/llvm-project/pull/171914
>From 4fc9a07698e1a4627a050ba6fa9df3f1f8725451 Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Thu, 11 Dec 2025 22:02:35 +0100
Subject: [PATCH 1/3] Detect sse4.2 availability at runtime to use it on modern
processors
---
clang/lib/Lex/Lexer.cpp | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index b282a600c0e56..3b8fa0b9b7f36 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -46,9 +46,7 @@
#include <string>
#include <tuple>
-#ifdef __SSE4_2__
#include <nmmintrin.h>
-#endif
using namespace clang;
@@ -1921,9 +1919,17 @@ bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
}
static const char *
-fastParseASCIIIdentifier(const char *CurPtr,
- [[maybe_unused]] const char *BufferEnd) {
-#ifdef __SSE4_2__
+fastParseASCIIIdentifierScalar(const char *CurPtr,
+ [[maybe_unused]] const char *BufferEnd) {
+ unsigned char C = *CurPtr;
+ while (isAsciiIdentifierContinue(C))
+ C = *++CurPtr;
+ return CurPtr;
+}
+
+__attribute__((target("sse4.2"))) static const char *
+fastParseASCIIIdentifierSSE42(const char *CurPtr,
+ [[maybe_unused]] const char *BufferEnd) {
alignas(16) static constexpr char AsciiIdentifierRange[16] = {
'_', '_', 'A', 'Z', 'a', 'z', '0', '9',
};
@@ -1943,12 +1949,23 @@ fastParseASCIIIdentifier(const char *CurPtr,
continue;
return CurPtr;
}
+
+ return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+}
+
+static bool supportsSSE42() {
+ static bool SupportsSSE42 = __builtin_cpu_supports("sse4.2");
+ return SupportsSSE42;
+}
+
+static const char *fastParseASCIIIdentifier(const char *CurPtr,
+ const char *BufferEnd) {
+#ifndef __SSE4_2__
+ if (LLVM_UNLIKELY(!supportsSSE42()))
+ return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
#endif
- unsigned char C = *CurPtr;
- while (isAsciiIdentifierContinue(C))
- C = *++CurPtr;
- return CurPtr;
+ return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
}
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
>From ce3bf515e7a60bd58ff5871352979999f5864b4b Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Thu, 11 Dec 2025 23:15:40 +0100
Subject: [PATCH 2/3] Only on x86
---
clang/lib/Lex/Lexer.cpp | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 3b8fa0b9b7f36..c195237dae1f4 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -46,7 +46,9 @@
#include <string>
#include <tuple>
+#if defined(__i386__) || defined(__x86_64__)
#include <nmmintrin.h>
+#endif
using namespace clang;
@@ -1927,6 +1929,8 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
return CurPtr;
}
+#if defined(__i386__) || defined(__x86_64__)
+
__attribute__((target("sse4.2"))) static const char *
fastParseASCIIIdentifierSSE42(const char *CurPtr,
[[maybe_unused]] const char *BufferEnd) {
@@ -1958,14 +1962,22 @@ static bool supportsSSE42() {
return SupportsSSE42;
}
+#endif
+
static const char *fastParseASCIIIdentifier(const char *CurPtr,
const char *BufferEnd) {
+#if !defined(__i386__) && !defined(__x86_64__)
+ return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
+#else
+
#ifndef __SSE4_2__
if (LLVM_UNLIKELY(!supportsSSE42()))
return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
#endif
return fastParseASCIIIdentifierSSE42(CurPtr, BufferEnd);
+
+#endif
}
bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
>From 2109fdd371822ec77f870c5edbbdfccaaa7615be Mon Sep 17 00:00:00 2001
From: Thibault-Monnier <thibaultmonni at gmail.com>
Date: Sun, 14 Dec 2025 11:32:30 +0100
Subject: [PATCH 3/3] Not on windows
---
clang/lib/Lex/Lexer.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index c195237dae1f4..86cfb47ca84d5 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/UnicodeCharRanges.h"
+
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -1929,7 +1930,7 @@ fastParseASCIIIdentifierScalar(const char *CurPtr,
return CurPtr;
}
-#if defined(__i386__) || defined(__x86_64__)
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(_WIN32)
__attribute__((target("sse4.2"))) static const char *
fastParseASCIIIdentifierSSE42(const char *CurPtr,
@@ -1966,7 +1967,7 @@ static bool supportsSSE42() {
static const char *fastParseASCIIIdentifier(const char *CurPtr,
const char *BufferEnd) {
-#if !defined(__i386__) && !defined(__x86_64__)
+#if !defined(__i386__) && !defined(__x86_64__) || defined(_WIN32)
return fastParseASCIIIdentifierScalar(CurPtr, BufferEnd);
#else
More information about the cfe-commits
mailing list