[clang][x86] Add constexpr support for LZCNT/TZCNT intrinsics (PR #110499)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 30 05:25:12 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/110499
(No pull request description was provided.)
>From b61842abda168ea02bb6b25598ce6d08757bb4f7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Sep 2024 13:21:42 +0100
Subject: [PATCH] [clang][x86] Add constexpr support for LZCNT/TZCNT intrinsics
---
clang/docs/ReleaseNotes.rst | 6 ++++
clang/include/clang/Basic/BuiltinsX86.def | 8 ++---
clang/include/clang/Basic/BuiltinsX86_64.def | 4 +--
clang/lib/AST/ExprConstant.cpp | 18 +++++++++++
clang/lib/Headers/bmiintrin.h | 4 +++
clang/lib/Headers/lzcntintrin.h | 4 +++
clang/test/CodeGen/X86/bmi-builtins.c | 32 ++++++++++++++++++++
clang/test/CodeGen/X86/lzcnt-builtins.c | 27 ++++++++++++++++-
8 files changed, 96 insertions(+), 7 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 35c31452cef411..d48601db023553 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -511,6 +511,12 @@ X86 Support
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
``*_(mask(z)))_minmax_s[s|d|h]``.
+- The following bit manipulation intrinsics can now be used in constant expressions:
+ all lzcnt intrinsics in lzcntintrin.h
+ all bextr intrinsics in bmiintrin.h
+ all tzcnt intrinsics in bmiintrin.h
+ all bextr intrinsics in tbmintrin.h
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4eb9bfbdd1735..e68dcd922acbff 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -551,13 +551,13 @@ TARGET_BUILTIN(__builtin_ia32_rdseed16_step, "UiUs*", "n", "rdseed")
TARGET_BUILTIN(__builtin_ia32_rdseed32_step, "UiUi*", "n", "rdseed")
// LZCNT
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "nc", "lzcnt")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "ncE", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "ncE", "lzcnt")
// BMI
TARGET_BUILTIN(__builtin_ia32_bextr_u32, "UiUiUi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "nc", "")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "ncE", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")
// BMI2
TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "nc", "bmi2")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 81fd46ee6d1663..5f4252c91b8847 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -70,9 +70,9 @@ TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 51856693944761..834a7a1e2eb239 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13486,6 +13486,24 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result &= llvm::maskTrailingOnes<uint64_t>(Length);
return Success(Result, E);
}
+
+ case clang::X86::BI__builtin_ia32_lzcnt_u16:
+ case clang::X86::BI__builtin_ia32_lzcnt_u32:
+ case clang::X86::BI__builtin_ia32_lzcnt_u64: {
+ APSInt Val;
+ if (!EvaluateInteger(E->getArg(0), Val, Info))
+ return false;
+ return Success(Val.countLeadingZeros(), E);
+ }
+
+ case clang::X86::BI__builtin_ia32_tzcnt_u16:
+ case clang::X86::BI__builtin_ia32_tzcnt_u32:
+ case clang::X86::BI__builtin_ia32_tzcnt_u64: {
+ APSInt Val;
+ if (!EvaluateInteger(E->getArg(0), Val, Info))
+ return false;
+ return Success(Val.countTrailingZeros(), E);
+ }
}
}
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h
index 72c84d12c0e520..b0f44367633ce9 100644
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@@ -17,7 +17,11 @@
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
instruction behaves as BSF on non-BMI targets, there is code that expects
to use it as a potentially faster version of BSF. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) constexpr
+#else
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#endif
/// Counts the number of trailing zero bits in the operand.
///
diff --git a/clang/lib/Headers/lzcntintrin.h b/clang/lib/Headers/lzcntintrin.h
index f4ddce9d0e6834..d746d91d9fe5f9 100644
--- a/clang/lib/Headers/lzcntintrin.h
+++ b/clang/lib/Headers/lzcntintrin.h
@@ -15,7 +15,11 @@
#define __LZCNTINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
+#else
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
+#endif
#ifndef _MSC_VER
/// Counts the number of leading zero bits in the operand.
diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c
index 530d38dcf342c0..6c0b2c440ea081 100644
--- a/clang/test/CodeGen/X86/bmi-builtins.c
+++ b/clang/test/CodeGen/X86/bmi-builtins.c
@@ -249,6 +249,26 @@ char bextr32_6[_bextr_u32(0x00000000, 0x00000000, 0x00000000) == 0x00000000 ? 1
char bextr32_7[_bextr_u32(0x000003F0, 0xFFFFFF04, 0xFFFFFF10) == 0x0000003F ? 1 : -1];
char bextr32_8[_bextr_u32(0x000003F0, 0xFFFFFF08, 0xFFFFFF30) == 0x00000003 ? 1 : -1];
+char tzcntu16_0[__tzcnt_u16(0x0000) == 16 ? 1 : -1];
+char tzcntu16_1[__tzcnt_u16(0x0001) == 0 ? 1 : -1];
+char tzcntu16_2[__tzcnt_u16(0x0010) == 4 ? 1 : -1];
+
+char tzcnt2u16_0[_tzcnt_u16(0x0000) == 16 ? 1 : -1];
+char tzcnt2u16_1[_tzcnt_u16(0x0001) == 0 ? 1 : -1];
+char tzcnt2u16_2[_tzcnt_u16(0x0010) == 4 ? 1 : -1];
+
+char tzcntu32_0[__tzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char tzcntu32_1[__tzcnt_u32(0x00000001) == 0 ? 1 : -1];
+char tzcntu32_2[__tzcnt_u32(0x00000080) == 7 ? 1 : -1];
+
+char tzcnt2u32_0[_tzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char tzcnt2u32_1[_tzcnt_u32(0x00000001) == 0 ? 1 : -1];
+char tzcnt2u32_2[_tzcnt_u32(0x00000080) == 7 ? 1 : -1];
+
+char tzcnt3u32_0[_mm_tzcnt_32(0x00000000) == 32 ? 1 : -1];
+char tzcnt3u32_1[_mm_tzcnt_32(0x00000001) == 0 ? 1 : -1];
+char tzcnt3u32_2[_mm_tzcnt_32(0x00000080) == 7 ? 1 : -1];
+
#ifdef __x86_64__
char bextr64_0[__bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char bextr64_1[__bextr_u64(0xF000000000000001ULL, 0x0000000000004001ULL) == 0x7800000000000000ULL ? 1 : -1];
@@ -261,5 +281,17 @@ char bextr64_5[_bextr2_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFF1001ULL) == 0x00
char bextr64_6[_bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
char bextr64_7[_bextr_u64(0xF000000000000001ULL, 0x0000000000000001ULL, 0x0000000000000040ULL) == 0x7800000000000000ULL ? 1 : -1];
char bextr64_8[_bextr_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF10ULL) == 0x0000000000000000ULL ? 1 : -1];
+
+char tzcntu64_0[__tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcntu64_1[__tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
+char tzcntu64_2[__tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
+
+char tzcnt2u64_0[_tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcnt2u64_1[_tzcnt_u64(0x0000000000000001ULL) == 0 ? 1 : -1];
+char tzcnt2u64_2[_tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
+
+char tzcnt3u64_0[_mm_tzcnt_64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcnt3u64_1[_mm_tzcnt_64(0x0000000000000001ULL) == 0 ? 1 : -1];
+char tzcnt3u64_2[_mm_tzcnt_64(0x0000000800000000ULL) == 35 ? 1 : -1];
#endif
#endif
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/lzcnt-builtins.c b/clang/test/CodeGen/X86/lzcnt-builtins.c
index 9255207ffaef4f..18ced89fc79b1c 100644
--- a/clang/test/CodeGen/X86/lzcnt-builtins.c
+++ b/clang/test/CodeGen/X86/lzcnt-builtins.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
#include <immintrin.h>
@@ -32,3 +33,27 @@ unsigned long long test__lzcnt_u64(unsigned long long __X)
// CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
return _lzcnt_u64(__X);
}
+
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+char lzcnt16_0[__lzcnt16(0x0000) == 16 ? 1 : -1];
+char lzcnt16_1[__lzcnt16(0x8000) == 0 ? 1 : -1];
+char lzcnt16_2[__lzcnt16(0x0010) == 11 ? 1 : -1];
+
+char lzcnt32_0[__lzcnt32(0x00000000) == 32 ? 1 : -1];
+char lzcnt32_1[__lzcnt32(0x80000000) == 0 ? 1 : -1];
+char lzcnt32_2[__lzcnt32(0x00000010) == 27 ? 1 : -1];
+
+char lzcnt64_0[__lzcnt64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char lzcnt64_1[__lzcnt64(0x8000000000000000ULL) == 0 ? 1 : -1];
+char lzcnt64_2[__lzcnt64(0x0000000100000000ULL) == 31 ? 1 : -1];
+
+char lzcntu32_0[_lzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char lzcntu32_1[_lzcnt_u32(0x80000000) == 0 ? 1 : -1];
+char lzcntu32_2[_lzcnt_u32(0x00000010) == 27 ? 1 : -1];
+
+char lzcntu64_0[_lzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char lzcntu64_1[_lzcnt_u64(0x8000000000000000ULL) == 0 ? 1 : -1];
+char lzcntu64_2[_lzcnt_u64(0x0000000100000000ULL) == 31 ? 1 : -1];
+#endif
\ No newline at end of file
More information about the cfe-commits mailing list