[clang] [llvm] [HLSL][DXIL][SPRIV] Added `GroupMemoryBarrier()` (PR #185383)
Sietze Riemersma via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 9 03:16:57 PDT 2026
https://github.com/KungFuDonkey updated https://github.com/llvm/llvm-project/pull/185383
>From b7b66542448dd5b3b66fe428ad3b98c39f2317a7 Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Sun, 8 Mar 2026 16:32:23 +0100
Subject: [PATCH 1/5] Added GroupMemoryBarrier
---
clang/include/clang/Basic/Builtins.td | 285 ++++++++++--------
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 5 +
clang/lib/CodeGen/CGHLSLRuntime.h | 2 +
.../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 11 +
.../builtins/GroupMemoryBarrier.hlsl | 20 ++
.../SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl | 6 +
llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 +
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 +
llvm/lib/Target/DirectX/DXIL.td | 2 +
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 178 ++++++-----
.../CodeGen/DirectX/group_memory_barrier.ll | 8 +
.../hlsl-intrinsics/group_memory_barrier.ll | 14 +
12 files changed, 333 insertions(+), 202 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/group_memory_barrier.ll
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 4981711fe786d..c98236e4258d4 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -8,68 +8,64 @@
include "clang/Basic/BuiltinsBase.td"
-class FPMathTemplate : Template<["float", "double", "long double"],
- ["f", "", "l"]>;
+class FPMathTemplate
+ : Template<["float", "double", "long double"], ["f", "", "l"]>;
-class FPMathWithF16Template :
- Template<["float", "double", "long double", "__fp16"],
- ["f", "", "l", "f16"]>;
+class FPMathWithF16Template
+ : Template<["float", "double", "long double", "__fp16"], ["f", "", "l",
+ "f16"]>;
-class FPMathWithF16F128Template :
- Template<["float", "double", "long double", "__fp16", "__float128"],
- ["f", "", "l", "f16", "f128"]>;
+class FPMathWithF16F128Template
+ : Template<["float", "double", "long double", "__fp16", "__float128"],
+ ["f", "", "l", "f16", "f128"]>;
-class FPMathWithF128Template :
- Template<["float", "double", "long double", "__float128"],
- ["f", "", "l", "f128"]>;
+class FPMathWithF128Template
+ : Template<["float", "double", "long double", "__float128"], ["f", "", "l",
+ "f128"]>;
-class F16F128MathTemplate : Template<["__fp16", "__float128"],
- ["f16", "f128"]>;
+class F16F128MathTemplate : Template<["__fp16", "__float128"], ["f16", "f128"]>;
-class IntMathTemplate : Template<["int", "long int", "long long int"],
- ["", "l", "ll"], /*AsPrefix=*/1>;
+class IntMathTemplate
+ : Template<["int", "long int", "long long int"], ["", "l", "ll"],
+ /*AsPrefix=*/1>;
-class MSInt8_16_32Template : Template<["char", "short", "msint32_t"],
- ["8", "16", ""]>;
+class MSInt8_16_32Template
+ : Template<["char", "short", "msint32_t"], ["8", "16", ""]>;
class Int8_16_32_64Template
- : Template<["char", "short", "int", "long long int"],
- ["8", "16", "32", "64"]>;
+ : Template<["char", "short", "int", "long long int"], ["8", "16", "32",
+ "64"]>;
class MSInt8_16_32_64Template
- : Template<["char", "short", "msint32_t", "long long int"],
- ["8", "16", "", "64"]>;
+ : Template<["char", "short", "msint32_t", "long long int"], ["8", "16", "",
+ "64"]>;
-class MSInt16_32Template : Template<["short", "msint32_t"],
- ["16", ""]>;
+class MSInt16_32Template : Template<["short", "msint32_t"], ["16", ""]>;
-class MSUInt16_32_64Template :
- Template<["unsigned short", "unsigned int", "uint64_t"],
- ["16", "", "64"]>;
+class MSUInt16_32_64Template
+ : Template<["unsigned short", "unsigned int", "uint64_t"], ["16", "",
+ "64"]>;
-class MSInt32_64Template : Template<["msint32_t", "int64_t"],
- ["", "64"]>;
+class MSInt32_64Template : Template<["msint32_t", "int64_t"], ["", "64"]>;
-class FloatDoubleTemplate : Template<["float", "double"],
- ["f", ""]>;
+class FloatDoubleTemplate : Template<["float", "double"], ["f", ""]>;
// FIXME: These assume that char -> i8, short -> i16, int -> i32,
// long long -> i64.
-class SyncBuiltinsTemplate :
- Template<["char", "short", "int", "long long int", "__int128_t"],
- ["1", "2", "4", "8", "16"]>;
+class SyncBuiltinsTemplate
+ : Template<["char", "short", "int", "long long int", "__int128_t"],
+ ["1", "2", "4", "8", "16"]>;
-class BitInt8_16_32_64BuiltinsTemplate :
- Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
- ["8", "16", "32", "64"]>;
+class BitInt8_16_32_64BuiltinsTemplate
+ : Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
+ ["8", "16", "32", "64"]>;
-class BitShort_Int_Long_LongLongTemplate :
- Template<["short", "int", "long int", "long long int"],
- ["s", "", "l", "ll"]>;
+class BitShort_Int_Long_LongLongTemplate
+ : Template<["short", "int", "long int", "long long int"], ["s", "", "l",
+ "ll"]>;
-class BitInt_Long_LongLongTemplate :
- Template<["int", "long int", "long long int"],
- ["", "l", "ll"]>;
+class BitInt_Long_LongLongTemplate
+ : Template<["int", "long int", "long long int"], ["", "l", "ll"]>;
// Most of the types used in the prototypes are types from C, C++ or ObjC. There
// are a few builtin-specific types and qualifiers.
@@ -139,55 +135,64 @@ def CeilF16F128 : Builtin, F16F128MathTemplate {
def CosF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cos"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def CoshF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cosh"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def ErfF128 : Builtin {
let Spellings = ["__builtin_erff128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ErfcF128 : Builtin {
let Spellings = ["__builtin_erfcf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ExpF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Expm1F128 : Builtin {
let Spellings = ["__builtin_expm1f128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def FdimF128 : Builtin {
let Spellings = ["__builtin_fdimf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -199,7 +204,8 @@ def FloorF16F128 : Builtin, F16F128MathTemplate {
def FmaF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_fma"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T, T)";
}
@@ -229,7 +235,8 @@ def FminimumNumF16F128 : Builtin, F16F128MathTemplate {
def Atan2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_atan2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -259,7 +266,8 @@ def FabsF128 : Builtin {
def FmodF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_fmod"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -276,7 +284,7 @@ def HugeVal : Builtin, FPMathWithF128Template {
}
def HugeValF16 : Builtin {
- let Spellings = ["__builtin_huge_valf16"];
+ let Spellings = ["__builtin_huge_valf16"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "_Float16()";
}
@@ -295,7 +303,8 @@ def InfF16 : Builtin {
def LdexpF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_ldexp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, int)";
}
@@ -319,9 +328,10 @@ def NanF128 : Builtin {
let Prototype = "__float128(char const*)";
}
-def Nans : Builtin,
- Template<["float", "double", "long double", "_Float16", "__float128"],
- ["f", "", "l", "f16", "f128"]> {
+def Nans
+ : Builtin,
+ Template<["float", "double", "long double", "_Float16", "__float128"],
+ ["f", "", "l", "f16", "f128"]> {
let Spellings = ["__builtin_nans"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Pure, Constexpr];
let Prototype = "T(char const*)";
@@ -335,19 +345,22 @@ def PowI : Builtin, FPMathTemplate {
def PowF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_pow"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
def HypotF128 : Builtin {
let Spellings = ["__builtin_hypotf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def ILogbF128 : Builtin {
let Spellings = ["__builtin_ilogbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "int(__float128)";
}
@@ -359,55 +372,64 @@ def LgammaF128 : Builtin {
def LLrintF128 : Builtin {
let Spellings = ["__builtin_llrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def LLroundF128 : Builtin {
let Spellings = ["__builtin_llroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def Log10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Log1pF128 : Builtin {
let Spellings = ["__builtin_log1pf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def Log2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LogbF128 : Builtin {
let Spellings = ["__builtin_logbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def LogF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LrintF128 : Builtin {
let Spellings = ["__builtin_lrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
def LroundF128 : Builtin {
let Spellings = ["__builtin_lroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
@@ -419,19 +441,22 @@ def NearbyintF128 : Builtin {
def NextafterF128 : Builtin {
let Spellings = ["__builtin_nextafterf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def NexttowardF128 : Builtin {
let Spellings = ["__builtin_nexttowardf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def RemainderF128 : Builtin {
let Spellings = ["__builtin_remainderf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -670,15 +695,13 @@ def Signbit : Builtin {
def SignbitF : Builtin {
let Spellings = ["__builtin_signbitf"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
- Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
let Prototype = "int(float)";
}
def SignbitL : Builtin {
let Spellings = ["__builtin_signbitl"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
- Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
let Prototype = "int(long double)";
}
@@ -748,8 +771,9 @@ def Clrsb : Builtin, BitInt_Long_LongLongTemplate {
// there exists native types on the target that are 32- and 64-bits wide, unless
// these conditions are fulfilled these builtins will operate on a not intended
// bitwidth.
-def BSwap : Builtin, Template<["unsigned short", "uint32_t", "uint64_t"],
- ["16", "32", "64"]> {
+def BSwap
+ : Builtin,
+ Template<["unsigned short", "uint32_t", "uint64_t"], ["16", "32", "64"]> {
let Spellings = ["__builtin_bswap"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "T(T)";
@@ -829,13 +853,15 @@ def BuiltinCalloc : Builtin {
def BuiltinConstantP : Builtin {
let Spellings = ["__builtin_constant_p"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
+ Constexpr];
let Prototype = "int(...)";
}
def BuiltinClassifyType : Builtin {
let Spellings = ["__builtin_classify_type"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
+ Constexpr];
let Prototype = "int(...)";
}
@@ -1139,7 +1165,8 @@ def StpncpyChk : Builtin {
def SNPrintfChk : Builtin {
let Spellings = ["__builtin___snprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, PrintfFormat<4>];
- let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
+ let Prototype =
+ "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
}
def SPrintfChk : Builtin {
@@ -1151,13 +1178,15 @@ def SPrintfChk : Builtin {
def VSNPrintfChk : Builtin {
let Spellings = ["__builtin___vsnprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<4>];
- let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, size_t, int, size_t, char const* "
+ "restrict, __builtin_va_list)";
}
def VSPrintfChk : Builtin {
let Spellings = ["__builtin___vsprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<3>];
- let Prototype = "int(char* restrict, int, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, int, size_t, char const* restrict, "
+ "__builtin_va_list)";
}
def FPrintfChk : Builtin {
@@ -1175,7 +1204,8 @@ def PrintfChk : Builtin {
def VFPrintfChk : Builtin {
let Spellings = ["__builtin___vfprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<2>];
- let Prototype = "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
}
def VPrintfChk : Builtin {
@@ -2553,7 +2583,8 @@ def SyncFetchAndUMax : Builtin {
let Prototype = "unsigned int(unsigned int volatile*, unsigned int)";
}
-// ignored glibc builtin, see https://sourceware.org/bugzilla/show_bug.cgi?id=25399
+// ignored glibc builtin, see
+// https://sourceware.org/bugzilla/show_bug.cgi?id=25399
def WarnMemsetZeroLen : Builtin {
let Spellings = ["__warn_memset_zero_len"];
let Attributes = [NoThrow, Pure];
@@ -2603,9 +2634,10 @@ def BittestAndSet : MSLangBuiltin, MSInt32_64Template {
let Prototype = "unsigned char(T*, T)";
}
-def MSByteswap : MSLibBuiltin<"stdlib.h">,
- Template<["unsigned short", "msuint32_t", "unsigned long long int"],
- ["_ushort", "_ulong", "_uint64"]> {
+def MSByteswap
+ : MSLibBuiltin<"stdlib.h">,
+ Template<["unsigned short", "msuint32_t", "unsigned long long int"],
+ ["_ushort", "_ulong", "_uint64"]> {
let Spellings = ["_byteswap"];
let Attributes = [NoThrow, Const];
let Prototype = "T(T)";
@@ -3216,21 +3248,24 @@ def VPrintf : LibBuiltin<"stdio.h"> {
def VfPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsnPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsnprintf"];
let Attributes = [NoThrow, VPrintfFormat<2>, NonNull<NonOptimizing, [2]>];
- let Prototype = "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(char* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(char* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -3265,14 +3300,16 @@ def VScanf : LibBuiltin<"stdio.h"> {
def VFScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VSScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(char const* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(char const* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -4492,11 +4529,9 @@ def AssumeSeparateStorage : Builtin {
// Multiprecision Arithmetic Builtins.
-class MPATemplate : Template<
- ["unsigned char", "unsigned short", "unsigned int",
- "unsigned long int", "unsigned long long int"],
- ["b", "s", "",
- "l", "ll"]>;
+class MPATemplate : Template<["unsigned char", "unsigned short", "unsigned int",
+ "unsigned long int", "unsigned long long int"],
+ ["b", "s", "", "l", "ll"]>;
def Addc : Builtin, MPATemplate {
let Spellings = ["__builtin_addc"];
@@ -4531,9 +4566,9 @@ def MulOverflow : Builtin {
let Prototype = "bool(...)";
}
-class UOverflowTemplate :
- Template<["unsigned int", "unsigned long int", "unsigned long long int"],
- ["_overflow", "l_overflow", "ll_overflow"]>;
+class UOverflowTemplate
+ : Template<["unsigned int", "unsigned long int", "unsigned long long int"],
+ ["_overflow", "l_overflow", "ll_overflow"]>;
def UaddOverflow : Builtin, UOverflowTemplate {
let Spellings = ["__builtin_uadd"];
@@ -4553,9 +4588,9 @@ def UmulOverflow : Builtin, UOverflowTemplate {
let Prototype = "bool(T const, T const, T*)";
}
-class SOverflowTemplate :
- Template<["int", "long int", "long long int"],
- ["_overflow", "l_overflow", "ll_overflow"]>;
+class SOverflowTemplate
+ : Template<["int", "long int", "long long int"], ["_overflow", "l_overflow",
+ "ll_overflow"]>;
def SaddOverflow : Builtin, SOverflowTemplate {
let Spellings = ["__builtin_sadd"];
@@ -4815,7 +4850,8 @@ def PtrauthStringDiscriminator : Builtin {
// AllocToken builtins.
def InferAllocToken : Builtin {
let Spellings = ["__builtin_infer_alloc_token"];
- let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr, UnevaluatedArguments];
+ let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr,
+ UnevaluatedArguments];
let Prototype = "size_t(...)";
}
@@ -4918,7 +4954,8 @@ def GetPipeMaxPackets : OCLPipeLangBuiltin {
}
// OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
-// Custom builtin check allows to perform special check of passed block arguments.
+// Custom builtin check allows to perform special check of passed block
+// arguments.
def EnqueueKernel : OCL_DSELangBuiltin {
let Spellings = ["enqueue_kernel"];
let Attributes = [CustomTypeChecking, NoThrow];
@@ -5006,7 +5043,7 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
-def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+def HLSLAddUint64 : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_adduint64"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5093,13 +5130,15 @@ def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
def HLSLResourceHandleFromBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefrombinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
+ "int32_t, uint32_t, char const*)";
}
def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefromimplicitbinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
+ "int32_t, uint32_t, char const*)";
}
def HLSLResourceCounterHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
@@ -5252,16 +5291,18 @@ def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLCrossFloat: LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossFloat : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf32"];
let Attributes = [NoThrow, Const];
- let Prototype = "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
+ let Prototype =
+ "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
}
-def HLSLCrossHalf: LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossHalf : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf16"];
let Attributes = [NoThrow, Const];
- let Prototype = "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
+ let Prototype =
+ "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
}
def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
@@ -5372,7 +5413,7 @@ def HLSLSign : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLStep: LangBuiltin<"HLSL_LANG"> {
+def HLSLStep : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_step"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5390,19 +5431,25 @@ def HLSLBufferUpdateCounter : LangBuiltin<"HLSL_LANG"> {
let Prototype = "uint32_t(__hlsl_resource_t, int)";
}
-def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
+def HLSLSplitDouble : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}
-def HLSLClip: LangBuiltin<"HLSL_LANG"> {
+def HLSLClip : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_clip"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
-def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> {
+def HLSLGroupMemoryBarrier : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_group_memory_barrier"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void()";
+}
+
+def HLSLGroupMemoryBarrierWithGroupSync : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_group_memory_barrier_with_group_sync"];
let Attributes = [NoThrow, Const];
let Prototype = "void()";
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 177787d2a9630..ae7325384f9f8 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1355,6 +1355,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
"clip operands types mismatch");
return handleHlslClip(E, this);
+ case Builtin::BI__builtin_hlsl_group_memory_barrier: {
+ Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
+ return EmitRuntimeCall(
+ Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+ }
case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
Intrinsic::ID ID =
CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 466c809fdef78..11d99de157ba8 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -184,6 +184,8 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex,
resource_nonuniformindex)
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier,
+ group_memory_barrier)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
group_memory_barrier_with_group_sync)
GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 7b6160091aece..f12b62df4733c 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -3411,6 +3411,17 @@ float3 radians(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
float4 radians(float4);
+//===----------------------------------------------------------------------===//
+// GroupMemoryBarrier builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn void GroupMemoryBarrier(void)
+/// \brief Blocks execution of all threads in a group until all group shared
+/// accesses have been completed.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier)
+__attribute__((convergent)) void GroupMemoryBarrier(void);
+
//===----------------------------------------------------------------------===//
// GroupMemoryBarrierWithGroupSync builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
new file mode 100644
index 0000000000000..b52819973f677
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: -DTARGET=dx -check-prefixes=CHECK,CHECK-DXIL
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: -DTARGET=spv -check-prefixes=CHECK,CHECK-SPIRV
+
+// CHECK-DXIL: define hidden void @
+// CHECK-SPIRV: define hidden spir_func void @
+void test_GroupMemoryBarrier() {
+// CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier()
+// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier()
+ GroupMemoryBarrier();
+}
+
+// CHECK: declare void @llvm.[[TARGET]].group.memory.barrier() #[[ATTRS:[0-9]+]]
+// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}}
+// CHECK: attributes #[[ATTRS]] = {{.+}}convergent{{.+}}
diff --git a/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
new file mode 100644
index 0000000000000..5c5761c31eb90
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+void test_too_many_arg() {
+ __builtin_hlsl_group_memory_barrier(0);
+  // expected-error@-1 {{too many arguments to function call, expected 0, have 1}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index e2b2feb927318..3810fc340962d 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -258,6 +258,9 @@ def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0,
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+def int_dx_group_memory_barrier
+ : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
+
def int_dx_group_memory_barrier_with_group_sync
: DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 3fc18a254f672..e1328244ef7b0 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -138,6 +138,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
def int_spv_wave_prefix_product : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+ def int_spv_group_memory_barrier : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
def int_spv_group_memory_barrier_with_group_sync : ClangBuiltin<"__builtin_spirv_group_barrier">,
DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index e64909b059d29..5ef1a7c130b4a 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -913,6 +913,8 @@ def GetDimensions : DXILOp<72, getDimensions> {
def Barrier : DXILOp<80, barrier> {
let Doc = "inserts a memory barrier in the shader";
let intrinsics = [
+ IntrinSelect<int_dx_group_memory_barrier,
+ [IntrinArgI32<BarrierMode_GroupMemoryBarrier>]>,
IntrinSelect<int_dx_group_memory_barrier_with_group_sync,
[IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync>]>,
];
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 7b4c047593a3a..704c0dd6dc5d7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -336,6 +336,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectWaveOpInst(Register ResVReg, SPIRVTypeInst ResType,
MachineInstr &I, unsigned Opcode) const;
+ bool selectBarrierInst(MachineInstr &I, unsigned Scope,
+ bool WithGroupSync) const;
+
bool selectWaveActiveCountBits(Register ResVReg, SPIRVTypeInst ResType,
MachineInstr &I) const;
@@ -1893,15 +1896,15 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
ValueReg = TmpReg;
}
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(NewOpcode))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(Ptr)
- .addUse(ScopeReg)
- .addUse(MemSemReg)
- .addUse(ValueReg)
- .constrainAllUses(TII, TRI, RBI);
- return true;
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(NewOpcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .addUse(ValueReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return true;
}
bool SPIRVInstructionSelector::selectUnmergeValues(MachineInstr &I) const {
@@ -2551,12 +2554,12 @@ bool SPIRVInstructionSelector::selectIntegerDotExpansion(
for (unsigned i = 1; i < GR.getScalarOrVectorComponentCount(VecType); i++) {
Register Elt = MRI->createVirtualRegister(GR.getRegClass(ResType));
- BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
- .addDef(Elt)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(TmpVec)
- .addImm(i)
- .constrainAllUses(TII, TRI, RBI);
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+ .addDef(Elt)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(TmpVec)
+ .addImm(i)
+ .constrainAllUses(TII, TRI, RBI);
Register Sum = i < GR.getScalarOrVectorComponentCount(VecType) - 1
? MRI->createVirtualRegister(GR.getRegClass(ResType))
@@ -2757,13 +2760,13 @@ bool SPIRVInstructionSelector::selectSign(Register ResVReg,
? MRI->createVirtualRegister(&SPIRV::IDRegClass)
: ResVReg;
- BuildMI(BB, I, DL, TII.get(SPIRV::OpExtInst))
- .addDef(SignReg)
- .addUse(GR.getSPIRVTypeID(InputType))
- .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
- .addImm(SignOpcode)
- .addUse(InputRegister)
- .constrainAllUses(TII, TRI, RBI);
+ BuildMI(BB, I, DL, TII.get(SPIRV::OpExtInst))
+ .addDef(SignReg)
+ .addUse(GR.getSPIRVTypeID(InputType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(SignOpcode)
+ .addUse(InputRegister)
+ .constrainAllUses(TII, TRI, RBI);
if (NeedsConversion) {
auto ConvertOpcode = IsFloatTy ? SPIRV::OpConvertFToS : SPIRV::OpSConvert;
@@ -2798,6 +2801,23 @@ bool SPIRVInstructionSelector::selectWaveOpInst(Register ResVReg,
return true;
}
+bool SPIRVInstructionSelector::selectBarrierInst(MachineInstr &I,
+ unsigned Scope,
+ bool WithGroupSync) const {
+ auto BarrierType =
+ WithGroupSync ? SPIRV::OpControlBarrier : SPIRV::OpMemoryBarrier;
+ Register MemSemReg =
+ buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
+ Register ScopeReg = buildI32Constant(Scope, I);
+ MachineBasicBlock &BB = *I.getParent();
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(BarrierType))
+ .addUse(ScopeReg)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return true;
+}
+
bool SPIRVInstructionSelector::selectWaveActiveCountBits(
Register ResVReg, SPIRVTypeInst ResType, MachineInstr &I) const {
@@ -3848,22 +3868,22 @@ bool SPIRVInstructionSelector::selectDerivativeInst(
Register ConvertToVReg = MRI->createVirtualRegister(RegClass);
Register DpdOpVReg = MRI->createVirtualRegister(RegClass);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
- .addDef(ConvertToVReg)
- .addUse(GR.getSPIRVTypeID(F32ConvertTy))
- .addUse(SrcReg)
- .constrainAllUses(TII, TRI, RBI);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
- .addDef(DpdOpVReg)
- .addUse(GR.getSPIRVTypeID(F32ConvertTy))
- .addUse(ConvertToVReg)
- .constrainAllUses(TII, TRI, RBI);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(DpdOpVReg)
- .constrainAllUses(TII, TRI, RBI);
- return true;
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+ .addDef(ConvertToVReg)
+ .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+ .addUse(SrcReg)
+ .constrainAllUses(TII, TRI, RBI);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
+ .addDef(DpdOpVReg)
+ .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+ .addUse(ConvertToVReg)
+ .constrainAllUses(TII, TRI, RBI);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(DpdOpVReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return true;
}
bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
@@ -4160,18 +4180,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true);
case Intrinsic::spv_firstbitlow: // There is no CL equivlent of FindILsb
return selectFirstBitLow(ResVReg, ResType, I);
- case Intrinsic::spv_group_memory_barrier_with_group_sync: {
- Register MemSemReg =
- buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
- Register ScopeReg = buildI32Constant(SPIRV::Scope::Workgroup, I);
- MachineBasicBlock &BB = *I.getParent();
- BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpControlBarrier))
- .addUse(ScopeReg)
- .addUse(ScopeReg)
- .addUse(MemSemReg)
- .constrainAllUses(TII, TRI, RBI);
- return true;
- }
+ case Intrinsic::spv_group_memory_barrier:
+ return selectBarrierInst(I, SPIRV::Scope::Device, false);
+ case Intrinsic::spv_group_memory_barrier_with_group_sync:
+ return selectBarrierInst(I, SPIRV::Scope::Workgroup, true);
case Intrinsic::spv_generic_cast_to_ptr_explicit: {
Register PtrReg = I.getOperand(I.getNumExplicitDefs() + 1).getReg();
SPIRV::StorageClass::StorageClass ResSC =
@@ -4333,8 +4345,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
bool SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg,
SPIRVTypeInst ResType,
MachineInstr &I) const {
- // The images need to be loaded in the same basic block as their use. We defer
- // loading the image to the intrinsic that uses it.
+ // The images need to be loaded in the same basic block as their use. We
+ // defer loading the image to the intrinsic that uses it.
if (ResType->getOpcode() == SPIRV::OpTypeImage)
return true;
@@ -4381,9 +4393,9 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
Register CounterHandleReg = Intr.getOperand(2).getReg();
Register IncrReg = Intr.getOperand(3).getReg();
- // The counter handle is a pointer to the counter variable (which is a struct
- // containing an i32). We need to get a pointer to that i32 member to do the
- // atomic operation.
+ // The counter handle is a pointer to the counter variable (which is a
+ // struct containing an i32). We need to get a pointer to that i32 member to
+ // do the atomic operation.
#ifndef NDEBUG
SPIRVTypeInst CounterVarType = GR.getSPIRVTypeForVReg(CounterHandleReg);
SPIRVTypeInst CounterVarPointeeType = GR.getPointeeType(CounterVarType);
@@ -4442,8 +4454,8 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
}
// In HLSL, IncrementCounter returns the value *before* the increment, while
- // DecrementCounter returns the value *after* the decrement. Both are lowered
- // to the same atomic intrinsic which returns the value *before* the
+ // DecrementCounter returns the value *after* the decrement. Both are
+ // lowered to the same atomic intrinsic which returns the value *before* the
// operation. So for decrements (negative IncrVal), we must subtract the
// increment value from the result to get the post-decrement value.
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
@@ -4462,8 +4474,8 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(Register &ResVReg,
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with other
- // issues.
+ // We will do that when we can, but for now trying to move forward with
+ // other issues.
Register ImageReg = I.getOperand(2).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -4682,8 +4694,8 @@ bool SPIRVInstructionSelector::selectGatherIntrinsic(Register &ResVReg,
auto Dim = static_cast<SPIRV::Dim::Dim>(ImageType->getOperand(2).getImm());
if (Dim != SPIRV::Dim::DIM_2D && Dim != SPIRV::Dim::DIM_Cube &&
Dim != SPIRV::Dim::DIM_Rect) {
- I.emitGenericError(
- "Gather operations are only supported for 2D, Cube, and Rect images.");
+ I.emitGenericError("Gather operations are only supported for 2D, Cube, "
+ "and Rect images.");
return false;
}
@@ -4806,10 +4818,10 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(Register &ResVReg,
Register ResourcePtr = I.getOperand(2).getReg();
SPIRVTypeInst RegType = GR.getSPIRVTypeForVReg(ResourcePtr, I.getMF());
if (RegType->getOpcode() == SPIRV::OpTypeImage) {
- // For texel buffers, the index into the image is part of the OpImageRead or
- // OpImageWrite instructions. So we will do nothing in this case. This
- // intrinsic will be combined with the load or store when selecting the load
- // or store.
+ // For texel buffers, the index into the image is part of the OpImageRead
+ // or OpImageWrite instructions. So we will do nothing in this case. This
+ // intrinsic will be combined with the load or store when selecting the
+ // load or store.
return true;
}
@@ -4927,8 +4939,8 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with other
- // issues.
+ // We will do that when we can, but for now trying to move forward with
+ // other issues.
Register ImageReg = I.getOperand(1).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -5246,8 +5258,8 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
/*SwapPrimarySide=*/false);
default:
- report_fatal_error(
- "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
+ report_fatal_error("spv_firstbituhigh and spv_firstbitshigh only support "
+ "16,32,64 bits.");
}
}
@@ -5258,8 +5270,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
Register OpReg = I.getOperand(2).getReg();
SPIRVTypeInst OpType = GR.getSPIRVTypeForVReg(OpReg);
// OpUConvert treats the operand bits as an unsigned i16 and zero extends it
- // to an unsigned i32. As this leaves all the least significant bits unchanged
- // so the first set bit from the LSB side doesn't change.
+ // to an unsigned i32. As this leaves all the least significant bits
+ // unchanged so the first set bit from the LSB side doesn't change.
unsigned ExtendOpcode = SPIRV::OpUConvert;
unsigned BitSetOpcode = GL::FindILsb;
@@ -5340,11 +5352,11 @@ bool SPIRVInstructionSelector::selectBranchCond(MachineInstr &I) const {
// G_BRCOND to create an OpBranchConditional. We should hit G_BR first, and
// generate the OpBranchConditional in selectBranch above.
//
- // If an OpBranchConditional has been generated, we simply return, as the work
- // is alread done. If there is no OpBranchConditional, LLVM must be relying on
- // implicit fallthrough to the next basic block, so we need to create an
- // OpBranchConditional with an explicit "false" argument pointing to the next
- // basic block that LLVM would fall through to.
+ // If an OpBranchConditional has been generated, we simply return, as the
+ // work is already done. If there is no OpBranchConditional, LLVM must be
+ // relying on implicit fallthrough to the next basic block, so we need to
+ // create an OpBranchConditional with an explicit "false" argument pointing
+ // to the next basic block that LLVM would fall through to.
const MachineInstr *NextI = I.getNextNode();
// Check if this has already been successfully selected.
if (NextI != nullptr && NextI->getOpcode() == SPIRV::OpBranchConditional)
@@ -5458,8 +5470,8 @@ bool SPIRVInstructionSelector::selectGlobalValue(
auto GlobalVar = cast<GlobalVariable>(GV);
assert(GlobalVar->getName() != "llvm.global.annotations");
- // Skip empty declaration for GVs with initializers till we get the decl with
- // passed initializer.
+ // Skip empty declaration for GVs with initializers till we get the decl
+ // with passed initializer.
if (hasInitializer(GlobalVar) && !Init)
return true;
@@ -5475,9 +5487,9 @@ bool SPIRVInstructionSelector::selectGlobalValue(
ResVReg, ResType, GlobalIdent, GV, StorageClass, Init,
GlobalVar->isConstant(), LnkType, MIRBuilder, true);
// TODO: For AMDGCN, we pipe externally_initialized through via
- // HostAccessINTEL, with ReadWrite (3) access, which is we then handle during
- // reverse translation. We should remove this once SPIR-V gains the ability to
- // express the concept.
+ // HostAccessINTEL, with ReadWrite (3) access, which we then handle
+ // during reverse translation. We should remove this once SPIR-V gains the
+ // ability to express the concept.
if (GlobalVar->isExternallyInitialized() &&
STI.getTargetTriple().getVendor() == Triple::AMD) {
constexpr unsigned ReadWriteINTEL = 3u;
@@ -5746,8 +5758,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
buildPointerToResource(SPIRVTypeInst(VarType), SC, Set, Binding,
ArraySize, IndexReg, Name, MIRBuilder);
- // The handle for the buffer is the pointer to the resource. For an image, the
- // handle is the image object. So images get an extra load.
+ // The handle for the buffer is the pointer to the resource. For an image,
+ // the handle is the image object. So images get an extra load.
uint32_t LoadOpcode =
IsStructuredBuffer ? SPIRV::OpCopyObject : SPIRV::OpLoad;
GR.assignSPIRVTypeToVReg(ResType, HandleReg, *Pos.getMF());
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
new file mode 100644
index 0000000000000..2c865d9965b73
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
+
+define void @test_group_memory_barrier() {
+entry:
+ ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+ call void @llvm.dx.group.memory.barrier()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
new file mode 100644
index 0000000000000..30697a2490c0f
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpMemoryModel Logical GLSL450
+
+define void @test_group_memory_barrier() {
+entry:
+ ; CHECK: %[[#TY:]] = OpTypeInt 32 0
+ ; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16
+ ; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2
+ ; CHECK: OpMemoryBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
+ call void @llvm.spv.group.memory.barrier()
+ ret void
+}
>From 80c2bb4745bfb1b06dca9c03b05c0eb4c944c8dc Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 10:41:04 +0100
Subject: [PATCH 2/5] Passing tests and reverted basic/Builtins.td
---
clang/include/clang/Basic/Builtins.td | 279 ++++++++----------
...er.hlsl => GroupMemoryBarrier-errors.hlsl} | 0
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 17 +-
.../CodeGen/DirectX/group_memory_barrier.ll | 4 +-
.../group_memory_barrier_with_group_sync.ll | 2 +-
.../hlsl-intrinsics/group_memory_barrier.ll | 2 +-
6 files changed, 134 insertions(+), 170 deletions(-)
rename clang/test/SemaHLSL/BuiltIns/{GroupMemoryBarrier.hlsl => GroupMemoryBarrier-errors.hlsl} (100%)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index c98236e4258d4..10b2e930792c7 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -8,64 +8,68 @@
include "clang/Basic/BuiltinsBase.td"
-class FPMathTemplate
- : Template<["float", "double", "long double"], ["f", "", "l"]>;
+class FPMathTemplate : Template<["float", "double", "long double"],
+ ["f", "", "l"]>;
-class FPMathWithF16Template
- : Template<["float", "double", "long double", "__fp16"], ["f", "", "l",
- "f16"]>;
+class FPMathWithF16Template :
+ Template<["float", "double", "long double", "__fp16"],
+ ["f", "", "l", "f16"]>;
-class FPMathWithF16F128Template
- : Template<["float", "double", "long double", "__fp16", "__float128"],
- ["f", "", "l", "f16", "f128"]>;
+class FPMathWithF16F128Template :
+ Template<["float", "double", "long double", "__fp16", "__float128"],
+ ["f", "", "l", "f16", "f128"]>;
-class FPMathWithF128Template
- : Template<["float", "double", "long double", "__float128"], ["f", "", "l",
- "f128"]>;
+class FPMathWithF128Template :
+ Template<["float", "double", "long double", "__float128"],
+ ["f", "", "l", "f128"]>;
-class F16F128MathTemplate : Template<["__fp16", "__float128"], ["f16", "f128"]>;
+class F16F128MathTemplate : Template<["__fp16", "__float128"],
+ ["f16", "f128"]>;
-class IntMathTemplate
- : Template<["int", "long int", "long long int"], ["", "l", "ll"],
- /*AsPrefix=*/1>;
+class IntMathTemplate : Template<["int", "long int", "long long int"],
+ ["", "l", "ll"], /*AsPrefix=*/1>;
-class MSInt8_16_32Template
- : Template<["char", "short", "msint32_t"], ["8", "16", ""]>;
+class MSInt8_16_32Template : Template<["char", "short", "msint32_t"],
+ ["8", "16", ""]>;
class Int8_16_32_64Template
- : Template<["char", "short", "int", "long long int"], ["8", "16", "32",
- "64"]>;
+ : Template<["char", "short", "int", "long long int"],
+ ["8", "16", "32", "64"]>;
class MSInt8_16_32_64Template
- : Template<["char", "short", "msint32_t", "long long int"], ["8", "16", "",
- "64"]>;
+ : Template<["char", "short", "msint32_t", "long long int"],
+ ["8", "16", "", "64"]>;
-class MSInt16_32Template : Template<["short", "msint32_t"], ["16", ""]>;
+class MSInt16_32Template : Template<["short", "msint32_t"],
+ ["16", ""]>;
-class MSUInt16_32_64Template
- : Template<["unsigned short", "unsigned int", "uint64_t"], ["16", "",
- "64"]>;
+class MSUInt16_32_64Template :
+ Template<["unsigned short", "unsigned int", "uint64_t"],
+ ["16", "", "64"]>;
-class MSInt32_64Template : Template<["msint32_t", "int64_t"], ["", "64"]>;
+class MSInt32_64Template : Template<["msint32_t", "int64_t"],
+ ["", "64"]>;
-class FloatDoubleTemplate : Template<["float", "double"], ["f", ""]>;
+class FloatDoubleTemplate : Template<["float", "double"],
+ ["f", ""]>;
// FIXME: These assume that char -> i8, short -> i16, int -> i32,
// long long -> i64.
-class SyncBuiltinsTemplate
- : Template<["char", "short", "int", "long long int", "__int128_t"],
- ["1", "2", "4", "8", "16"]>;
+class SyncBuiltinsTemplate :
+ Template<["char", "short", "int", "long long int", "__int128_t"],
+ ["1", "2", "4", "8", "16"]>;
-class BitInt8_16_32_64BuiltinsTemplate
- : Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
- ["8", "16", "32", "64"]>;
+class BitInt8_16_32_64BuiltinsTemplate :
+ Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
+ ["8", "16", "32", "64"]>;
-class BitShort_Int_Long_LongLongTemplate
- : Template<["short", "int", "long int", "long long int"], ["s", "", "l",
- "ll"]>;
+class BitShort_Int_Long_LongLongTemplate :
+ Template<["short", "int", "long int", "long long int"],
+ ["s", "", "l", "ll"]>;
-class BitInt_Long_LongLongTemplate
- : Template<["int", "long int", "long long int"], ["", "l", "ll"]>;
+class BitInt_Long_LongLongTemplate :
+ Template<["int", "long int", "long long int"],
+ ["", "l", "ll"]>;
// Most of the types used in the prototypes are types from C, C++ or ObjC. There
// are a few builtin-specific types and qualifiers.
@@ -135,64 +139,55 @@ def CeilF16F128 : Builtin, F16F128MathTemplate {
def CosF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cos"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def CoshF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cosh"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def ErfF128 : Builtin {
let Spellings = ["__builtin_erff128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ErfcF128 : Builtin {
let Spellings = ["__builtin_erfcf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ExpF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Expm1F128 : Builtin {
let Spellings = ["__builtin_expm1f128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def FdimF128 : Builtin {
let Spellings = ["__builtin_fdimf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -204,8 +199,7 @@ def FloorF16F128 : Builtin, F16F128MathTemplate {
def FmaF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_fma"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T, T)";
}
@@ -235,8 +229,7 @@ def FminimumNumF16F128 : Builtin, F16F128MathTemplate {
def Atan2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_atan2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -266,8 +259,7 @@ def FabsF128 : Builtin {
def FmodF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_fmod"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -284,7 +276,7 @@ def HugeVal : Builtin, FPMathWithF128Template {
}
def HugeValF16 : Builtin {
- let Spellings = ["__builtin_huge_valf16"];
+ let Spellings = ["__builtin_huge_valf16"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "_Float16()";
}
@@ -303,8 +295,7 @@ def InfF16 : Builtin {
def LdexpF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_ldexp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, int)";
}
@@ -328,10 +319,9 @@ def NanF128 : Builtin {
let Prototype = "__float128(char const*)";
}
-def Nans
- : Builtin,
- Template<["float", "double", "long double", "_Float16", "__float128"],
- ["f", "", "l", "f16", "f128"]> {
+def Nans : Builtin,
+ Template<["float", "double", "long double", "_Float16", "__float128"],
+ ["f", "", "l", "f16", "f128"]> {
let Spellings = ["__builtin_nans"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Pure, Constexpr];
let Prototype = "T(char const*)";
@@ -345,22 +335,19 @@ def PowI : Builtin, FPMathTemplate {
def PowF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_pow"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
def HypotF128 : Builtin {
let Spellings = ["__builtin_hypotf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def ILogbF128 : Builtin {
let Spellings = ["__builtin_ilogbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "int(__float128)";
}
@@ -372,64 +359,55 @@ def LgammaF128 : Builtin {
def LLrintF128 : Builtin {
let Spellings = ["__builtin_llrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def LLroundF128 : Builtin {
let Spellings = ["__builtin_llroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def Log10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Log1pF128 : Builtin {
let Spellings = ["__builtin_log1pf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def Log2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LogbF128 : Builtin {
let Spellings = ["__builtin_logbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def LogF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LrintF128 : Builtin {
let Spellings = ["__builtin_lrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
def LroundF128 : Builtin {
let Spellings = ["__builtin_lroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
@@ -441,22 +419,19 @@ def NearbyintF128 : Builtin {
def NextafterF128 : Builtin {
let Spellings = ["__builtin_nextafterf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def NexttowardF128 : Builtin {
let Spellings = ["__builtin_nexttowardf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def RemainderF128 : Builtin {
let Spellings = ["__builtin_remainderf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -695,13 +670,15 @@ def Signbit : Builtin {
def SignbitF : Builtin {
let Spellings = ["__builtin_signbitf"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
+ Constexpr];
let Prototype = "int(float)";
}
def SignbitL : Builtin {
let Spellings = ["__builtin_signbitl"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
+ Constexpr];
let Prototype = "int(long double)";
}
@@ -771,9 +748,8 @@ def Clrsb : Builtin, BitInt_Long_LongLongTemplate {
// there exists native types on the target that are 32- and 64-bits wide, unless
// these conditions are fulfilled these builtins will operate on a not intended
// bitwidth.
-def BSwap
- : Builtin,
- Template<["unsigned short", "uint32_t", "uint64_t"], ["16", "32", "64"]> {
+def BSwap : Builtin, Template<["unsigned short", "uint32_t", "uint64_t"],
+ ["16", "32", "64"]> {
let Spellings = ["__builtin_bswap"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "T(T)";
@@ -853,15 +829,13 @@ def BuiltinCalloc : Builtin {
def BuiltinConstantP : Builtin {
let Spellings = ["__builtin_constant_p"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
- Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
let Prototype = "int(...)";
}
def BuiltinClassifyType : Builtin {
let Spellings = ["__builtin_classify_type"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
- Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
let Prototype = "int(...)";
}
@@ -1165,8 +1139,7 @@ def StpncpyChk : Builtin {
def SNPrintfChk : Builtin {
let Spellings = ["__builtin___snprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, PrintfFormat<4>];
- let Prototype =
- "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
+ let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
}
def SPrintfChk : Builtin {
@@ -1178,15 +1151,13 @@ def SPrintfChk : Builtin {
def VSNPrintfChk : Builtin {
let Spellings = ["__builtin___vsnprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<4>];
- let Prototype = "int(char* restrict, size_t, int, size_t, char const* "
- "restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, __builtin_va_list)";
}
def VSPrintfChk : Builtin {
let Spellings = ["__builtin___vsprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<3>];
- let Prototype = "int(char* restrict, int, size_t, char const* restrict, "
- "__builtin_va_list)";
+ let Prototype = "int(char* restrict, int, size_t, char const* restrict, __builtin_va_list)";
}
def FPrintfChk : Builtin {
@@ -1204,8 +1175,7 @@ def PrintfChk : Builtin {
def VFPrintfChk : Builtin {
let Spellings = ["__builtin___vfprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<2>];
- let Prototype =
- "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
}
def VPrintfChk : Builtin {
@@ -2583,8 +2553,7 @@ def SyncFetchAndUMax : Builtin {
let Prototype = "unsigned int(unsigned int volatile*, unsigned int)";
}
-// ignored glibc builtin, see
-// https://sourceware.org/bugzilla/show_bug.cgi?id=25399
+// ignored glibc builtin, see https://sourceware.org/bugzilla/show_bug.cgi?id=25399
def WarnMemsetZeroLen : Builtin {
let Spellings = ["__warn_memset_zero_len"];
let Attributes = [NoThrow, Pure];
@@ -2634,10 +2603,9 @@ def BittestAndSet : MSLangBuiltin, MSInt32_64Template {
let Prototype = "unsigned char(T*, T)";
}
-def MSByteswap
- : MSLibBuiltin<"stdlib.h">,
- Template<["unsigned short", "msuint32_t", "unsigned long long int"],
- ["_ushort", "_ulong", "_uint64"]> {
+def MSByteswap : MSLibBuiltin<"stdlib.h">,
+ Template<["unsigned short", "msuint32_t", "unsigned long long int"],
+ ["_ushort", "_ulong", "_uint64"]> {
let Spellings = ["_byteswap"];
let Attributes = [NoThrow, Const];
let Prototype = "T(T)";
@@ -3248,24 +3216,21 @@ def VPrintf : LibBuiltin<"stdio.h"> {
def VfPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsnPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsnprintf"];
let Attributes = [NoThrow, VPrintfFormat<2>, NonNull<NonOptimizing, [2]>];
- let Prototype =
- "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(char* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -3300,16 +3265,14 @@ def VScanf : LibBuiltin<"stdio.h"> {
def VFScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VSScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(char const* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char const* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -4529,9 +4492,11 @@ def AssumeSeparateStorage : Builtin {
// Multiprecision Arithmetic Builtins.
-class MPATemplate : Template<["unsigned char", "unsigned short", "unsigned int",
- "unsigned long int", "unsigned long long int"],
- ["b", "s", "", "l", "ll"]>;
+class MPATemplate : Template<
+ ["unsigned char", "unsigned short", "unsigned int",
+ "unsigned long int", "unsigned long long int"],
+ ["b", "s", "",
+ "l", "ll"]>;
def Addc : Builtin, MPATemplate {
let Spellings = ["__builtin_addc"];
@@ -4566,9 +4531,9 @@ def MulOverflow : Builtin {
let Prototype = "bool(...)";
}
-class UOverflowTemplate
- : Template<["unsigned int", "unsigned long int", "unsigned long long int"],
- ["_overflow", "l_overflow", "ll_overflow"]>;
+class UOverflowTemplate :
+ Template<["unsigned int", "unsigned long int", "unsigned long long int"],
+ ["_overflow", "l_overflow", "ll_overflow"]>;
def UaddOverflow : Builtin, UOverflowTemplate {
let Spellings = ["__builtin_uadd"];
@@ -4588,9 +4553,9 @@ def UmulOverflow : Builtin, UOverflowTemplate {
let Prototype = "bool(T const, T const, T*)";
}
-class SOverflowTemplate
- : Template<["int", "long int", "long long int"], ["_overflow", "l_overflow",
- "ll_overflow"]>;
+class SOverflowTemplate :
+ Template<["int", "long int", "long long int"],
+ ["_overflow", "l_overflow", "ll_overflow"]>;
def SaddOverflow : Builtin, SOverflowTemplate {
let Spellings = ["__builtin_sadd"];
@@ -4850,8 +4815,7 @@ def PtrauthStringDiscriminator : Builtin {
// AllocToken builtins.
def InferAllocToken : Builtin {
let Spellings = ["__builtin_infer_alloc_token"];
- let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr,
- UnevaluatedArguments];
+ let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr, UnevaluatedArguments];
let Prototype = "size_t(...)";
}
@@ -4954,8 +4918,7 @@ def GetPipeMaxPackets : OCLPipeLangBuiltin {
}
// OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
-// Custom builtin check allows to perform special check of passed block
-// arguments.
+// A custom builtin check allows a special check of the passed block arguments.
def EnqueueKernel : OCL_DSELangBuiltin {
let Spellings = ["enqueue_kernel"];
let Attributes = [CustomTypeChecking, NoThrow];
@@ -5043,7 +5006,7 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
-def HLSLAddUint64 : LangBuiltin<"HLSL_LANG"> {
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_adduint64"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5130,15 +5093,13 @@ def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
def HLSLResourceHandleFromBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefrombinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
- "int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
}
def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefromimplicitbinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
- "int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
}
def HLSLResourceCounterHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
@@ -5291,18 +5252,16 @@ def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLCrossFloat : LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossFloat: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf32"];
let Attributes = [NoThrow, Const];
- let Prototype =
- "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
+ let Prototype = "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
}
-def HLSLCrossHalf : LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossHalf: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf16"];
let Attributes = [NoThrow, Const];
- let Prototype =
- "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
+ let Prototype = "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
}
def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
@@ -5413,7 +5372,7 @@ def HLSLSign : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLStep : LangBuiltin<"HLSL_LANG"> {
+def HLSLStep: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_step"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5431,13 +5390,13 @@ def HLSLBufferUpdateCounter : LangBuiltin<"HLSL_LANG"> {
let Prototype = "uint32_t(__hlsl_resource_t, int)";
}
-def HLSLSplitDouble : LangBuiltin<"HLSL_LANG"> {
+def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}
-def HLSLClip : LangBuiltin<"HLSL_LANG"> {
+def HLSLClip: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_clip"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
@@ -5449,7 +5408,7 @@ def HLSLGroupMemoryBarrier : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void()";
}
-def HLSLGroupMemoryBarrierWithGroupSync : LangBuiltin<"HLSL_LANG"> {
+def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_group_memory_barrier_with_group_sync"];
let Attributes = [NoThrow, Const];
let Prototype = "void()";
diff --git a/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier-errors.hlsl
similarity index 100%
rename from clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
rename to clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier-errors.hlsl
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 704c0dd6dc5d7..b41e90480d3cb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include <fstream>
#define DEBUG_TYPE "spirv-isel"
@@ -2810,11 +2811,15 @@ bool SPIRVInstructionSelector::selectBarrierInst(MachineInstr &I,
buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
Register ScopeReg = buildI32Constant(Scope, I);
MachineBasicBlock &BB = *I.getParent();
- BuildMI(BB, I, I.getDebugLoc(), TII.get(BarrierType))
- .addUse(ScopeReg)
- .addUse(ScopeReg)
- .addUse(MemSemReg)
- .constrainAllUses(TII, TRI, RBI);
+ auto MI =
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(BarrierType)).addUse(ScopeReg);
+
+  // OpControlBarrier additionally takes an Execution Scope operand.
+ if (WithGroupSync) {
+ MI.addUse(ScopeReg);
+ }
+
+ MI.addUse(MemSemReg).constrainAllUses(TII, TRI, RBI);
return true;
}
@@ -4181,7 +4186,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_firstbitlow: // There is no CL equivlent of FindILsb
return selectFirstBitLow(ResVReg, ResType, I);
case Intrinsic::spv_group_memory_barrier:
- return selectBarrierInst(I, SPIRV::Scope::Device, false);
+ return selectBarrierInst(I, SPIRV::Scope::Workgroup, false);
case Intrinsic::spv_group_memory_barrier_with_group_sync:
return selectBarrierInst(I, SPIRV::Scope::Workgroup, true);
case Intrinsic::spv_generic_cast_to_ptr_explicit: {
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
index 2c865d9965b73..ce828168c0db1 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
@@ -2,7 +2,7 @@
define void @test_group_memory_barrier() {
entry:
- ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+ ; CHECK: call void @dx.op.barrier(i32 80, i32 8)
call void @llvm.dx.group.memory.barrier()
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index baf93d4e177f0..ce3a20d331d0b 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -5,4 +5,4 @@ entry:
; CHECK: call void @dx.op.barrier(i32 80, i32 9)
call void @llvm.dx.group.memory.barrier.with.group.sync()
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
index 30697a2490c0f..8884c738ed05c 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: %[[#TY:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16
; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2
- ; CHECK: OpMemoryBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
+ ; CHECK: OpMemoryBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
call void @llvm.spv.group.memory.barrier()
ret void
}
>From 1f9409e3d539cf9219b3b2f816a675df6bc819d4 Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 10:45:47 +0100
Subject: [PATCH 3/5] clang-format
---
clang/lib/CodeGen/CGHLSLRuntime.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 11d99de157ba8..548ed776d12bd 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -184,8 +184,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex,
resource_nonuniformindex)
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
- GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier,
- group_memory_barrier)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier, group_memory_barrier)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
group_memory_barrier_with_group_sync)
GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
>From d36f250525ee9e547d4ae758e801f342c0bffe1c Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 10:47:09 +0100
Subject: [PATCH 4/5] clang-format
---
llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index b41e90480d3cb..095cc675683c5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -32,7 +32,6 @@
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include <fstream>
#define DEBUG_TYPE "spirv-isel"
>From 0f3120cec0b541c95d0e11d93e4917184c25024f Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 11:16:44 +0100
Subject: [PATCH 5/5] reformat SPIRVInstructionSelector.cpp
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 66 +++++++++----------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 095cc675683c5..c34e45f8cdf57 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4349,8 +4349,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
bool SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg,
SPIRVTypeInst ResType,
MachineInstr &I) const {
- // The images need to be loaded in the same basic block as their use. We
- // defer loading the image to the intrinsic that uses it.
+ // The images need to be loaded in the same basic block as their use. We defer
+ // loading the image to the intrinsic that uses it.
if (ResType->getOpcode() == SPIRV::OpTypeImage)
return true;
@@ -4397,9 +4397,9 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
Register CounterHandleReg = Intr.getOperand(2).getReg();
Register IncrReg = Intr.getOperand(3).getReg();
- // The counter handle is a pointer to the counter variable (which is a
- // struct containing an i32). We need to get a pointer to that i32 member to
- // do the atomic operation.
+ // The counter handle is a pointer to the counter variable (which is a struct
+ // containing an i32). We need to get a pointer to that i32 member to do the
+ // atomic operation.
#ifndef NDEBUG
SPIRVTypeInst CounterVarType = GR.getSPIRVTypeForVReg(CounterHandleReg);
SPIRVTypeInst CounterVarPointeeType = GR.getPointeeType(CounterVarType);
@@ -4458,8 +4458,8 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
}
// In HLSL, IncrementCounter returns the value *before* the increment, while
- // DecrementCounter returns the value *after* the decrement. Both are
- // lowered to the same atomic intrinsic which returns the value *before* the
+ // DecrementCounter returns the value *after* the decrement. Both are lowered
+ // to the same atomic intrinsic which returns the value *before* the
// operation. So for decrements (negative IncrVal), we must subtract the
// increment value from the result to get the post-decrement value.
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
@@ -4478,8 +4478,8 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(Register &ResVReg,
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with
- // other issues.
+ // We will do that when we can, but for now trying to move forward with other
+ // issues.
Register ImageReg = I.getOperand(2).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -4698,8 +4698,8 @@ bool SPIRVInstructionSelector::selectGatherIntrinsic(Register &ResVReg,
auto Dim = static_cast<SPIRV::Dim::Dim>(ImageType->getOperand(2).getImm());
if (Dim != SPIRV::Dim::DIM_2D && Dim != SPIRV::Dim::DIM_Cube &&
Dim != SPIRV::Dim::DIM_Rect) {
- I.emitGenericError("Gather operations are only supported for 2D, Cube, "
- "and Rect images.");
+ I.emitGenericError(
+ "Gather operations are only supported for 2D, Cube, and Rect images.");
return false;
}
@@ -4822,10 +4822,10 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(Register &ResVReg,
Register ResourcePtr = I.getOperand(2).getReg();
SPIRVTypeInst RegType = GR.getSPIRVTypeForVReg(ResourcePtr, I.getMF());
if (RegType->getOpcode() == SPIRV::OpTypeImage) {
- // For texel buffers, the index into the image is part of the OpImageRead
- // or OpImageWrite instructions. So we will do nothing in this case. This
- // intrinsic will be combined with the load or store when selecting the
- // load or store.
+ // For texel buffers, the index into the image is part of the OpImageRead or
+ // OpImageWrite instructions. So we will do nothing in this case. This
+ // intrinsic will be combined with the load or store when selecting the load
+ // or store.
return true;
}
@@ -4943,8 +4943,8 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with
- // other issues.
+ // We will do that when we can, but for now trying to move forward with other
+ // issues.
Register ImageReg = I.getOperand(1).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -5262,8 +5262,8 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
/*SwapPrimarySide=*/false);
default:
- report_fatal_error("spv_firstbituhigh and spv_firstbitshigh only support "
- "16,32,64 bits.");
+ report_fatal_error(
+ "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
}
}
@@ -5274,8 +5274,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
Register OpReg = I.getOperand(2).getReg();
SPIRVTypeInst OpType = GR.getSPIRVTypeForVReg(OpReg);
// OpUConvert treats the operand bits as an unsigned i16 and zero extends it
- // to an unsigned i32. As this leaves all the least significant bits
- // unchanged so the first set bit from the LSB side doesn't change.
+ // to an unsigned i32. As this leaves all the least significant bits unchanged
+ // so the first set bit from the LSB side doesn't change.
unsigned ExtendOpcode = SPIRV::OpUConvert;
unsigned BitSetOpcode = GL::FindILsb;
@@ -5356,11 +5356,11 @@ bool SPIRVInstructionSelector::selectBranchCond(MachineInstr &I) const {
// G_BRCOND to create an OpBranchConditional. We should hit G_BR first, and
// generate the OpBranchConditional in selectBranch above.
//
- // If an OpBranchConditional has been generated, we simply return, as the
- // work is alread done. If there is no OpBranchConditional, LLVM must be
- // relying on implicit fallthrough to the next basic block, so we need to
- // create an OpBranchConditional with an explicit "false" argument pointing
- // to the next basic block that LLVM would fall through to.
+ // If an OpBranchConditional has been generated, we simply return, as the work
+  // is already done. If there is no OpBranchConditional, LLVM must be relying on
+ // implicit fallthrough to the next basic block, so we need to create an
+ // OpBranchConditional with an explicit "false" argument pointing to the next
+ // basic block that LLVM would fall through to.
const MachineInstr *NextI = I.getNextNode();
// Check if this has already been successfully selected.
if (NextI != nullptr && NextI->getOpcode() == SPIRV::OpBranchConditional)
@@ -5474,8 +5474,8 @@ bool SPIRVInstructionSelector::selectGlobalValue(
auto GlobalVar = cast<GlobalVariable>(GV);
assert(GlobalVar->getName() != "llvm.global.annotations");
- // Skip empty declaration for GVs with initializers till we get the decl
- // with passed initializer.
+ // Skip empty declaration for GVs with initializers till we get the decl with
+ // passed initializer.
if (hasInitializer(GlobalVar) && !Init)
return true;
@@ -5491,9 +5491,9 @@ bool SPIRVInstructionSelector::selectGlobalValue(
ResVReg, ResType, GlobalIdent, GV, StorageClass, Init,
GlobalVar->isConstant(), LnkType, MIRBuilder, true);
// TODO: For AMDGCN, we pipe externally_initialized through via
- // HostAccessINTEL, with ReadWrite (3) access, which is we then handle
- // during reverse translation. We should remove this once SPIR-V gains the
- // ability to express the concept.
+  // HostAccessINTEL, with ReadWrite (3) access, which we then handle during
+ // reverse translation. We should remove this once SPIR-V gains the ability to
+ // express the concept.
if (GlobalVar->isExternallyInitialized() &&
STI.getTargetTriple().getVendor() == Triple::AMD) {
constexpr unsigned ReadWriteINTEL = 3u;
@@ -5762,8 +5762,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
buildPointerToResource(SPIRVTypeInst(VarType), SC, Set, Binding,
ArraySize, IndexReg, Name, MIRBuilder);
- // The handle for the buffer is the pointer to the resource. For an image,
- // the handle is the image object. So images get an extra load.
+ // The handle for the buffer is the pointer to the resource. For an image, the
+ // handle is the image object. So images get an extra load.
uint32_t LoadOpcode =
IsStructuredBuffer ? SPIRV::OpCopyObject : SPIRV::OpLoad;
GR.assignSPIRVTypeToVReg(ResType, HandleReg, *Pos.getMF());
More information about the cfe-commits
mailing list