[clang] [llvm] [HLSL][DXIL][SPRIV] Added `GroupMemoryBarrier()` (PR #185383)
Sietze Riemersma via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 9 03:16:57 PDT 2026
https://github.com/KungFuDonkey updated https://github.com/llvm/llvm-project/pull/185383
>From b7b66542448dd5b3b66fe428ad3b98c39f2317a7 Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Sun, 8 Mar 2026 16:32:23 +0100
Subject: [PATCH 1/5] Added GroupMemoryBarrier
---
clang/include/clang/Basic/Builtins.td | 285 ++++++++++--------
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 5 +
clang/lib/CodeGen/CGHLSLRuntime.h | 2 +
.../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 11 +
.../builtins/GroupMemoryBarrier.hlsl | 20 ++
.../SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl | 6 +
llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 +
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 +
llvm/lib/Target/DirectX/DXIL.td | 2 +
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 178 ++++++-----
.../CodeGen/DirectX/group_memory_barrier.ll | 8 +
.../hlsl-intrinsics/group_memory_barrier.ll | 14 +
12 files changed, 333 insertions(+), 202 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/group_memory_barrier.ll
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 4981711fe786d..c98236e4258d4 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -8,68 +8,64 @@
include "clang/Basic/BuiltinsBase.td"
-class FPMathTemplate : Template<["float", "double", "long double"],
- ["f", "", "l"]>;
+class FPMathTemplate
+ : Template<["float", "double", "long double"], ["f", "", "l"]>;
-class FPMathWithF16Template :
- Template<["float", "double", "long double", "__fp16"],
- ["f", "", "l", "f16"]>;
+class FPMathWithF16Template
+ : Template<["float", "double", "long double", "__fp16"], ["f", "", "l",
+ "f16"]>;
-class FPMathWithF16F128Template :
- Template<["float", "double", "long double", "__fp16", "__float128"],
- ["f", "", "l", "f16", "f128"]>;
+class FPMathWithF16F128Template
+ : Template<["float", "double", "long double", "__fp16", "__float128"],
+ ["f", "", "l", "f16", "f128"]>;
-class FPMathWithF128Template :
- Template<["float", "double", "long double", "__float128"],
- ["f", "", "l", "f128"]>;
+class FPMathWithF128Template
+ : Template<["float", "double", "long double", "__float128"], ["f", "", "l",
+ "f128"]>;
-class F16F128MathTemplate : Template<["__fp16", "__float128"],
- ["f16", "f128"]>;
+class F16F128MathTemplate : Template<["__fp16", "__float128"], ["f16", "f128"]>;
-class IntMathTemplate : Template<["int", "long int", "long long int"],
- ["", "l", "ll"], /*AsPrefix=*/1>;
+class IntMathTemplate
+ : Template<["int", "long int", "long long int"], ["", "l", "ll"],
+ /*AsPrefix=*/1>;
-class MSInt8_16_32Template : Template<["char", "short", "msint32_t"],
- ["8", "16", ""]>;
+class MSInt8_16_32Template
+ : Template<["char", "short", "msint32_t"], ["8", "16", ""]>;
class Int8_16_32_64Template
- : Template<["char", "short", "int", "long long int"],
- ["8", "16", "32", "64"]>;
+ : Template<["char", "short", "int", "long long int"], ["8", "16", "32",
+ "64"]>;
class MSInt8_16_32_64Template
- : Template<["char", "short", "msint32_t", "long long int"],
- ["8", "16", "", "64"]>;
+ : Template<["char", "short", "msint32_t", "long long int"], ["8", "16", "",
+ "64"]>;
-class MSInt16_32Template : Template<["short", "msint32_t"],
- ["16", ""]>;
+class MSInt16_32Template : Template<["short", "msint32_t"], ["16", ""]>;
-class MSUInt16_32_64Template :
- Template<["unsigned short", "unsigned int", "uint64_t"],
- ["16", "", "64"]>;
+class MSUInt16_32_64Template
+ : Template<["unsigned short", "unsigned int", "uint64_t"], ["16", "",
+ "64"]>;
-class MSInt32_64Template : Template<["msint32_t", "int64_t"],
- ["", "64"]>;
+class MSInt32_64Template : Template<["msint32_t", "int64_t"], ["", "64"]>;
-class FloatDoubleTemplate : Template<["float", "double"],
- ["f", ""]>;
+class FloatDoubleTemplate : Template<["float", "double"], ["f", ""]>;
// FIXME: These assume that char -> i8, short -> i16, int -> i32,
// long long -> i64.
-class SyncBuiltinsTemplate :
- Template<["char", "short", "int", "long long int", "__int128_t"],
- ["1", "2", "4", "8", "16"]>;
+class SyncBuiltinsTemplate
+ : Template<["char", "short", "int", "long long int", "__int128_t"],
+ ["1", "2", "4", "8", "16"]>;
-class BitInt8_16_32_64BuiltinsTemplate :
- Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
- ["8", "16", "32", "64"]>;
+class BitInt8_16_32_64BuiltinsTemplate
+ : Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
+ ["8", "16", "32", "64"]>;
-class BitShort_Int_Long_LongLongTemplate :
- Template<["short", "int", "long int", "long long int"],
- ["s", "", "l", "ll"]>;
+class BitShort_Int_Long_LongLongTemplate
+ : Template<["short", "int", "long int", "long long int"], ["s", "", "l",
+ "ll"]>;
-class BitInt_Long_LongLongTemplate :
- Template<["int", "long int", "long long int"],
- ["", "l", "ll"]>;
+class BitInt_Long_LongLongTemplate
+ : Template<["int", "long int", "long long int"], ["", "l", "ll"]>;
// Most of the types used in the prototypes are types from C, C++ or ObjC. There
// are a few builtin-specific types and qualifiers.
@@ -139,55 +135,64 @@ def CeilF16F128 : Builtin, F16F128MathTemplate {
def CosF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cos"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def CoshF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cosh"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def ErfF128 : Builtin {
let Spellings = ["__builtin_erff128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ErfcF128 : Builtin {
let Spellings = ["__builtin_erfcf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ExpF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Expm1F128 : Builtin {
let Spellings = ["__builtin_expm1f128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def FdimF128 : Builtin {
let Spellings = ["__builtin_fdimf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -199,7 +204,8 @@ def FloorF16F128 : Builtin, F16F128MathTemplate {
def FmaF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_fma"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T, T)";
}
@@ -229,7 +235,8 @@ def FminimumNumF16F128 : Builtin, F16F128MathTemplate {
def Atan2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_atan2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -259,7 +266,8 @@ def FabsF128 : Builtin {
def FmodF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_fmod"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -276,7 +284,7 @@ def HugeVal : Builtin, FPMathWithF128Template {
}
def HugeValF16 : Builtin {
- let Spellings = ["__builtin_huge_valf16"];
+ let Spellings = ["__builtin_huge_valf16"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "_Float16()";
}
@@ -295,7 +303,8 @@ def InfF16 : Builtin {
def LdexpF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_ldexp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, int)";
}
@@ -319,9 +328,10 @@ def NanF128 : Builtin {
let Prototype = "__float128(char const*)";
}
-def Nans : Builtin,
- Template<["float", "double", "long double", "_Float16", "__float128"],
- ["f", "", "l", "f16", "f128"]> {
+def Nans
+ : Builtin,
+ Template<["float", "double", "long double", "_Float16", "__float128"],
+ ["f", "", "l", "f16", "f128"]> {
let Spellings = ["__builtin_nans"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Pure, Constexpr];
let Prototype = "T(char const*)";
@@ -335,19 +345,22 @@ def PowI : Builtin, FPMathTemplate {
def PowF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_pow"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
def HypotF128 : Builtin {
let Spellings = ["__builtin_hypotf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def ILogbF128 : Builtin {
let Spellings = ["__builtin_ilogbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "int(__float128)";
}
@@ -359,55 +372,64 @@ def LgammaF128 : Builtin {
def LLrintF128 : Builtin {
let Spellings = ["__builtin_llrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def LLroundF128 : Builtin {
let Spellings = ["__builtin_llroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def Log10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Log1pF128 : Builtin {
let Spellings = ["__builtin_log1pf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def Log2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LogbF128 : Builtin {
let Spellings = ["__builtin_logbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def LogF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LrintF128 : Builtin {
let Spellings = ["__builtin_lrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
def LroundF128 : Builtin {
let Spellings = ["__builtin_lroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
@@ -419,19 +441,22 @@ def NearbyintF128 : Builtin {
def NextafterF128 : Builtin {
let Spellings = ["__builtin_nextafterf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def NexttowardF128 : Builtin {
let Spellings = ["__builtin_nexttowardf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def RemainderF128 : Builtin {
let Spellings = ["__builtin_remainderf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
+ ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -670,15 +695,13 @@ def Signbit : Builtin {
def SignbitF : Builtin {
let Spellings = ["__builtin_signbitf"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
- Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
let Prototype = "int(float)";
}
def SignbitL : Builtin {
let Spellings = ["__builtin_signbitl"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
- Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
let Prototype = "int(long double)";
}
@@ -748,8 +771,9 @@ def Clrsb : Builtin, BitInt_Long_LongLongTemplate {
// there exists native types on the target that are 32- and 64-bits wide, unless
// these conditions are fulfilled these builtins will operate on a not intended
// bitwidth.
-def BSwap : Builtin, Template<["unsigned short", "uint32_t", "uint64_t"],
- ["16", "32", "64"]> {
+def BSwap
+ : Builtin,
+ Template<["unsigned short", "uint32_t", "uint64_t"], ["16", "32", "64"]> {
let Spellings = ["__builtin_bswap"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "T(T)";
@@ -829,13 +853,15 @@ def BuiltinCalloc : Builtin {
def BuiltinConstantP : Builtin {
let Spellings = ["__builtin_constant_p"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
+ Constexpr];
let Prototype = "int(...)";
}
def BuiltinClassifyType : Builtin {
let Spellings = ["__builtin_classify_type"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
+ Constexpr];
let Prototype = "int(...)";
}
@@ -1139,7 +1165,8 @@ def StpncpyChk : Builtin {
def SNPrintfChk : Builtin {
let Spellings = ["__builtin___snprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, PrintfFormat<4>];
- let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
+ let Prototype =
+ "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
}
def SPrintfChk : Builtin {
@@ -1151,13 +1178,15 @@ def SPrintfChk : Builtin {
def VSNPrintfChk : Builtin {
let Spellings = ["__builtin___vsnprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<4>];
- let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, size_t, int, size_t, char const* "
+ "restrict, __builtin_va_list)";
}
def VSPrintfChk : Builtin {
let Spellings = ["__builtin___vsprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<3>];
- let Prototype = "int(char* restrict, int, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, int, size_t, char const* restrict, "
+ "__builtin_va_list)";
}
def FPrintfChk : Builtin {
@@ -1175,7 +1204,8 @@ def PrintfChk : Builtin {
def VFPrintfChk : Builtin {
let Spellings = ["__builtin___vfprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<2>];
- let Prototype = "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
}
def VPrintfChk : Builtin {
@@ -2553,7 +2583,8 @@ def SyncFetchAndUMax : Builtin {
let Prototype = "unsigned int(unsigned int volatile*, unsigned int)";
}
-// ignored glibc builtin, see https://sourceware.org/bugzilla/show_bug.cgi?id=25399
+// ignored glibc builtin, see
+// https://sourceware.org/bugzilla/show_bug.cgi?id=25399
def WarnMemsetZeroLen : Builtin {
let Spellings = ["__warn_memset_zero_len"];
let Attributes = [NoThrow, Pure];
@@ -2603,9 +2634,10 @@ def BittestAndSet : MSLangBuiltin, MSInt32_64Template {
let Prototype = "unsigned char(T*, T)";
}
-def MSByteswap : MSLibBuiltin<"stdlib.h">,
- Template<["unsigned short", "msuint32_t", "unsigned long long int"],
- ["_ushort", "_ulong", "_uint64"]> {
+def MSByteswap
+ : MSLibBuiltin<"stdlib.h">,
+ Template<["unsigned short", "msuint32_t", "unsigned long long int"],
+ ["_ushort", "_ulong", "_uint64"]> {
let Spellings = ["_byteswap"];
let Attributes = [NoThrow, Const];
let Prototype = "T(T)";
@@ -3216,21 +3248,24 @@ def VPrintf : LibBuiltin<"stdio.h"> {
def VfPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsnPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsnprintf"];
let Attributes = [NoThrow, VPrintfFormat<2>, NonNull<NonOptimizing, [2]>];
- let Prototype = "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(char* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(char* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -3265,14 +3300,16 @@ def VScanf : LibBuiltin<"stdio.h"> {
def VFScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VSScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype = "int(char const* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype =
+ "int(char const* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -4492,11 +4529,9 @@ def AssumeSeparateStorage : Builtin {
// Multiprecision Arithmetic Builtins.
-class MPATemplate : Template<
- ["unsigned char", "unsigned short", "unsigned int",
- "unsigned long int", "unsigned long long int"],
- ["b", "s", "",
- "l", "ll"]>;
+class MPATemplate : Template<["unsigned char", "unsigned short", "unsigned int",
+ "unsigned long int", "unsigned long long int"],
+ ["b", "s", "", "l", "ll"]>;
def Addc : Builtin, MPATemplate {
let Spellings = ["__builtin_addc"];
@@ -4531,9 +4566,9 @@ def MulOverflow : Builtin {
let Prototype = "bool(...)";
}
-class UOverflowTemplate :
- Template<["unsigned int", "unsigned long int", "unsigned long long int"],
- ["_overflow", "l_overflow", "ll_overflow"]>;
+class UOverflowTemplate
+ : Template<["unsigned int", "unsigned long int", "unsigned long long int"],
+ ["_overflow", "l_overflow", "ll_overflow"]>;
def UaddOverflow : Builtin, UOverflowTemplate {
let Spellings = ["__builtin_uadd"];
@@ -4553,9 +4588,9 @@ def UmulOverflow : Builtin, UOverflowTemplate {
let Prototype = "bool(T const, T const, T*)";
}
-class SOverflowTemplate :
- Template<["int", "long int", "long long int"],
- ["_overflow", "l_overflow", "ll_overflow"]>;
+class SOverflowTemplate
+ : Template<["int", "long int", "long long int"], ["_overflow", "l_overflow",
+ "ll_overflow"]>;
def SaddOverflow : Builtin, SOverflowTemplate {
let Spellings = ["__builtin_sadd"];
@@ -4815,7 +4850,8 @@ def PtrauthStringDiscriminator : Builtin {
// AllocToken builtins.
def InferAllocToken : Builtin {
let Spellings = ["__builtin_infer_alloc_token"];
- let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr, UnevaluatedArguments];
+ let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr,
+ UnevaluatedArguments];
let Prototype = "size_t(...)";
}
@@ -4918,7 +4954,8 @@ def GetPipeMaxPackets : OCLPipeLangBuiltin {
}
// OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
-// Custom builtin check allows to perform special check of passed block arguments.
+// Custom builtin check allows to perform special check of passed block
+// arguments.
def EnqueueKernel : OCL_DSELangBuiltin {
let Spellings = ["enqueue_kernel"];
let Attributes = [CustomTypeChecking, NoThrow];
@@ -5006,7 +5043,7 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
-def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
+def HLSLAddUint64 : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_adduint64"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5093,13 +5130,15 @@ def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
def HLSLResourceHandleFromBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefrombinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
+ "int32_t, uint32_t, char const*)";
}
def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefromimplicitbinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
+ "int32_t, uint32_t, char const*)";
}
def HLSLResourceCounterHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
@@ -5252,16 +5291,18 @@ def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLCrossFloat: LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossFloat : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf32"];
let Attributes = [NoThrow, Const];
- let Prototype = "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
+ let Prototype =
+ "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
}
-def HLSLCrossHalf: LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossHalf : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf16"];
let Attributes = [NoThrow, Const];
- let Prototype = "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
+ let Prototype =
+ "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
}
def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
@@ -5372,7 +5413,7 @@ def HLSLSign : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLStep: LangBuiltin<"HLSL_LANG"> {
+def HLSLStep : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_step"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5390,19 +5431,25 @@ def HLSLBufferUpdateCounter : LangBuiltin<"HLSL_LANG"> {
let Prototype = "uint32_t(__hlsl_resource_t, int)";
}
-def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
+def HLSLSplitDouble : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}
-def HLSLClip: LangBuiltin<"HLSL_LANG"> {
+def HLSLClip : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_clip"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}
-def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> {
+def HLSLGroupMemoryBarrier : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_group_memory_barrier"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void()";
+}
+
+def HLSLGroupMemoryBarrierWithGroupSync : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_group_memory_barrier_with_group_sync"];
let Attributes = [NoThrow, Const];
let Prototype = "void()";
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 177787d2a9630..ae7325384f9f8 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1355,6 +1355,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
"clip operands types mismatch");
return handleHlslClip(E, this);
+ case Builtin::BI__builtin_hlsl_group_memory_barrier: {
+ Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
+ return EmitRuntimeCall(
+ Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
+ }
case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
Intrinsic::ID ID =
CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 466c809fdef78..11d99de157ba8 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -184,6 +184,8 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex,
resource_nonuniformindex)
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier,
+ group_memory_barrier)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
group_memory_barrier_with_group_sync)
GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 7b6160091aece..f12b62df4733c 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -3411,6 +3411,17 @@ float3 radians(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_radians)
float4 radians(float4);
+//===----------------------------------------------------------------------===//
+// GroupMemoryBarrier builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn void GroupMemoryBarrier(void)
+/// \brief Blocks execution of all threads in a group until all group shared
+/// accesses have been completed.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier)
+__attribute__((convergent)) void GroupMemoryBarrier(void);
+
//===----------------------------------------------------------------------===//
// GroupMemoryBarrierWithGroupSync builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
new file mode 100644
index 0000000000000..b52819973f677
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/GroupMemoryBarrier.hlsl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: -DTARGET=dx -check-prefixes=CHECK,CHECK-DXIL
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN: spirv-unknown-vulkan-compute %s \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: -DTARGET=spv -check-prefixes=CHECK,CHECK-SPIRV
+
+// CHECK-DXIL: define hidden void @
+// CHECK-SPIRV: define hidden spir_func void @
+void test_GroupMemoryBarrier() {
+// CHECK-DXIL: call void @llvm.[[TARGET]].group.memory.barrier()
+// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].group.memory.barrier()
+ GroupMemoryBarrier();
+}
+
+// CHECK: declare void @llvm.[[TARGET]].group.memory.barrier() #[[ATTRS:[0-9]+]]
+// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}}
+// CHECK: attributes #[[ATTRS]] = {{.+}}convergent{{.+}}
diff --git a/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
new file mode 100644
index 0000000000000..5c5761c31eb90
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+void test_too_many_arg() {
+ __builtin_hlsl_group_memory_barrier(0);
+  // expected-error@-1 {{too many arguments to function call, expected 0, have 1}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index e2b2feb927318..3810fc340962d 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -258,6 +258,9 @@ def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0,
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
+def int_dx_group_memory_barrier
+ : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
+
def int_dx_group_memory_barrier_with_group_sync
: DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 3fc18a254f672..e1328244ef7b0 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -138,6 +138,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
def int_spv_wave_prefix_product : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+ def int_spv_group_memory_barrier : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
def int_spv_group_memory_barrier_with_group_sync : ClangBuiltin<"__builtin_spirv_group_barrier">,
DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index e64909b059d29..5ef1a7c130b4a 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -913,6 +913,8 @@ def GetDimensions : DXILOp<72, getDimensions> {
def Barrier : DXILOp<80, barrier> {
let Doc = "inserts a memory barrier in the shader";
let intrinsics = [
+ IntrinSelect<int_dx_group_memory_barrier,
+ [IntrinArgI32<BarrierMode_GroupMemoryBarrier>]>,
IntrinSelect<int_dx_group_memory_barrier_with_group_sync,
[IntrinArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync>]>,
];
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 7b4c047593a3a..704c0dd6dc5d7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -336,6 +336,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectWaveOpInst(Register ResVReg, SPIRVTypeInst ResType,
MachineInstr &I, unsigned Opcode) const;
+ bool selectBarrierInst(MachineInstr &I, unsigned Scope,
+ bool WithGroupSync) const;
+
bool selectWaveActiveCountBits(Register ResVReg, SPIRVTypeInst ResType,
MachineInstr &I) const;
@@ -1893,15 +1896,15 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
ValueReg = TmpReg;
}
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(NewOpcode))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(Ptr)
- .addUse(ScopeReg)
- .addUse(MemSemReg)
- .addUse(ValueReg)
- .constrainAllUses(TII, TRI, RBI);
- return true;
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(NewOpcode))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(Ptr)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .addUse(ValueReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return true;
}
bool SPIRVInstructionSelector::selectUnmergeValues(MachineInstr &I) const {
@@ -2551,12 +2554,12 @@ bool SPIRVInstructionSelector::selectIntegerDotExpansion(
for (unsigned i = 1; i < GR.getScalarOrVectorComponentCount(VecType); i++) {
Register Elt = MRI->createVirtualRegister(GR.getRegClass(ResType));
- BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
- .addDef(Elt)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(TmpVec)
- .addImm(i)
- .constrainAllUses(TII, TRI, RBI);
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+ .addDef(Elt)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(TmpVec)
+ .addImm(i)
+ .constrainAllUses(TII, TRI, RBI);
Register Sum = i < GR.getScalarOrVectorComponentCount(VecType) - 1
? MRI->createVirtualRegister(GR.getRegClass(ResType))
@@ -2757,13 +2760,13 @@ bool SPIRVInstructionSelector::selectSign(Register ResVReg,
? MRI->createVirtualRegister(&SPIRV::IDRegClass)
: ResVReg;
- BuildMI(BB, I, DL, TII.get(SPIRV::OpExtInst))
- .addDef(SignReg)
- .addUse(GR.getSPIRVTypeID(InputType))
- .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
- .addImm(SignOpcode)
- .addUse(InputRegister)
- .constrainAllUses(TII, TRI, RBI);
+ BuildMI(BB, I, DL, TII.get(SPIRV::OpExtInst))
+ .addDef(SignReg)
+ .addUse(GR.getSPIRVTypeID(InputType))
+ .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+ .addImm(SignOpcode)
+ .addUse(InputRegister)
+ .constrainAllUses(TII, TRI, RBI);
if (NeedsConversion) {
auto ConvertOpcode = IsFloatTy ? SPIRV::OpConvertFToS : SPIRV::OpSConvert;
@@ -2798,6 +2801,23 @@ bool SPIRVInstructionSelector::selectWaveOpInst(Register ResVReg,
return true;
}
+bool SPIRVInstructionSelector::selectBarrierInst(MachineInstr &I,
+ unsigned Scope,
+ bool WithGroupSync) const {
+ auto BarrierType =
+ WithGroupSync ? SPIRV::OpControlBarrier : SPIRV::OpMemoryBarrier;
+ Register MemSemReg =
+ buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
+ Register ScopeReg = buildI32Constant(Scope, I);
+ MachineBasicBlock &BB = *I.getParent();
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(BarrierType))
+ .addUse(ScopeReg)
+ .addUse(ScopeReg)
+ .addUse(MemSemReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return true;
+}
+
bool SPIRVInstructionSelector::selectWaveActiveCountBits(
Register ResVReg, SPIRVTypeInst ResType, MachineInstr &I) const {
@@ -3848,22 +3868,22 @@ bool SPIRVInstructionSelector::selectDerivativeInst(
Register ConvertToVReg = MRI->createVirtualRegister(RegClass);
Register DpdOpVReg = MRI->createVirtualRegister(RegClass);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
- .addDef(ConvertToVReg)
- .addUse(GR.getSPIRVTypeID(F32ConvertTy))
- .addUse(SrcReg)
- .constrainAllUses(TII, TRI, RBI);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
- .addDef(DpdOpVReg)
- .addUse(GR.getSPIRVTypeID(F32ConvertTy))
- .addUse(ConvertToVReg)
- .constrainAllUses(TII, TRI, RBI);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
- .addDef(ResVReg)
- .addUse(GR.getSPIRVTypeID(ResType))
- .addUse(DpdOpVReg)
- .constrainAllUses(TII, TRI, RBI);
- return true;
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+ .addDef(ConvertToVReg)
+ .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+ .addUse(SrcReg)
+ .constrainAllUses(TII, TRI, RBI);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode))
+ .addDef(DpdOpVReg)
+ .addUse(GR.getSPIRVTypeID(F32ConvertTy))
+ .addUse(ConvertToVReg)
+ .constrainAllUses(TII, TRI, RBI);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(DpdOpVReg)
+ .constrainAllUses(TII, TRI, RBI);
+ return true;
}
bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
@@ -4160,18 +4180,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectFirstBitHigh(ResVReg, ResType, I, /*IsSigned=*/true);
case Intrinsic::spv_firstbitlow: // There is no CL equivlent of FindILsb
return selectFirstBitLow(ResVReg, ResType, I);
- case Intrinsic::spv_group_memory_barrier_with_group_sync: {
- Register MemSemReg =
- buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
- Register ScopeReg = buildI32Constant(SPIRV::Scope::Workgroup, I);
- MachineBasicBlock &BB = *I.getParent();
- BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpControlBarrier))
- .addUse(ScopeReg)
- .addUse(ScopeReg)
- .addUse(MemSemReg)
- .constrainAllUses(TII, TRI, RBI);
- return true;
- }
+ case Intrinsic::spv_group_memory_barrier:
+ return selectBarrierInst(I, SPIRV::Scope::Device, false);
+ case Intrinsic::spv_group_memory_barrier_with_group_sync:
+ return selectBarrierInst(I, SPIRV::Scope::Workgroup, true);
case Intrinsic::spv_generic_cast_to_ptr_explicit: {
Register PtrReg = I.getOperand(I.getNumExplicitDefs() + 1).getReg();
SPIRV::StorageClass::StorageClass ResSC =
@@ -4333,8 +4345,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
bool SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg,
SPIRVTypeInst ResType,
MachineInstr &I) const {
- // The images need to be loaded in the same basic block as their use. We defer
- // loading the image to the intrinsic that uses it.
+ // The images need to be loaded in the same basic block as their use. We
+ // defer loading the image to the intrinsic that uses it.
if (ResType->getOpcode() == SPIRV::OpTypeImage)
return true;
@@ -4381,9 +4393,9 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
Register CounterHandleReg = Intr.getOperand(2).getReg();
Register IncrReg = Intr.getOperand(3).getReg();
- // The counter handle is a pointer to the counter variable (which is a struct
- // containing an i32). We need to get a pointer to that i32 member to do the
- // atomic operation.
+ // The counter handle is a pointer to the counter variable (which is a
+ // struct containing an i32). We need to get a pointer to that i32 member to
+ // do the atomic operation.
#ifndef NDEBUG
SPIRVTypeInst CounterVarType = GR.getSPIRVTypeForVReg(CounterHandleReg);
SPIRVTypeInst CounterVarPointeeType = GR.getPointeeType(CounterVarType);
@@ -4442,8 +4454,8 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
}
// In HLSL, IncrementCounter returns the value *before* the increment, while
- // DecrementCounter returns the value *after* the decrement. Both are lowered
- // to the same atomic intrinsic which returns the value *before* the
+ // DecrementCounter returns the value *after* the decrement. Both are
+ // lowered to the same atomic intrinsic which returns the value *before* the
// operation. So for decrements (negative IncrVal), we must subtract the
// increment value from the result to get the post-decrement value.
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
@@ -4462,8 +4474,8 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(Register &ResVReg,
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with other
- // issues.
+ // We will do that when we can, but for now trying to move forward with
+ // other issues.
Register ImageReg = I.getOperand(2).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -4682,8 +4694,8 @@ bool SPIRVInstructionSelector::selectGatherIntrinsic(Register &ResVReg,
auto Dim = static_cast<SPIRV::Dim::Dim>(ImageType->getOperand(2).getImm());
if (Dim != SPIRV::Dim::DIM_2D && Dim != SPIRV::Dim::DIM_Cube &&
Dim != SPIRV::Dim::DIM_Rect) {
- I.emitGenericError(
- "Gather operations are only supported for 2D, Cube, and Rect images.");
+ I.emitGenericError("Gather operations are only supported for 2D, Cube, "
+ "and Rect images.");
return false;
}
@@ -4806,10 +4818,10 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(Register &ResVReg,
Register ResourcePtr = I.getOperand(2).getReg();
SPIRVTypeInst RegType = GR.getSPIRVTypeForVReg(ResourcePtr, I.getMF());
if (RegType->getOpcode() == SPIRV::OpTypeImage) {
- // For texel buffers, the index into the image is part of the OpImageRead or
- // OpImageWrite instructions. So we will do nothing in this case. This
- // intrinsic will be combined with the load or store when selecting the load
- // or store.
+ // For texel buffers, the index into the image is part of the OpImageRead
+ // or OpImageWrite instructions. So we will do nothing in this case. This
+ // intrinsic will be combined with the load or store when selecting the
+ // load or store.
return true;
}
@@ -4927,8 +4939,8 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with other
- // issues.
+ // We will do that when we can, but for now trying to move forward with
+ // other issues.
Register ImageReg = I.getOperand(1).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -5246,8 +5258,8 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
/*SwapPrimarySide=*/false);
default:
- report_fatal_error(
- "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
+ report_fatal_error("spv_firstbituhigh and spv_firstbitshigh only support "
+ "16,32,64 bits.");
}
}
@@ -5258,8 +5270,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
Register OpReg = I.getOperand(2).getReg();
SPIRVTypeInst OpType = GR.getSPIRVTypeForVReg(OpReg);
// OpUConvert treats the operand bits as an unsigned i16 and zero extends it
- // to an unsigned i32. As this leaves all the least significant bits unchanged
- // so the first set bit from the LSB side doesn't change.
+ // to an unsigned i32. As this leaves all the least significant bits
+ // unchanged so the first set bit from the LSB side doesn't change.
unsigned ExtendOpcode = SPIRV::OpUConvert;
unsigned BitSetOpcode = GL::FindILsb;
@@ -5340,11 +5352,11 @@ bool SPIRVInstructionSelector::selectBranchCond(MachineInstr &I) const {
// G_BRCOND to create an OpBranchConditional. We should hit G_BR first, and
// generate the OpBranchConditional in selectBranch above.
//
- // If an OpBranchConditional has been generated, we simply return, as the work
- // is alread done. If there is no OpBranchConditional, LLVM must be relying on
- // implicit fallthrough to the next basic block, so we need to create an
- // OpBranchConditional with an explicit "false" argument pointing to the next
- // basic block that LLVM would fall through to.
+ // If an OpBranchConditional has been generated, we simply return, as the
+ // work is already done. If there is no OpBranchConditional, LLVM must be
+ // relying on implicit fallthrough to the next basic block, so we need to
+ // create an OpBranchConditional with an explicit "false" argument pointing
+ // to the next basic block that LLVM would fall through to.
const MachineInstr *NextI = I.getNextNode();
// Check if this has already been successfully selected.
if (NextI != nullptr && NextI->getOpcode() == SPIRV::OpBranchConditional)
@@ -5458,8 +5470,8 @@ bool SPIRVInstructionSelector::selectGlobalValue(
auto GlobalVar = cast<GlobalVariable>(GV);
assert(GlobalVar->getName() != "llvm.global.annotations");
- // Skip empty declaration for GVs with initializers till we get the decl with
- // passed initializer.
+ // Skip empty declaration for GVs with initializers till we get the decl
+ // with passed initializer.
if (hasInitializer(GlobalVar) && !Init)
return true;
@@ -5475,9 +5487,9 @@ bool SPIRVInstructionSelector::selectGlobalValue(
ResVReg, ResType, GlobalIdent, GV, StorageClass, Init,
GlobalVar->isConstant(), LnkType, MIRBuilder, true);
// TODO: For AMDGCN, we pipe externally_initialized through via
- // HostAccessINTEL, with ReadWrite (3) access, which is we then handle during
- // reverse translation. We should remove this once SPIR-V gains the ability to
- // express the concept.
+ // HostAccessINTEL, with ReadWrite (3) access, which we then handle
+ // during reverse translation. We should remove this once SPIR-V gains the
+ // ability to express the concept.
if (GlobalVar->isExternallyInitialized() &&
STI.getTargetTriple().getVendor() == Triple::AMD) {
constexpr unsigned ReadWriteINTEL = 3u;
@@ -5746,8 +5758,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
buildPointerToResource(SPIRVTypeInst(VarType), SC, Set, Binding,
ArraySize, IndexReg, Name, MIRBuilder);
- // The handle for the buffer is the pointer to the resource. For an image, the
- // handle is the image object. So images get an extra load.
+ // The handle for the buffer is the pointer to the resource. For an image,
+ // the handle is the image object. So images get an extra load.
uint32_t LoadOpcode =
IsStructuredBuffer ? SPIRV::OpCopyObject : SPIRV::OpLoad;
GR.assignSPIRVTypeToVReg(ResType, HandleReg, *Pos.getMF());
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
new file mode 100644
index 0000000000000..2c865d9965b73
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
+
+define void @test_group_memory_barrier() {
+entry:
+ ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+ call void @llvm.dx.group.memory.barrier()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
new file mode 100644
index 0000000000000..30697a2490c0f
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpMemoryModel Logical GLSL450
+
+define void @test_group_memory_barrier() {
+entry:
+ ; CHECK: %[[#TY:]] = OpTypeInt 32 0
+ ; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16
+ ; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2
+ ; CHECK: OpMemoryBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
+ call void @llvm.spv.group.memory.barrier()
+ ret void
+}
>From 80c2bb4745bfb1b06dca9c03b05c0eb4c944c8dc Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 10:41:04 +0100
Subject: [PATCH 2/5] Passing tests and reverted basic/Builtins.td
---
clang/include/clang/Basic/Builtins.td | 279 ++++++++----------
...er.hlsl => GroupMemoryBarrier-errors.hlsl} | 0
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 17 +-
.../CodeGen/DirectX/group_memory_barrier.ll | 4 +-
.../group_memory_barrier_with_group_sync.ll | 2 +-
.../hlsl-intrinsics/group_memory_barrier.ll | 2 +-
6 files changed, 134 insertions(+), 170 deletions(-)
rename clang/test/SemaHLSL/BuiltIns/{GroupMemoryBarrier.hlsl => GroupMemoryBarrier-errors.hlsl} (100%)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index c98236e4258d4..10b2e930792c7 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -8,64 +8,68 @@
include "clang/Basic/BuiltinsBase.td"
-class FPMathTemplate
- : Template<["float", "double", "long double"], ["f", "", "l"]>;
+class FPMathTemplate : Template<["float", "double", "long double"],
+ ["f", "", "l"]>;
-class FPMathWithF16Template
- : Template<["float", "double", "long double", "__fp16"], ["f", "", "l",
- "f16"]>;
+class FPMathWithF16Template :
+ Template<["float", "double", "long double", "__fp16"],
+ ["f", "", "l", "f16"]>;
-class FPMathWithF16F128Template
- : Template<["float", "double", "long double", "__fp16", "__float128"],
- ["f", "", "l", "f16", "f128"]>;
+class FPMathWithF16F128Template :
+ Template<["float", "double", "long double", "__fp16", "__float128"],
+ ["f", "", "l", "f16", "f128"]>;
-class FPMathWithF128Template
- : Template<["float", "double", "long double", "__float128"], ["f", "", "l",
- "f128"]>;
+class FPMathWithF128Template :
+ Template<["float", "double", "long double", "__float128"],
+ ["f", "", "l", "f128"]>;
-class F16F128MathTemplate : Template<["__fp16", "__float128"], ["f16", "f128"]>;
+class F16F128MathTemplate : Template<["__fp16", "__float128"],
+ ["f16", "f128"]>;
-class IntMathTemplate
- : Template<["int", "long int", "long long int"], ["", "l", "ll"],
- /*AsPrefix=*/1>;
+class IntMathTemplate : Template<["int", "long int", "long long int"],
+ ["", "l", "ll"], /*AsPrefix=*/1>;
-class MSInt8_16_32Template
- : Template<["char", "short", "msint32_t"], ["8", "16", ""]>;
+class MSInt8_16_32Template : Template<["char", "short", "msint32_t"],
+ ["8", "16", ""]>;
class Int8_16_32_64Template
- : Template<["char", "short", "int", "long long int"], ["8", "16", "32",
- "64"]>;
+ : Template<["char", "short", "int", "long long int"],
+ ["8", "16", "32", "64"]>;
class MSInt8_16_32_64Template
- : Template<["char", "short", "msint32_t", "long long int"], ["8", "16", "",
- "64"]>;
+ : Template<["char", "short", "msint32_t", "long long int"],
+ ["8", "16", "", "64"]>;
-class MSInt16_32Template : Template<["short", "msint32_t"], ["16", ""]>;
+class MSInt16_32Template : Template<["short", "msint32_t"],
+ ["16", ""]>;
-class MSUInt16_32_64Template
- : Template<["unsigned short", "unsigned int", "uint64_t"], ["16", "",
- "64"]>;
+class MSUInt16_32_64Template :
+ Template<["unsigned short", "unsigned int", "uint64_t"],
+ ["16", "", "64"]>;
-class MSInt32_64Template : Template<["msint32_t", "int64_t"], ["", "64"]>;
+class MSInt32_64Template : Template<["msint32_t", "int64_t"],
+ ["", "64"]>;
-class FloatDoubleTemplate : Template<["float", "double"], ["f", ""]>;
+class FloatDoubleTemplate : Template<["float", "double"],
+ ["f", ""]>;
// FIXME: These assume that char -> i8, short -> i16, int -> i32,
// long long -> i64.
-class SyncBuiltinsTemplate
- : Template<["char", "short", "int", "long long int", "__int128_t"],
- ["1", "2", "4", "8", "16"]>;
+class SyncBuiltinsTemplate :
+ Template<["char", "short", "int", "long long int", "__int128_t"],
+ ["1", "2", "4", "8", "16"]>;
-class BitInt8_16_32_64BuiltinsTemplate
- : Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
- ["8", "16", "32", "64"]>;
+class BitInt8_16_32_64BuiltinsTemplate :
+ Template<["unsigned char", "unsigned short", "uint32_t", "uint64_t"],
+ ["8", "16", "32", "64"]>;
-class BitShort_Int_Long_LongLongTemplate
- : Template<["short", "int", "long int", "long long int"], ["s", "", "l",
- "ll"]>;
+class BitShort_Int_Long_LongLongTemplate :
+ Template<["short", "int", "long int", "long long int"],
+ ["s", "", "l", "ll"]>;
-class BitInt_Long_LongLongTemplate
- : Template<["int", "long int", "long long int"], ["", "l", "ll"]>;
+class BitInt_Long_LongLongTemplate :
+ Template<["int", "long int", "long long int"],
+ ["", "l", "ll"]>;
// Most of the types used in the prototypes are types from C, C++ or ObjC. There
// are a few builtin-specific types and qualifiers.
@@ -135,64 +139,55 @@ def CeilF16F128 : Builtin, F16F128MathTemplate {
def CosF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cos"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def CoshF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_cosh"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def ErfF128 : Builtin {
let Spellings = ["__builtin_erff128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ErfcF128 : Builtin {
let Spellings = ["__builtin_erfcf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def ExpF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Exp10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_exp10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Expm1F128 : Builtin {
let Spellings = ["__builtin_expm1f128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def FdimF128 : Builtin {
let Spellings = ["__builtin_fdimf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -204,8 +199,7 @@ def FloorF16F128 : Builtin, F16F128MathTemplate {
def FmaF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_fma"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T, T)";
}
@@ -235,8 +229,7 @@ def FminimumNumF16F128 : Builtin, F16F128MathTemplate {
def Atan2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_atan2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -266,8 +259,7 @@ def FabsF128 : Builtin {
def FmodF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_fmod"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
@@ -284,7 +276,7 @@ def HugeVal : Builtin, FPMathWithF128Template {
}
def HugeValF16 : Builtin {
- let Spellings = ["__builtin_huge_valf16"];
+ let Spellings = ["__builtin_huge_valf16"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "_Float16()";
}
@@ -303,8 +295,7 @@ def InfF16 : Builtin {
def LdexpF16F128 : F16F128MathTemplate, Builtin {
let Spellings = ["__builtin_ldexp"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, int)";
}
@@ -328,10 +319,9 @@ def NanF128 : Builtin {
let Prototype = "__float128(char const*)";
}
-def Nans
- : Builtin,
- Template<["float", "double", "long double", "_Float16", "__float128"],
- ["f", "", "l", "f16", "f128"]> {
+def Nans : Builtin,
+ Template<["float", "double", "long double", "_Float16", "__float128"],
+ ["f", "", "l", "f16", "f128"]> {
let Spellings = ["__builtin_nans"];
let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Pure, Constexpr];
let Prototype = "T(char const*)";
@@ -345,22 +335,19 @@ def PowI : Builtin, FPMathTemplate {
def PowF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_pow"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T, T)";
}
def HypotF128 : Builtin {
let Spellings = ["__builtin_hypotf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def ILogbF128 : Builtin {
let Spellings = ["__builtin_ilogbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "int(__float128)";
}
@@ -372,64 +359,55 @@ def LgammaF128 : Builtin {
def LLrintF128 : Builtin {
let Spellings = ["__builtin_llrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def LLroundF128 : Builtin {
let Spellings = ["__builtin_llroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long long int(__float128)";
}
def Log10F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log10"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def Log1pF128 : Builtin {
let Spellings = ["__builtin_log1pf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def Log2F16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log2"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LogbF128 : Builtin {
let Spellings = ["__builtin_logbf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128)";
}
def LogF16F128 : Builtin, F16F128MathTemplate {
let Spellings = ["__builtin_log"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "T(T)";
}
def LrintF128 : Builtin {
let Spellings = ["__builtin_lrintf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
def LroundF128 : Builtin {
let Spellings = ["__builtin_lroundf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "long int(__float128)";
}
@@ -441,22 +419,19 @@ def NearbyintF128 : Builtin {
def NextafterF128 : Builtin {
let Spellings = ["__builtin_nextafterf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def NexttowardF128 : Builtin {
let Spellings = ["__builtin_nexttowardf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
def RemainderF128 : Builtin {
let Spellings = ["__builtin_remainderf128"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow,
- ConstIgnoringErrnoAndExceptions];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, ConstIgnoringErrnoAndExceptions];
let Prototype = "__float128(__float128, __float128)";
}
@@ -695,13 +670,15 @@ def Signbit : Builtin {
def SignbitF : Builtin {
let Spellings = ["__builtin_signbitf"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
+ Constexpr];
let Prototype = "int(float)";
}
def SignbitL : Builtin {
let Spellings = ["__builtin_signbitl"];
- let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const, Constexpr];
+ let Attributes = [FunctionWithBuiltinPrefix, NoThrow, Const,
+ Constexpr];
let Prototype = "int(long double)";
}
@@ -771,9 +748,8 @@ def Clrsb : Builtin, BitInt_Long_LongLongTemplate {
// there exists native types on the target that are 32- and 64-bits wide, unless
// these conditions are fulfilled these builtins will operate on a not intended
// bitwidth.
-def BSwap
- : Builtin,
- Template<["unsigned short", "uint32_t", "uint64_t"], ["16", "32", "64"]> {
+def BSwap : Builtin, Template<["unsigned short", "uint32_t", "uint64_t"],
+ ["16", "32", "64"]> {
let Spellings = ["__builtin_bswap"];
let Attributes = [NoThrow, Const, Constexpr];
let Prototype = "T(T)";
@@ -853,15 +829,13 @@ def BuiltinCalloc : Builtin {
def BuiltinConstantP : Builtin {
let Spellings = ["__builtin_constant_p"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
- Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
let Prototype = "int(...)";
}
def BuiltinClassifyType : Builtin {
let Spellings = ["__builtin_classify_type"];
- let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments,
- Constexpr];
+ let Attributes = [NoThrow, Const, CustomTypeChecking, UnevaluatedArguments, Constexpr];
let Prototype = "int(...)";
}
@@ -1165,8 +1139,7 @@ def StpncpyChk : Builtin {
def SNPrintfChk : Builtin {
let Spellings = ["__builtin___snprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, PrintfFormat<4>];
- let Prototype =
- "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
+ let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, ...)";
}
def SPrintfChk : Builtin {
@@ -1178,15 +1151,13 @@ def SPrintfChk : Builtin {
def VSNPrintfChk : Builtin {
let Spellings = ["__builtin___vsnprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<4>];
- let Prototype = "int(char* restrict, size_t, int, size_t, char const* "
- "restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, size_t, int, size_t, char const* restrict, __builtin_va_list)";
}
def VSPrintfChk : Builtin {
let Spellings = ["__builtin___vsprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<3>];
- let Prototype = "int(char* restrict, int, size_t, char const* restrict, "
- "__builtin_va_list)";
+ let Prototype = "int(char* restrict, int, size_t, char const* restrict, __builtin_va_list)";
}
def FPrintfChk : Builtin {
@@ -1204,8 +1175,7 @@ def PrintfChk : Builtin {
def VFPrintfChk : Builtin {
let Spellings = ["__builtin___vfprintf_chk"];
let Attributes = [FunctionWithBuiltinPrefix, VPrintfFormat<2>];
- let Prototype =
- "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(FILE* restrict, int, char const* restrict, __builtin_va_list)";
}
def VPrintfChk : Builtin {
@@ -2583,8 +2553,7 @@ def SyncFetchAndUMax : Builtin {
let Prototype = "unsigned int(unsigned int volatile*, unsigned int)";
}
-// ignored glibc builtin, see
-// https://sourceware.org/bugzilla/show_bug.cgi?id=25399
+// ignored glibc builtin, see https://sourceware.org/bugzilla/show_bug.cgi?id=25399
def WarnMemsetZeroLen : Builtin {
let Spellings = ["__warn_memset_zero_len"];
let Attributes = [NoThrow, Pure];
@@ -2634,10 +2603,9 @@ def BittestAndSet : MSLangBuiltin, MSInt32_64Template {
let Prototype = "unsigned char(T*, T)";
}
-def MSByteswap
- : MSLibBuiltin<"stdlib.h">,
- Template<["unsigned short", "msuint32_t", "unsigned long long int"],
- ["_ushort", "_ulong", "_uint64"]> {
+def MSByteswap : MSLibBuiltin<"stdlib.h">,
+ Template<["unsigned short", "msuint32_t", "unsigned long long int"],
+ ["_ushort", "_ulong", "_uint64"]> {
let Spellings = ["_byteswap"];
let Attributes = [NoThrow, Const];
let Prototype = "T(T)";
@@ -3248,24 +3216,21 @@ def VPrintf : LibBuiltin<"stdio.h"> {
def VfPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsnPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsnprintf"];
let Attributes = [NoThrow, VPrintfFormat<2>, NonNull<NonOptimizing, [2]>];
- let Prototype =
- "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, size_t, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VsPrintf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsprintf"];
let Attributes = [NoThrow, VPrintfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(char* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -3300,16 +3265,14 @@ def VScanf : LibBuiltin<"stdio.h"> {
def VFScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vfscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(FILE* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(FILE* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
def VSScanf : LibBuiltin<"stdio.h"> {
let Spellings = ["vsscanf"];
let Attributes = [VScanfFormat<1>, NonNull<NonOptimizing, [0, 1]>];
- let Prototype =
- "int(char const* restrict, char const* restrict, __builtin_va_list)";
+ let Prototype = "int(char const* restrict, char const* restrict, __builtin_va_list)";
let AddBuiltinPrefixedAlias = 1;
}
@@ -4529,9 +4492,11 @@ def AssumeSeparateStorage : Builtin {
// Multiprecision Arithmetic Builtins.
-class MPATemplate : Template<["unsigned char", "unsigned short", "unsigned int",
- "unsigned long int", "unsigned long long int"],
- ["b", "s", "", "l", "ll"]>;
+class MPATemplate : Template<
+ ["unsigned char", "unsigned short", "unsigned int",
+ "unsigned long int", "unsigned long long int"],
+ ["b", "s", "",
+ "l", "ll"]>;
def Addc : Builtin, MPATemplate {
let Spellings = ["__builtin_addc"];
@@ -4566,9 +4531,9 @@ def MulOverflow : Builtin {
let Prototype = "bool(...)";
}
-class UOverflowTemplate
- : Template<["unsigned int", "unsigned long int", "unsigned long long int"],
- ["_overflow", "l_overflow", "ll_overflow"]>;
+class UOverflowTemplate :
+ Template<["unsigned int", "unsigned long int", "unsigned long long int"],
+ ["_overflow", "l_overflow", "ll_overflow"]>;
def UaddOverflow : Builtin, UOverflowTemplate {
let Spellings = ["__builtin_uadd"];
@@ -4588,9 +4553,9 @@ def UmulOverflow : Builtin, UOverflowTemplate {
let Prototype = "bool(T const, T const, T*)";
}
-class SOverflowTemplate
- : Template<["int", "long int", "long long int"], ["_overflow", "l_overflow",
- "ll_overflow"]>;
+class SOverflowTemplate :
+ Template<["int", "long int", "long long int"],
+ ["_overflow", "l_overflow", "ll_overflow"]>;
def SaddOverflow : Builtin, SOverflowTemplate {
let Spellings = ["__builtin_sadd"];
@@ -4850,8 +4815,7 @@ def PtrauthStringDiscriminator : Builtin {
// AllocToken builtins.
def InferAllocToken : Builtin {
let Spellings = ["__builtin_infer_alloc_token"];
- let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr,
- UnevaluatedArguments];
+ let Attributes = [NoThrow, Const, Pure, CustomTypeChecking, Constexpr, UnevaluatedArguments];
let Prototype = "size_t(...)";
}
@@ -4954,8 +4918,7 @@ def GetPipeMaxPackets : OCLPipeLangBuiltin {
}
// OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
-// Custom builtin check allows to perform special check of passed block
-// arguments.
+// A custom builtin check allows a special check of the passed block arguments.
def EnqueueKernel : OCL_DSELangBuiltin {
let Spellings = ["enqueue_kernel"];
let Attributes = [CustomTypeChecking, NoThrow];
@@ -5043,7 +5006,7 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}
// HLSL
-def HLSLAddUint64 : LangBuiltin<"HLSL_LANG"> {
+def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_adduint64"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5130,15 +5093,13 @@ def HLSLResourceUninitializedHandle : LangBuiltin<"HLSL_LANG"> {
def HLSLResourceHandleFromBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefrombinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
- "int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
}
def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_handlefromimplicitbinding"];
let Attributes = [NoThrow];
- let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, "
- "int32_t, uint32_t, char const*)";
+ let Prototype = "__hlsl_resource_t(__hlsl_resource_t, uint32_t, uint32_t, int32_t, uint32_t, char const*)";
}
def HLSLResourceCounterHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> {
@@ -5291,18 +5252,16 @@ def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLCrossFloat : LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossFloat: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf32"];
let Attributes = [NoThrow, Const];
- let Prototype =
- "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
+ let Prototype = "_ExtVector<3, float>(_ExtVector<3, float>, _ExtVector<3, float>)";
}
-def HLSLCrossHalf : LangBuiltin<"HLSL_LANG"> {
+def HLSLCrossHalf: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_crossf16"];
let Attributes = [NoThrow, Const];
- let Prototype =
- "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
+ let Prototype = "_ExtVector<3, __fp16>(_ExtVector<3, __fp16>, _ExtVector<3, __fp16>)";
}
def HLSLDegrees : LangBuiltin<"HLSL_LANG"> {
@@ -5413,7 +5372,7 @@ def HLSLSign : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLStep : LangBuiltin<"HLSL_LANG"> {
+def HLSLStep: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_step"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
@@ -5431,13 +5390,13 @@ def HLSLBufferUpdateCounter : LangBuiltin<"HLSL_LANG"> {
let Prototype = "uint32_t(__hlsl_resource_t, int)";
}
-def HLSLSplitDouble : LangBuiltin<"HLSL_LANG"> {
+def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}
-def HLSLClip : LangBuiltin<"HLSL_LANG"> {
+def HLSLClip: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_clip"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
@@ -5449,7 +5408,7 @@ def HLSLGroupMemoryBarrier : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void()";
}
-def HLSLGroupMemoryBarrierWithGroupSync : LangBuiltin<"HLSL_LANG"> {
+def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_group_memory_barrier_with_group_sync"];
let Attributes = [NoThrow, Const];
let Prototype = "void()";
diff --git a/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl b/clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier-errors.hlsl
similarity index 100%
rename from clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier.hlsl
rename to clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrier-errors.hlsl
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 704c0dd6dc5d7..b41e90480d3cb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include <fstream>
#define DEBUG_TYPE "spirv-isel"
@@ -2810,11 +2811,15 @@ bool SPIRVInstructionSelector::selectBarrierInst(MachineInstr &I,
buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
Register ScopeReg = buildI32Constant(Scope, I);
MachineBasicBlock &BB = *I.getParent();
- BuildMI(BB, I, I.getDebugLoc(), TII.get(BarrierType))
- .addUse(ScopeReg)
- .addUse(ScopeReg)
- .addUse(MemSemReg)
- .constrainAllUses(TII, TRI, RBI);
+ auto MI =
+ BuildMI(BB, I, I.getDebugLoc(), TII.get(BarrierType)).addUse(ScopeReg);
+
+  // OpControlBarrier additionally takes an Execution Scope operand.
+ if (WithGroupSync) {
+ MI.addUse(ScopeReg);
+ }
+
+ MI.addUse(MemSemReg).constrainAllUses(TII, TRI, RBI);
return true;
}
@@ -4181,7 +4186,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_firstbitlow: // There is no CL equivlent of FindILsb
return selectFirstBitLow(ResVReg, ResType, I);
case Intrinsic::spv_group_memory_barrier:
- return selectBarrierInst(I, SPIRV::Scope::Device, false);
+ return selectBarrierInst(I, SPIRV::Scope::Workgroup, false);
case Intrinsic::spv_group_memory_barrier_with_group_sync:
return selectBarrierInst(I, SPIRV::Scope::Workgroup, true);
case Intrinsic::spv_generic_cast_to_ptr_explicit: {
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
index 2c865d9965b73..ce828168c0db1 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier.ll
@@ -2,7 +2,7 @@
define void @test_group_memory_barrier() {
entry:
- ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+ ; CHECK: call void @dx.op.barrier(i32 80, i32 8)
call void @llvm.dx.group.memory.barrier()
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
index baf93d4e177f0..ce3a20d331d0b 100644
--- a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -5,4 +5,4 @@ entry:
; CHECK: call void @dx.op.barrier(i32 80, i32 9)
call void @llvm.dx.group.memory.barrier.with.group.sync()
ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
index 30697a2490c0f..8884c738ed05c 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: %[[#TY:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16
; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2
- ; CHECK: OpMemoryBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
+ ; CHECK: OpMemoryBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
call void @llvm.spv.group.memory.barrier()
ret void
}
>From 1f9409e3d539cf9219b3b2f816a675df6bc819d4 Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 10:45:47 +0100
Subject: [PATCH 3/5] clang-format
---
clang/lib/CodeGen/CGHLSLRuntime.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 11d99de157ba8..548ed776d12bd 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -184,8 +184,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex,
resource_nonuniformindex)
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter)
- GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier,
- group_memory_barrier)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier, group_memory_barrier)
GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync,
group_memory_barrier_with_group_sync)
GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x)
>From d36f250525ee9e547d4ae758e801f342c0bffe1c Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 10:47:09 +0100
Subject: [PATCH 4/5] clang-format
---
llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index b41e90480d3cb..095cc675683c5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -32,7 +32,6 @@
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include <fstream>
#define DEBUG_TYPE "spirv-isel"
>From 0f3120cec0b541c95d0e11d93e4917184c25024f Mon Sep 17 00:00:00 2001
From: KungFuDonkey <sietze.riemersma at gmail.com>
Date: Mon, 9 Mar 2026 11:16:44 +0100
Subject: [PATCH 5/5] reformat SPIRVInstructionSelector.cpp
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 66 +++++++++----------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 095cc675683c5..c34e45f8cdf57 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4349,8 +4349,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
bool SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg,
SPIRVTypeInst ResType,
MachineInstr &I) const {
- // The images need to be loaded in the same basic block as their use. We
- // defer loading the image to the intrinsic that uses it.
+ // The images need to be loaded in the same basic block as their use. We defer
+ // loading the image to the intrinsic that uses it.
if (ResType->getOpcode() == SPIRV::OpTypeImage)
return true;
@@ -4397,9 +4397,9 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
Register CounterHandleReg = Intr.getOperand(2).getReg();
Register IncrReg = Intr.getOperand(3).getReg();
- // The counter handle is a pointer to the counter variable (which is a
- // struct containing an i32). We need to get a pointer to that i32 member to
- // do the atomic operation.
+ // The counter handle is a pointer to the counter variable (which is a struct
+ // containing an i32). We need to get a pointer to that i32 member to do the
+ // atomic operation.
#ifndef NDEBUG
SPIRVTypeInst CounterVarType = GR.getSPIRVTypeForVReg(CounterHandleReg);
SPIRVTypeInst CounterVarPointeeType = GR.getPointeeType(CounterVarType);
@@ -4458,8 +4458,8 @@ bool SPIRVInstructionSelector::selectUpdateCounter(Register &ResVReg,
}
// In HLSL, IncrementCounter returns the value *before* the increment, while
- // DecrementCounter returns the value *after* the decrement. Both are
- // lowered to the same atomic intrinsic which returns the value *before* the
+ // DecrementCounter returns the value *after* the decrement. Both are lowered
+ // to the same atomic intrinsic which returns the value *before* the
// operation. So for decrements (negative IncrVal), we must subtract the
// increment value from the result to get the post-decrement value.
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
@@ -4478,8 +4478,8 @@ bool SPIRVInstructionSelector::selectReadImageIntrinsic(Register &ResVReg,
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with
- // other issues.
+ // We will do that when we can, but for now trying to move forward with other
+ // issues.
Register ImageReg = I.getOperand(2).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -4698,8 +4698,8 @@ bool SPIRVInstructionSelector::selectGatherIntrinsic(Register &ResVReg,
auto Dim = static_cast<SPIRV::Dim::Dim>(ImageType->getOperand(2).getImm());
if (Dim != SPIRV::Dim::DIM_2D && Dim != SPIRV::Dim::DIM_Cube &&
Dim != SPIRV::Dim::DIM_Rect) {
- I.emitGenericError("Gather operations are only supported for 2D, Cube, "
- "and Rect images.");
+ I.emitGenericError(
+ "Gather operations are only supported for 2D, Cube, and Rect images.");
return false;
}
@@ -4822,10 +4822,10 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(Register &ResVReg,
Register ResourcePtr = I.getOperand(2).getReg();
SPIRVTypeInst RegType = GR.getSPIRVTypeForVReg(ResourcePtr, I.getMF());
if (RegType->getOpcode() == SPIRV::OpTypeImage) {
- // For texel buffers, the index into the image is part of the OpImageRead
- // or OpImageWrite instructions. So we will do nothing in this case. This
- // intrinsic will be combined with the load or store when selecting the
- // load or store.
+ // For texel buffers, the index into the image is part of the OpImageRead or
+ // OpImageWrite instructions. So we will do nothing in this case. This
+ // intrinsic will be combined with the load or store when selecting the load
+ // or store.
return true;
}
@@ -4943,8 +4943,8 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
// this will generate invalid code. A proper solution is to move
// the OpLoad from selectHandleFromBinding here. However, to do
// that we will need to change the return type of the intrinsic.
- // We will do that when we can, but for now trying to move forward with
- // other issues.
+ // We will do that when we can, but for now trying to move forward with other
+ // issues.
Register ImageReg = I.getOperand(1).getReg();
auto *ImageDef = cast<GIntrinsic>(getVRegDef(*MRI, ImageReg));
Register NewImageReg = MRI->createVirtualRegister(MRI->getRegClass(ImageReg));
@@ -5262,8 +5262,8 @@ bool SPIRVInstructionSelector::selectFirstBitHigh(Register ResVReg,
return selectFirstBitSet64(ResVReg, ResType, I, OpReg, BitSetOpcode,
/*SwapPrimarySide=*/false);
default:
- report_fatal_error("spv_firstbituhigh and spv_firstbitshigh only support "
- "16,32,64 bits.");
+ report_fatal_error(
+ "spv_firstbituhigh and spv_firstbitshigh only support 16,32,64 bits.");
}
}
@@ -5274,8 +5274,8 @@ bool SPIRVInstructionSelector::selectFirstBitLow(Register ResVReg,
Register OpReg = I.getOperand(2).getReg();
SPIRVTypeInst OpType = GR.getSPIRVTypeForVReg(OpReg);
// OpUConvert treats the operand bits as an unsigned i16 and zero extends it
- // to an unsigned i32. As this leaves all the least significant bits
- // unchanged so the first set bit from the LSB side doesn't change.
+ // to an unsigned i32. As this leaves all the least significant bits unchanged
+ // so the first set bit from the LSB side doesn't change.
unsigned ExtendOpcode = SPIRV::OpUConvert;
unsigned BitSetOpcode = GL::FindILsb;
@@ -5356,11 +5356,11 @@ bool SPIRVInstructionSelector::selectBranchCond(MachineInstr &I) const {
// G_BRCOND to create an OpBranchConditional. We should hit G_BR first, and
// generate the OpBranchConditional in selectBranch above.
//
- // If an OpBranchConditional has been generated, we simply return, as the
- // work is alread done. If there is no OpBranchConditional, LLVM must be
- // relying on implicit fallthrough to the next basic block, so we need to
- // create an OpBranchConditional with an explicit "false" argument pointing
- // to the next basic block that LLVM would fall through to.
+ // If an OpBranchConditional has been generated, we simply return, as the work
+  // is already done. If there is no OpBranchConditional, LLVM must be relying on
+ // implicit fallthrough to the next basic block, so we need to create an
+ // OpBranchConditional with an explicit "false" argument pointing to the next
+ // basic block that LLVM would fall through to.
const MachineInstr *NextI = I.getNextNode();
// Check if this has already been successfully selected.
if (NextI != nullptr && NextI->getOpcode() == SPIRV::OpBranchConditional)
@@ -5474,8 +5474,8 @@ bool SPIRVInstructionSelector::selectGlobalValue(
auto GlobalVar = cast<GlobalVariable>(GV);
assert(GlobalVar->getName() != "llvm.global.annotations");
- // Skip empty declaration for GVs with initializers till we get the decl
- // with passed initializer.
+ // Skip empty declaration for GVs with initializers till we get the decl with
+ // passed initializer.
if (hasInitializer(GlobalVar) && !Init)
return true;
@@ -5491,9 +5491,9 @@ bool SPIRVInstructionSelector::selectGlobalValue(
ResVReg, ResType, GlobalIdent, GV, StorageClass, Init,
GlobalVar->isConstant(), LnkType, MIRBuilder, true);
// TODO: For AMDGCN, we pipe externally_initialized through via
- // HostAccessINTEL, with ReadWrite (3) access, which is we then handle
- // during reverse translation. We should remove this once SPIR-V gains the
- // ability to express the concept.
+  // HostAccessINTEL, with ReadWrite (3) access, which we then handle during
+ // reverse translation. We should remove this once SPIR-V gains the ability to
+ // express the concept.
if (GlobalVar->isExternallyInitialized() &&
STI.getTargetTriple().getVendor() == Triple::AMD) {
constexpr unsigned ReadWriteINTEL = 3u;
@@ -5762,8 +5762,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
buildPointerToResource(SPIRVTypeInst(VarType), SC, Set, Binding,
ArraySize, IndexReg, Name, MIRBuilder);
- // The handle for the buffer is the pointer to the resource. For an image,
- // the handle is the image object. So images get an extra load.
+ // The handle for the buffer is the pointer to the resource. For an image, the
+ // handle is the image object. So images get an extra load.
uint32_t LoadOpcode =
IsStructuredBuffer ? SPIRV::OpCopyObject : SPIRV::OpLoad;
GR.assignSPIRVTypeToVReg(ResType, HandleReg, *Pos.getMF());
More information about the cfe-commits
mailing list