[clang] [clang-tools-extra] [flang] [llvm] [mlir] [HLSL][SPIRV] Fix calling convention for call in entry function. (PR #110275)

Steven Perron via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 30 06:56:34 PDT 2024


Andrzej Warzyński <andrzej.warzynski at arm.com>,Steven Perron
 <stevenperron at google.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/110275 at github.com>


https://github.com/s-perron updated https://github.com/llvm/llvm-project/pull/110275

>From 906a7bc775f1d22b642c1157c424a55b686cb46a Mon Sep 17 00:00:00 2001
From: Steven Perron <stevenperron at google.com>
Date: Tue, 17 Sep 2024 16:22:57 -0400
Subject: [PATCH 01/12] [HLSL][SPIRV] Fix calling convention for call in entry
 function.

- Fix the calling convention used for the call in the entry point
  wrapper. No calling convention is currently set. I can easily use the
  calling convention of the function that is being called.

- Use the Microsoft CXX ABI for name mangling in Vulkan. There is a
  problem with the name mangling for an HLSL entry point named `main`
  when using the Itanium CXX ABI: it does not mangle `main` because
  `main` is treated as special. However, in Vulkan SPIR-V, a function
  named "main" has no special meaning, and should be treated like any
  other function. This also allows us to match the DXIL codegen better,
  and to create fewer special cases.

Part of https://github.com/llvm/llvm-project/issues/108567.
---
 clang/lib/AST/MicrosoftMangle.cpp             |  1 +
 clang/lib/Basic/Targets/SPIR.h                |  5 ++
 clang/lib/CodeGen/CGHLSLRuntime.cpp           |  1 +
 clang/test/CodeGenHLSL/builtins/saturate.hlsl | 32 ++++++------
 .../wave_get_lane_index_do_while.hlsl         |  2 +-
 .../builtins/wave_get_lane_index_simple.hlsl  |  2 +-
 .../builtins/wave_get_lane_index_subcall.hlsl |  6 +--
 .../CodeGenHLSL/convergence/do.while.hlsl     | 28 +++++-----
 clang/test/CodeGenHLSL/convergence/for.hlsl   | 52 +++++++++----------
 clang/test/CodeGenHLSL/convergence/while.hlsl | 38 +++++++-------
 clang/test/CodeGenHLSL/main_entry.hlsl        | 21 ++++++++
 .../semantics/DispatchThreadID.hlsl           |  4 +-
 12 files changed, 111 insertions(+), 81 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/main_entry.hlsl

diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 7b069c66aed598..9f8eeae061f65d 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -3164,6 +3164,7 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC,
   switch (CC) {
     default:
       break;
+    case CC_SpirFunction:
     case CC_Win64:
     case CC_X86_64SysV:
     case CC_C:
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index cc79562de2871e..8a89b0cef00192 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -316,6 +316,11 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
     SizeType = TargetInfo::UnsignedInt;
     resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
                     "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+
+    if (Triple.getOS() == llvm::Triple::Vulkan) {
+      // For Vulkan we want the same mangling as DirectX.
+      TheCXXABI.set(TargetCXXABI::Microsoft);
+    }
   }
 
   void getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 59d8fc830dcc8f..7677df6736682d 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -415,6 +415,7 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
   }
 
   CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args);
+  CI->setCallingConv(Fn->getCallingConv());
   (void)CI;
   // FIXME: Handle codegen for return type semantics.
   // See: https://github.com/llvm/llvm-project/issues/57875
diff --git a/clang/test/CodeGenHLSL/builtins/saturate.hlsl b/clang/test/CodeGenHLSL/builtins/saturate.hlsl
index 65a3cd74621cc0..f818a2b9353fea 100644
--- a/clang/test/CodeGenHLSL/builtins/saturate.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/saturate.hlsl
@@ -19,77 +19,77 @@
 // NATIVE_HALF: call half @llvm.dx.saturate.f16(
 // NO_HALF: define noundef float @"?test_saturate_half
 // NO_HALF: call float @llvm.dx.saturate.f32(
-// SPIRV_HALF: define spir_func noundef half @_Z18test_saturate_halfDh(half
+// SPIRV_HALF: define spir_func noundef half @"?test_saturate_half
 // SPIRV_HALF: call half @llvm.spv.saturate.f16(half
-// SPIRV_NO_HALF: define spir_func noundef float @_Z18test_saturate_halfDh(float
+// SPIRV_NO_HALF: define spir_func noundef float @"?test_saturate_half
 // SPIRV_NO_HALF: call float @llvm.spv.saturate.f32(float
 half test_saturate_half(half p0) { return saturate(p0); }
 // NATIVE_HALF: define noundef <2 x half> @
 // NATIVE_HALF: call <2 x half> @llvm.dx.saturate.v2f16
 // NO_HALF: define noundef <2 x float> @"?test_saturate_half2
 // NO_HALF: call <2 x float> @llvm.dx.saturate.v2f32(
-// SPIRV_HALF: define spir_func noundef <2 x half> @_Z19test_saturate_half2Dv2_Dh(
+// SPIRV_HALF: define spir_func noundef <2 x half> @"?test_saturate_half2
 // SPIRV_HALF: call <2 x half> @llvm.spv.saturate.v2f16(<2 x half>
-// SPIRV_NO_HALF: define spir_func noundef <2 x float> @_Z19test_saturate_half2Dv2_Dh(<2 x float>
+// SPIRV_NO_HALF: define spir_func noundef <2 x float> @"?test_saturate_half2
 // SPIRV_NO_HALF: call <2 x float> @llvm.spv.saturate.v2f32(<2 x float>
 half2 test_saturate_half2(half2 p0) { return saturate(p0); }
 // NATIVE_HALF: define noundef <3 x half> @
 // NATIVE_HALF: call <3 x half> @llvm.dx.saturate.v3f16
 // NO_HALF: define noundef <3 x float> @"?test_saturate_half3
 // NO_HALF: call <3 x float> @llvm.dx.saturate.v3f32(
-// SPIRV_HALF: define spir_func noundef <3 x half> @_Z19test_saturate_half3Dv3_Dh(
+// SPIRV_HALF: define spir_func noundef <3 x half> @"?test_saturate_half3
 // SPIRV_HALF: call <3 x half> @llvm.spv.saturate.v3f16(<3 x half>
-// SPIRV_NO_HALF: define spir_func noundef <3 x float> @_Z19test_saturate_half3Dv3_Dh(<3 x float>
+// SPIRV_NO_HALF: define spir_func noundef <3 x float> @"?test_saturate_half3
 // SPIRV_NO_HALF: call <3 x float> @llvm.spv.saturate.v3f32(<3 x float>
 half3 test_saturate_half3(half3 p0) { return saturate(p0); }
 // NATIVE_HALF: define noundef <4 x half> @
 // NATIVE_HALF: call <4 x half> @llvm.dx.saturate.v4f16
 // NO_HALF: define noundef <4 x float> @"?test_saturate_half4
 // NO_HALF: call <4 x float> @llvm.dx.saturate.v4f32(
-// SPIRV_HALF: define spir_func noundef <4 x half> @_Z19test_saturate_half4Dv4_Dh(
+// SPIRV_HALF: define spir_func noundef <4 x half> @"?test_saturate_half4
 // SPIRV_HALF: call <4 x half> @llvm.spv.saturate.v4f16(<4 x half>
-// SPIRV_NO_HALF: define spir_func noundef <4 x float> @_Z19test_saturate_half4Dv4_Dh(<4 x float>
+// SPIRV_NO_HALF: define spir_func noundef <4 x float> @"?test_saturate_half4
 // SPIRV_NO_HALF: call <4 x float> @llvm.spv.saturate.v4f32(<4 x float>
 half4 test_saturate_half4(half4 p0) { return saturate(p0); }
 
 // CHECK: define noundef float @"?test_saturate_float
 // CHECK: call float @llvm.dx.saturate.f32(
-// SPIRV: define spir_func noundef float @_Z19test_saturate_floatf(float
+// SPIRV: define spir_func noundef float @"?test_saturate_float
 // SPIRV: call float @llvm.spv.saturate.f32(float
 float test_saturate_float(float p0) { return saturate(p0); }
 // CHECK: define noundef <2 x float> @"?test_saturate_float2
 // CHECK: call <2 x float> @llvm.dx.saturate.v2f32
-// SPIRV: define spir_func noundef <2 x float> @_Z20test_saturate_float2Dv2_f(<2 x float>
+// SPIRV: define spir_func noundef <2 x float> @"?test_saturate_float2
 // SPIRV: call <2 x float> @llvm.spv.saturate.v2f32(<2 x float>
 float2 test_saturate_float2(float2 p0) { return saturate(p0); }
 // CHECK: define noundef <3 x float> @"?test_saturate_float3
 // CHECK: call <3 x float> @llvm.dx.saturate.v3f32
-// SPIRV: define spir_func noundef <3 x float> @_Z20test_saturate_float3Dv3_f(<3 x float>
+// SPIRV: define spir_func noundef <3 x float> @"?test_saturate_float3
 // SPIRV: call <3 x float> @llvm.spv.saturate.v3f32(<3 x float>
 float3 test_saturate_float3(float3 p0) { return saturate(p0); }
 // CHECK: define noundef <4 x float> @"?test_saturate_float4
 // CHECK: call <4 x float> @llvm.dx.saturate.v4f32
-// SPIRV: define spir_func noundef <4 x float> @_Z20test_saturate_float4Dv4_f(<4 x float>
+// SPIRV: define spir_func noundef <4 x float> @"?test_saturate_float4
 // SPIRV: call <4 x float> @llvm.spv.saturate.v4f32(<4 x float>
 float4 test_saturate_float4(float4 p0) { return saturate(p0); }
 
 // CHECK: define noundef double @
 // CHECK: call double @llvm.dx.saturate.f64(
-// SPIRV: define spir_func noundef double @_Z20test_saturate_doubled(double
+// SPIRV: define spir_func noundef double @"?test_saturate_double
 // SPIRV: call double @llvm.spv.saturate.f64(double
 double test_saturate_double(double p0) { return saturate(p0); }
 // CHECK: define noundef <2 x double> @
 // CHECK: call <2 x double> @llvm.dx.saturate.v2f64
-// SPIRV: define spir_func noundef <2 x double> @_Z21test_saturate_double2Dv2_d(<2 x double>
+// SPIRV: define spir_func noundef <2 x double> @"?test_saturate_double2
 // SPIRV: call <2 x double> @llvm.spv.saturate.v2f64(<2 x double>
 double2 test_saturate_double2(double2 p0) { return saturate(p0); }
 // CHECK: define noundef <3 x double> @
 // CHECK: call <3 x double> @llvm.dx.saturate.v3f64
-// SPIRV: define spir_func noundef <3 x double> @_Z21test_saturate_double3Dv3_d(<3 x double>
+// SPIRV: define spir_func noundef <3 x double> @"?test_saturate_double3
 // SPIRV: call <3 x double> @llvm.spv.saturate.v3f64(<3 x double>
 double3 test_saturate_double3(double3 p0) { return saturate(p0); }
 // CHECK: define noundef <4 x double> @
 // CHECK: call <4 x double> @llvm.dx.saturate.v4f64
-// SPIRV: define spir_func noundef <4 x double> @_Z21test_saturate_double4Dv4_d(<4 x double>
+// SPIRV: define spir_func noundef <4 x double> @"?test_saturate_double4
 // SPIRV: call <4 x double> @llvm.spv.saturate.v4f64(<4 x double>
 double4 test_saturate_double4(double4 p0) { return saturate(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_do_while.hlsl b/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_do_while.hlsl
index 9481b0d60a2723..c4cdcc9ffcdf3a 100644
--- a/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_do_while.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_do_while.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-pc-vulkan-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK: define spir_func void @main() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?main@@YAXXZ"() [[A0:#[0-9]+]] {
 void main() {
 // CHECK: entry:
 // CHECK:   %[[CT_ENTRY:[0-9]+]] = call token @llvm.experimental.convergence.entry()
diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_simple.hlsl b/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_simple.hlsl
index 8f52d81091c180..37bd78381ce7d9 100644
--- a/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_simple.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_simple.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-pc-vulkan-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK: define spir_func noundef i32 @_Z6test_1v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func noundef i32 @"?test_1@@YAIXZ"() [[A0:#[0-9]+]] {
 // CHECK: %[[CI:[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: call i32 @__hlsl_wave_get_lane_index() [ "convergencectrl"(token %[[CI]]) ]
 uint test_1() {
diff --git a/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_subcall.hlsl b/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_subcall.hlsl
index 6ea80d692cd244..ce7d0ea94b722a 100644
--- a/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_subcall.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/wave_get_lane_index_subcall.hlsl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
 // RUN:   spirv-pc-vulkan-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
 
-// CHECK: define spir_func noundef i32 @_Z6test_1v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func noundef i32 @"?test_1@@YAIXZ"() [[A0:#[0-9]+]] {
 // CHECK: %[[C1:[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: call i32 @__hlsl_wave_get_lane_index() [ "convergencectrl"(token %[[C1]]) ]
 uint test_1() {
@@ -10,9 +10,9 @@ uint test_1() {
 
 // CHECK-DAG: declare i32 @__hlsl_wave_get_lane_index() [[A1:#[0-9]+]]
 
-// CHECK: define spir_func noundef i32 @_Z6test_2v() [[A0]] {
+// CHECK: define spir_func noundef i32 @"?test_2@@YAIXZ"() [[A0]] {
 // CHECK: %[[C2:[0-9]+]] = call token @llvm.experimental.convergence.entry()
-// CHECK: call spir_func noundef i32 @_Z6test_1v() {{#[0-9]+}} [ "convergencectrl"(token %[[C2]]) ]
+// CHECK: call spir_func noundef i32 @"?test_1@@YAIXZ"() {{#[0-9]+}} [ "convergencectrl"(token %[[C2]]) ]
 uint test_2() {
   return test_1();
 }
diff --git a/clang/test/CodeGenHLSL/convergence/do.while.hlsl b/clang/test/CodeGenHLSL/convergence/do.while.hlsl
index ea5a45ba8fd780..55c6f1f5dbab56 100644
--- a/clang/test/CodeGenHLSL/convergence/do.while.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/do.while.hlsl
@@ -8,27 +8,27 @@ void test1() {
   do {
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test1v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test1@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
 // CHECK: do.cond:
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test2() {
   do {
     foo();
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test2v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test2@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: do.cond:
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test3() {
   do {
@@ -36,15 +36,15 @@ void test3() {
       foo();
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test3v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test3@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
 // CHECK: if.then:
-// CHECK:                    call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: do.cond:
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test4() {
   do {
@@ -54,15 +54,15 @@ void test4() {
     }
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test4v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test4@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
 // CHECK: if.then:
-// CHECK:                    call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: do.cond:
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test5() {
   do {
@@ -74,7 +74,7 @@ void test5() {
     }
   } while (cond());
 }
-// CHECK: define spir_func void @_Z5test5v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test5@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: do.body:
@@ -82,9 +82,9 @@ void test5() {
 // CHECK: while.cond:
 // CHECK:   [[T2:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T1]]) ]
 // CHECK: if.then:
-// CHECK:                    call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T2]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T2]]) ]
 // CHECK: do.cond:
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 // CHECK-DAG: attributes [[A0]] = { {{.*}}convergent{{.*}} }
 // CHECK-DAG: attributes [[A3]] = { {{.*}}convergent{{.*}} }
diff --git a/clang/test/CodeGenHLSL/convergence/for.hlsl b/clang/test/CodeGenHLSL/convergence/for.hlsl
index 95f9a196bdb676..33366eb0aee3dd 100644
--- a/clang/test/CodeGenHLSL/convergence/for.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/for.hlsl
@@ -10,68 +10,68 @@ void test1() {
     foo();
   }
 }
-// CHECK: define spir_func void @_Z5test1v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test1@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test2() {
   for (;cond();) {
     foo();
   }
 }
-// CHECK: define spir_func void @_Z5test2v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test2@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: for.body:
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test3() {
   for (cond();;) {
     foo();
   }
 }
-// CHECK: define spir_func void @_Z5test3v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test3@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T0]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T0]]) ]
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test4() {
   for (cond();cond2();) {
     foo();
   }
 }
-// CHECK: define spir_func void @_Z5test4v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test4@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T0]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T0]]) ]
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z5cond2v() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond2@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: for.body:
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test5() {
   for (cond();cond2();foo()) {
   }
 }
-// CHECK: define spir_func void @_Z5test5v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test5@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T0]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T0]]) ]
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z5cond2v() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond2@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: for.inc:
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test6() {
   for (cond();cond2();foo()) {
@@ -81,23 +81,23 @@ void test6() {
     }
   }
 }
-// CHECK: define spir_func void @_Z5test6v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test6@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T0]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T0]]) ]
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z5cond2v() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond2@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: for.body:
-// CHECK:   [[C1:%[a-zA-Z0-9]+]] = call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:   [[C1:%[a-zA-Z0-9]+]] = call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK:   br i1 [[C1]], label %if.then, label %if.end
 // CHECK: if.then:
-// CHECK:   call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:   call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK:   br label %for.end
 // CHECK: if.end:
 // CHECK:   br label %for.inc
 // CHECK: for.inc:
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test7() {
   for (cond();;) {
@@ -106,16 +106,16 @@ void test7() {
     }
   }
 }
-// CHECK: define spir_func void @_Z5test7v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test7@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T0]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T0]]) ]
 // CHECK: for.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: for.cond3:
 // CHECK:   [[T2:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T1]]) ]
-// CHECK:                    call spir_func void @_Z3foov() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T2]]) ]
+// CHECK:                    call spir_func void @"?foo@@YAXXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T2]]) ]
 
 // CHECK-DAG: attributes [[A0]] = { {{.*}}convergent{{.*}} }
 // CHECK-DAG: attributes [[A3]] = { {{.*}}convergent{{.*}} }
diff --git a/clang/test/CodeGenHLSL/convergence/while.hlsl b/clang/test/CodeGenHLSL/convergence/while.hlsl
index 92777000190d22..f96579c5df451a 100644
--- a/clang/test/CodeGenHLSL/convergence/while.hlsl
+++ b/clang/test/CodeGenHLSL/convergence/while.hlsl
@@ -8,26 +8,26 @@ void test1() {
   while (cond()) {
   }
 }
-// CHECK: define spir_func void @_Z5test1v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test1@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: while.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3:#[0-9]+]] [ "convergencectrl"(token [[T1]]) ]
 
 void test2() {
   while (cond()) {
     foo();
   }
 }
-// CHECK: define spir_func void @_Z5test2v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test2@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: while.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: while.body:
-// CHECK:   call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:   call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 
 void test3() {
   while (cond()) {
@@ -36,16 +36,16 @@ void test3() {
     foo();
   }
 }
-// CHECK: define spir_func void @_Z5test3v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test3@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: while.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: if.then:
 // CHECK:   br label %while.end
 // CHECK: if.end:
-// CHECK:   call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:   call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK:   br label %while.cond
 
 void test4() {
@@ -56,14 +56,14 @@ void test4() {
     }
   }
 }
-// CHECK: define spir_func void @_Z5test4v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test4@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: while.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: if.then:
-// CHECK:   call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:   call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK:   br label %while.end
 // CHECK: if.end:
 // CHECK:   br label %while.cond
@@ -78,17 +78,17 @@ void test5() {
     }
   }
 }
-// CHECK: define spir_func void @_Z5test5v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test5@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: while.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: while.cond2:
 // CHECK:   [[T2:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T1]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T2]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T2]]) ]
 // CHECK: if.then:
-// CHECK:   call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T2]]) ]
+// CHECK:   call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T2]]) ]
 // CHECK:   br label %while.end
 
 void test6() {
@@ -102,17 +102,17 @@ void test6() {
     }
   }
 }
-// CHECK: define spir_func void @_Z5test6v() [[A0:#[0-9]+]] {
+// CHECK: define spir_func void @"?test6@@YAXXZ"() [[A0:#[0-9]+]] {
 // CHECK: entry:
 // CHECK:   [[T0:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
 // CHECK: while.cond:
 // CHECK:   [[T1:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T0]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK: while.cond2:
 // CHECK:   [[T2:%[0-9]+]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[T1]]) ]
-// CHECK:                    call spir_func noundef i1 @_Z4condv() [[A3]] [ "convergencectrl"(token [[T2]]) ]
+// CHECK:                    call spir_func noundef i1 @"?cond@@YA_NXZ"() [[A3]] [ "convergencectrl"(token [[T2]]) ]
 // CHECK: if.then:
-// CHECK:   call spir_func void @_Z3foov() [[A3]] [ "convergencectrl"(token [[T1]]) ]
+// CHECK:   call spir_func void @"?foo@@YAXXZ"() [[A3]] [ "convergencectrl"(token [[T1]]) ]
 // CHECK:   br label %while.end
 
 // CHECK-DAG: attributes [[A0]] = { {{.*}}convergent{{.*}} }
diff --git a/clang/test/CodeGenHLSL/main_entry.hlsl b/clang/test/CodeGenHLSL/main_entry.hlsl
new file mode 100644
index 00000000000000..2ed2b14c180a7d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/main_entry.hlsl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-compute %s -hlsl-entry main \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,DXIL
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-pc-vulkan-compute %s -hlsl-entry main \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,SPIRV
+
+// Make sure the entry point is not mangled.
+// CHECK:define void @main()
+// DXIL:   call void @"?main@@YAXXZ"()
+// SPIRV:   call spir_func void @"?main@@YAXXZ"()
+// Make sure the function attribute and numthreads attribute are added.
+// CHECK:"hlsl.numthreads"="16,8,1"
+// CHECK:"hlsl.shader"="compute"
+[numthreads(16,8,1)]
+void main() {
+
+}
diff --git a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl
index 59c1620334d0e3..975a7264fd3f01 100644
--- a/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl
+++ b/clang/test/CodeGenHLSL/semantics/DispatchThreadID.hlsl
@@ -6,7 +6,8 @@
 // CHECK:       define void @foo()
 // CHECK-DXIL:  %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
 // CHECK-SPIRV: %[[#ID:]] = call i32 @llvm.[[TARGET]].thread.id(i32 0)
-// CHECK:       call void @{{.*}}foo{{.*}}(i32 %[[#ID]])
+// CHECK-DXIL:       call void @{{.*}}foo{{.*}}(i32 %[[#ID]])
+// CHECK-SPIRV:      call spir_func void @{{.*}}foo{{.*}}(i32 %[[#ID]])
 [shader("compute")]
 [numthreads(8,8,1)]
 void foo(uint Idx : SV_DispatchThreadID) {}
@@ -17,6 +18,7 @@ void foo(uint Idx : SV_DispatchThreadID) {}
 // CHECK:       %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].thread.id(i32 1)
 // CHECK:       %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
 // CHECK-DXIL:  call void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]])
+// CHECK-SPIRV:  call spir_func void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]])
 [shader("compute")]
 [numthreads(8,8,1)]
 void bar(uint2 Idx : SV_DispatchThreadID) {}

>From 31829150814b59519958cfefd06ebf20dce748f2 Mon Sep 17 00:00:00 2001
From: Congcong Cai <congcongcai0907 at 163.com>
Date: Mon, 30 Sep 2024 21:25:22 +0800
Subject: [PATCH 02/12] [clang-tidy][NFC] optimize unused using decls
 performance (#110200)

Improve performance by moving the check forward to the matching stage
---
 .../clang-tidy/misc/UnusedUsingDeclsCheck.cpp    | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp
index 90b317527ee410..1ff61bae46b1ed 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp
@@ -25,6 +25,13 @@ AST_MATCHER_P(DeducedTemplateSpecializationType, refsToTemplatedDecl,
   return false;
 }
 
+AST_MATCHER_P(Type, asTagDecl, clang::ast_matchers::internal::Matcher<TagDecl>,
+              DeclMatcher) {
+  if (const TagDecl *ND = Node.getAsTagDecl())
+    return DeclMatcher.matches(*ND, Finder, Builder);
+  return false;
+}
+
 } // namespace
 
 // A function that helps to tell whether a TargetDecl in a UsingDecl will be
@@ -61,7 +68,8 @@ void UnusedUsingDeclsCheck::registerMatchers(MatchFinder *Finder) {
   Finder->addMatcher(userDefinedLiteral().bind("used"), this);
   Finder->addMatcher(
       loc(elaboratedType(unless(hasQualifier(nestedNameSpecifier())),
-                         hasUnqualifiedDesugaredType(type().bind("usedType")))),
+                         hasUnqualifiedDesugaredType(
+                             type(asTagDecl(tagDecl().bind("used")))))),
       this);
   // Cases where we can identify the UsingShadowDecl directly, rather than
   // just its target.
@@ -139,12 +147,6 @@ void UnusedUsingDeclsCheck::check(const MatchFinder::MatchResult &Result) {
     return;
   }
 
-  if (const auto *T = Result.Nodes.getNodeAs<Type>("usedType")) {
-    if (const auto *ND = T->getAsTagDecl())
-      RemoveNamedDecl(ND);
-    return;
-  }
-
   if (const auto *UsedShadow =
           Result.Nodes.getNodeAs<UsingShadowDecl>("usedShadow")) {
     removeFromFoundDecls(UsedShadow->getTargetDecl());

>From 3eac84ab61fa2be8c9101fa4fa274b177d7dbbde Mon Sep 17 00:00:00 2001
From: MingZhu Yan <69898423+trdthg at users.noreply.github.com>
Date: Mon, 30 Sep 2024 21:30:06 +0800
Subject: [PATCH 03/12] [mlir][doc][SPIR-V] Add missing `>` (#110464)

![image](https://github.com/user-attachments/assets/c3a8761f-647f-4a52-a68c-06a4cb543924)

If I'm not mistaken, there should be a closing angle bracket (`>`) here?

Signed-off-by: MingZhu Yan <yanmingzhu at iscas.ac.cn>
---
 mlir/docs/Dialects/SPIR-V.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/docs/Dialects/SPIR-V.md b/mlir/docs/Dialects/SPIR-V.md
index 51ba3482a379ab..c1d50f0fbe4071 100644
--- a/mlir/docs/Dialects/SPIR-V.md
+++ b/mlir/docs/Dialects/SPIR-V.md
@@ -388,7 +388,7 @@ This corresponds to SPIR-V [struct type][StructType]. Its syntax is
 ```
 struct-member-decoration ::= integer-literal? spirv-decoration*
 struct-type ::= `!spirv.struct<` spirv-type (`[` struct-member-decoration `]`)?
-                     (`, ` spirv-type (`[` struct-member-decoration `]`)?
+                     (`, ` spirv-type (`[` struct-member-decoration `]`)? `>`
 ```
 
 For Example,

>From b0b7a6c7e46521c983832bb4a8ef9d2d5c91bd1e Mon Sep 17 00:00:00 2001
From: David Truby <david.truby at arm.com>
Date: Mon, 30 Sep 2024 14:36:39 +0100
Subject: [PATCH 04/12] [flang] Implement GETUID and GETGID intrinsics
 (#108017)

GETUID and GETGID are non-standard intrinsics supported by a number of
other Fortran compilers. On supported platforms these intrinsics simply
call the POSIX getuid() and getgid() functions and return the result.
The only platform we support that does not have these is Windows.

Windows does not have the same concept of UIDs and GIDs, so on Windows
we issue a warning indicating this and return 1 from both functions.

Co-authored-by: Yi Wu <yi.wu2 at arm.com>

---------

Co-authored-by: Yi Wu <yi.wu2 at arm.com>
---
 flang/docs/Intrinsics.md                      |  2 +-
 flang/include/flang/Evaluate/target.h         |  6 ++++++
 .../flang/Optimizer/Builder/IntrinsicCall.h   |  4 ++++
 .../Optimizer/Builder/Runtime/Intrinsics.h    |  3 +++
 flang/include/flang/Runtime/extensions.h      | 14 ++++++++++++++
 flang/include/flang/Tools/TargetSetup.h       |  3 +++
 flang/lib/Evaluate/intrinsics.cpp             |  2 ++
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 18 ++++++++++++++++++
 .../Optimizer/Builder/Runtime/Intrinsics.cpp  | 16 ++++++++++++++++
 flang/lib/Semantics/check-call.cpp            | 16 ++++++++++++++++
 flang/lib/Semantics/check-call.h              |  2 ++
 flang/lib/Semantics/expression.cpp            |  3 +++
 flang/runtime/extensions.cpp                  | 19 +++++++++++++++++++
 flang/test/Semantics/windows.f90              | 12 ++++++++++++
 .../Optimizer/Builder/Runtime/CommandTest.cpp |  2 +-
 .../Builder/Runtime/IntrinsicsTest.cpp        | 17 +++++++++++++++++
 flang/unittests/Optimizer/CMakeLists.txt      |  1 +
 17 files changed, 138 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Semantics/windows.f90
 create mode 100644 flang/unittests/Optimizer/Builder/Runtime/IntrinsicsTest.cpp

diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index 87716731ead855..e288fdeec6cd22 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -765,7 +765,7 @@ This phase currently supports all the intrinsic procedures listed above but the
 | Coarray intrinsic functions | COSHAPE |
 | Object characteristic inquiry functions | ALLOCATED, ASSOCIATED, EXTENDS_TYPE_OF, IS_CONTIGUOUS, PRESENT, RANK, SAME_TYPE, STORAGE_SIZE |
 | Type inquiry intrinsic functions | BIT_SIZE, DIGITS, EPSILON, HUGE, KIND, MAXEXPONENT, MINEXPONENT, NEW_LINE, PRECISION, RADIX, RANGE, TINY|
-| Non-standard intrinsic functions | AND, OR, XOR, SHIFT, ZEXT, IZEXT, COSD, SIND, TAND, ACOSD, ASIND, ATAND, ATAN2D, COMPL, EQV, NEQV, INT8, JINT, JNINT, KNINT, QCMPLX, DREAL, DFLOAT, QEXT, QFLOAT, QREAL, DNUM, NUM, JNUM, KNUM, QNUM, RNUM, RAN, RANF, ILEN, SIZEOF, MCLOCK, SECNDS, COTAN, IBCHNG, ISHA, ISHC, ISHL, IXOR, IARG, IARGC, NARGS, GETPID, NUMARG, BADDRESS, IADDR, CACHESIZE, EOF, FP_CLASS, INT_PTR_KIND, ISNAN, MALLOC |
+| Non-standard intrinsic functions | AND, OR, XOR, SHIFT, ZEXT, IZEXT, COSD, SIND, TAND, ACOSD, ASIND, ATAND, ATAN2D, COMPL, EQV, NEQV, INT8, JINT, JNINT, KNINT, QCMPLX, DREAL, DFLOAT, QEXT, QFLOAT, QREAL, DNUM, NUM, JNUM, KNUM, QNUM, RNUM, RAN, RANF, ILEN, SIZEOF, MCLOCK, SECNDS, COTAN, IBCHNG, ISHA, ISHC, ISHL, IXOR, IARG, IARGC, NARGS, GETPID, NUMARG, BADDRESS, IADDR, CACHESIZE, EOF, FP_CLASS, INT_PTR_KIND, ISNAN, MALLOC, GETUID, GETGID |
 | Intrinsic subroutines |MVBITS (elemental), CPU_TIME, DATE_AND_TIME, EVENT_QUERY, EXECUTE_COMMAND_LINE, GET_COMMAND, GET_COMMAND_ARGUMENT, GET_ENVIRONMENT_VARIABLE, MOVE_ALLOC, RANDOM_INIT, RANDOM_NUMBER, RANDOM_SEED, SIGNAL, SLEEP, SYSTEM, SYSTEM_CLOCK |
 | Atomic intrinsic subroutines | ATOMIC_ADD |
 | Collective intrinsic subroutines | CO_REDUCE |
diff --git a/flang/include/flang/Evaluate/target.h b/flang/include/flang/Evaluate/target.h
index d076fcbf083078..b347c549e012da 100644
--- a/flang/include/flang/Evaluate/target.h
+++ b/flang/include/flang/Evaluate/target.h
@@ -102,6 +102,11 @@ class TargetCharacteristics {
   bool isPPC() const { return isPPC_; }
   void set_isPPC(bool isPPC = false);
 
+  bool isOSWindows() const { return isOSWindows_; }
+  void set_isOSWindows(bool isOSWindows = false) {
+    isOSWindows_ = isOSWindows;
+  };
+
   IeeeFeatures &ieeeFeatures() { return ieeeFeatures_; }
   const IeeeFeatures &ieeeFeatures() const { return ieeeFeatures_; }
 
@@ -111,6 +116,7 @@ class TargetCharacteristics {
   std::uint8_t align_[common::TypeCategory_enumSize][maxKind]{};
   bool isBigEndian_{false};
   bool isPPC_{false};
+  bool isOSWindows_{false};
   bool areSubnormalsFlushedToZero_{false};
   Rounding roundingMode_{defaultRounding};
   std::size_t procedurePointerByteSize_{8};
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 78bb82b17d4050..b2da6138fc9d8e 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -256,6 +256,10 @@ struct IntrinsicLibrary {
                         llvm::ArrayRef<mlir::Value> args);
   void genGetCommandArgument(mlir::ArrayRef<fir::ExtendedValue> args);
   void genGetEnvironmentVariable(llvm::ArrayRef<fir::ExtendedValue>);
+  mlir::Value genGetGID(mlir::Type resultType,
+                        llvm::ArrayRef<mlir::Value> args);
+  mlir::Value genGetUID(mlir::Type resultType,
+                        llvm::ArrayRef<mlir::Value> args);
   fir::ExtendedValue genIall(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genIand(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genIany(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
index 240de5a899d37b..49d8249d6bcbce 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
@@ -47,6 +47,9 @@ void genDateAndTime(fir::FirOpBuilder &, mlir::Location,
 void genEtime(fir::FirOpBuilder &builder, mlir::Location loc,
               mlir::Value values, mlir::Value time);
 
+mlir::Value genGetUID(fir::FirOpBuilder &, mlir::Location);
+mlir::Value genGetGID(fir::FirOpBuilder &, mlir::Location);
+
 void genRandomInit(fir::FirOpBuilder &, mlir::Location, mlir::Value repeatable,
                    mlir::Value imageDistinct);
 void genRandomNumber(fir::FirOpBuilder &, mlir::Location, mlir::Value harvest);
diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h
index fef651f3b2eedb..6a842bafc155f9 100644
--- a/flang/include/flang/Runtime/extensions.h
+++ b/flang/include/flang/Runtime/extensions.h
@@ -20,6 +20,14 @@
 #include <cstddef>
 #include <cstdint>
 
+#ifdef _WIN32
+// UID and GID don't exist on Windows; these typedefs exist to avoid errors.
+typedef std::uint32_t uid_t;
+typedef std::uint32_t gid_t;
+#else
+#include "sys/types.h" // uid_t, gid_t
+#endif
+
 extern "C" {
 
 // CALL FLUSH(n) antedates the Fortran 2003 FLUSH statement.
@@ -35,6 +43,12 @@ std::int32_t FORTRAN_PROCEDURE_NAME(iargc)();
 void FORTRAN_PROCEDURE_NAME(getarg)(
     std::int32_t &n, char *arg, std::int64_t length);
 
+// Calls getgid()
+gid_t RTNAME(GetGID)();
+
+// Calls getuid()
+uid_t RTNAME(GetUID)();
+
 // GNU extension subroutine GETLOG(C).
 void FORTRAN_PROCEDURE_NAME(getlog)(char *name, std::int64_t length);
 
diff --git a/flang/include/flang/Tools/TargetSetup.h b/flang/include/flang/Tools/TargetSetup.h
index c8d32e8e87cf1e..f52b5ddaa8d49a 100644
--- a/flang/include/flang/Tools/TargetSetup.h
+++ b/flang/include/flang/Tools/TargetSetup.h
@@ -59,6 +59,9 @@ namespace Fortran::tools {
   if (targetTriple.isPPC())
     targetCharacteristics.set_isPPC(true);
 
+  if (targetTriple.isOSWindows())
+    targetCharacteristics.set_isOSWindows(true);
+
   // TODO: use target machine data layout to set-up the target characteristics
   // type size and alignment info.
 }
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index 17a09c080e72c4..2b11b40e27ad1b 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -523,7 +523,9 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
         {{"c", DefaultChar, Rank::scalar, Optionality::required,
             common::Intent::Out}},
         TypePattern{IntType, KindCode::greaterOrEqualToKind, 4}},
+    {"getgid", {}, DefaultInt},
     {"getpid", {}, DefaultInt},
+    {"getuid", {}, DefaultInt},
     {"huge",
         {{"x", SameIntOrReal, Rank::anyOrAssumedRank, Optionality::required,
             common::Intent::In, {ArgFlag::canBeMoldNull}}},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 4e6d92213c1241..6c5bd3b9417e8e 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -293,7 +293,9 @@ static constexpr IntrinsicHandler handlers[]{
      &I::genGetCwd,
      {{{"c", asBox}, {"status", asAddr, handleDynamicOptional}}},
      /*isElemental=*/false},
+    {"getgid", &I::genGetGID},
     {"getpid", &I::genGetPID},
+    {"getuid", &I::genGetUID},
     {"iachar", &I::genIchar},
     {"iall",
      &I::genIall,
@@ -3650,6 +3652,14 @@ void IntrinsicLibrary::genGetCommand(llvm::ArrayRef<fir::ExtendedValue> args) {
   }
 }
 
+// GETGID
+mlir::Value IntrinsicLibrary::genGetGID(mlir::Type resultType,
+                                        llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 0 && "getgid takes no input");
+  return builder.createConvert(loc, resultType,
+                               fir::runtime::genGetGID(builder, loc));
+}
+
 // GETPID
 mlir::Value IntrinsicLibrary::genGetPID(mlir::Type resultType,
                                         llvm::ArrayRef<mlir::Value> args) {
@@ -3658,6 +3668,14 @@ mlir::Value IntrinsicLibrary::genGetPID(mlir::Type resultType,
                                fir::runtime::genGetPID(builder, loc));
 }
 
+// GETUID: emit fir::runtime::genGetUID and convert its result to resultType.
+mlir::Value IntrinsicLibrary::genGetUID(mlir::Type resultType,
+                                        llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 0 && "getuid takes no input");
+  return builder.createConvert(loc, resultType,
+                               fir::runtime::genGetUID(builder, loc));
+}
+
 // GET_COMMAND_ARGUMENT
 void IntrinsicLibrary::genGetCommandArgument(
     llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
index aff3cadc3c300d..6bdc7d8c6bc823 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
@@ -120,6 +120,22 @@ void fir::runtime::genEtime(fir::FirOpBuilder &builder, mlir::Location loc,
   builder.create<fir::CallOp>(loc, runtimeFunc, args);
 }
 
+mlir::Value fir::runtime::genGetGID(fir::FirOpBuilder &builder,
+                                    mlir::Location loc) {
+  auto runtimeFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(GetGID)>(loc, builder);
+
+  return builder.create<fir::CallOp>(loc, runtimeFunc).getResult(0);
+}
+
+mlir::Value fir::runtime::genGetUID(fir::FirOpBuilder &builder,
+                                    mlir::Location loc) {
+  auto runtimeFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(GetUID)>(loc, builder);
+
+  return builder.create<fir::CallOp>(loc, runtimeFunc).getResult(0);
+}
+
 void fir::runtime::genRandomInit(fir::FirOpBuilder &builder, mlir::Location loc,
                                  mlir::Value repeatable,
                                  mlir::Value imageDistinct) {
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index 71d1c083c31278..7c8427733e1dba 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -2028,6 +2028,22 @@ bool CheckPPCIntrinsic(const Symbol &generic, const Symbol &specific,
   return false;
 }
 
+bool CheckWindowsIntrinsic(
+    const Symbol &intrinsic, evaluate::FoldingContext &foldingContext) {
+  parser::ContextualMessages &messages{foldingContext.messages()};
+  // TODO: there are other intrinsics that are unsupported on Windows that
+  // should be added here.
+  if (intrinsic.name() == "getuid") {
+    messages.Say(
+        "User IDs do not exist on Windows. This function will always return 1"_warn_en_US);
+  }
+  if (intrinsic.name() == "getgid") {
+    messages.Say(
+        "Group IDs do not exist on Windows. This function will always return 1"_warn_en_US);
+  }
+  return true;
+}
+
 bool CheckArguments(const characteristics::Procedure &proc,
     evaluate::ActualArguments &actuals, SemanticsContext &context,
     const Scope &scope, bool treatingExternalAsImplicit,
diff --git a/flang/lib/Semantics/check-call.h b/flang/lib/Semantics/check-call.h
index 8553f3a31efb52..46bc61a601bd34 100644
--- a/flang/lib/Semantics/check-call.h
+++ b/flang/lib/Semantics/check-call.h
@@ -41,6 +41,8 @@ bool CheckArguments(const evaluate::characteristics::Procedure &,
 bool CheckPPCIntrinsic(const Symbol &generic, const Symbol &specific,
     const evaluate::ActualArguments &actuals,
     evaluate::FoldingContext &context);
+bool CheckWindowsIntrinsic(
+    const Symbol &intrinsic, evaluate::FoldingContext &context);
 bool CheckArgumentIsConstantExprInRange(
     const evaluate::ActualArguments &actuals, int index, int lowerBound,
     int upperBound, parser::ContextualMessages &messages);
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 182ea5d441956c..364f99d73f5cc8 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -2916,6 +2916,9 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name,
   } else {
     resolution = symbol;
   }
+  if (resolution && context_.targetCharacteristics().isOSWindows()) {
+    semantics::CheckWindowsIntrinsic(*resolution, GetFoldingContext());
+  }
   if (!resolution || resolution->attrs().test(semantics::Attr::INTRINSIC)) {
     auto name{resolution ? resolution->name() : ultimate.name()};
     if (std::optional<SpecificCall> specificCall{context_.intrinsics().Probe(
diff --git a/flang/runtime/extensions.cpp b/flang/runtime/extensions.cpp
index be3833db88b07a..f2823ca770bc5f 100644
--- a/flang/runtime/extensions.cpp
+++ b/flang/runtime/extensions.cpp
@@ -58,6 +58,24 @@ extern "C" {
 
 namespace Fortran::runtime {
 
+gid_t RTNAME(GetGID)() {
+#ifdef _WIN32
+  // Group IDs don't exist on Windows, return 1 to avoid errors
+  return 1;
+#else
+  return getgid();
+#endif
+}
+
+uid_t RTNAME(GetUID)() {
+#ifdef _WIN32
+  // User IDs don't exist on Windows, return 1 to avoid errors
+  return 1;
+#else
+  return getuid();
+#endif
+}
+
 void GetUsernameEnvVar(const char *envName, char *arg, std::int64_t length) {
   Descriptor name{*Descriptor::Create(
       1, std::strlen(envName) + 1, const_cast<char *>(envName), 0)};
@@ -66,6 +84,7 @@ void GetUsernameEnvVar(const char *envName, char *arg, std::int64_t length) {
   RTNAME(GetEnvVariable)
   (name, &value, nullptr, false, nullptr, __FILE__, __LINE__);
 }
+
 namespace io {
 // SUBROUTINE FLUSH(N)
 //   FLUSH N
diff --git a/flang/test/Semantics/windows.f90 b/flang/test/Semantics/windows.f90
new file mode 100644
index 00000000000000..8f9d1aa606c0a0
--- /dev/null
+++ b/flang/test/Semantics/windows.f90
@@ -0,0 +1,12 @@
+! RUN: %python %S/test_errors.py %s %flang --target=x86_64-pc-windows-msvc -Werror
+! RUN: %python %S/test_errors.py %s %flang --target=aarch64-pc-windows-msvc -Werror
+
+subroutine uid
+  !WARNING: User IDs do not exist on Windows. This function will always return 1
+  i = getuid()
+end subroutine uid
+
+subroutine gid
+  !WARNING: Group IDs do not exist on Windows. This function will always return 1
+  i = getgid()
+end subroutine gid
diff --git a/flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp
index 58a151447d5b4f..8bc1e87814a98c 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp
@@ -50,4 +50,4 @@ TEST_F(RuntimeCallTest, genGetPID) {
   mlir::Value result = fir::runtime::genGetPID(*firBuilder, loc);
   checkCallOp(result.getDefiningOp(), "_FortranAGetPID", /*nbArgs=*/0,
       /*addLocArgs=*/false);
-}
\ No newline at end of file
+}
diff --git a/flang/unittests/Optimizer/Builder/Runtime/IntrinsicsTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/IntrinsicsTest.cpp
new file mode 100644
index 00000000000000..1440a5fd01c2b1
--- /dev/null
+++ b/flang/unittests/Optimizer/Builder/Runtime/IntrinsicsTest.cpp
@@ -0,0 +1,17 @@
+#include "flang/Optimizer/Builder/Runtime/Intrinsics.h"
+#include "RuntimeCallTestBase.h"
+#include "gtest/gtest.h"
+
+TEST_F(RuntimeCallTest, genGetGID) {
+  mlir::Location loc = firBuilder->getUnknownLoc();
+  mlir::Value result = fir::runtime::genGetGID(*firBuilder, loc);
+  checkCallOp(result.getDefiningOp(), "_FortranAGetGID", /*nbArgs=*/0,
+      /*addLocArgs=*/false);
+}
+
+TEST_F(RuntimeCallTest, genGetUID) {
+  mlir::Location loc = firBuilder->getUnknownLoc();
+  mlir::Value result = fir::runtime::genGetUID(*firBuilder, loc);
+  checkCallOp(result.getDefiningOp(), "_FortranAGetUID", /*nbArgs=*/0,
+      /*addLocArgs=*/false);
+}
diff --git a/flang/unittests/Optimizer/CMakeLists.txt b/flang/unittests/Optimizer/CMakeLists.txt
index 7299e3ee0529a9..c58fb226a175c9 100644
--- a/flang/unittests/Optimizer/CMakeLists.txt
+++ b/flang/unittests/Optimizer/CMakeLists.txt
@@ -25,6 +25,7 @@ add_flang_unittest(FlangOptimizerTests
   Builder/Runtime/CommandTest.cpp
   Builder/Runtime/CharacterTest.cpp
   Builder/Runtime/DerivedTest.cpp
+  Builder/Runtime/IntrinsicsTest.cpp
   Builder/Runtime/NumericTest.cpp
   Builder/Runtime/RaggedTest.cpp
   Builder/Runtime/ReductionTest.cpp

>From 1a58b56b6746a0929a7497b2a019f445b0779b15 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Mon, 30 Sep 2024 06:37:36 -0700
Subject: [PATCH 05/12] [LLVM][TableGen] Change GlobalISelEmitter to use const
 RecordKeeper (#110109)

Change GlobalISelEmitter to use const RecordKeeper.

This is part of an effort to improve const correctness in TableGen
backends:


https://discourse.llvm.org/t/psa-planned-changes-to-tablegen-getallderiveddefinitions-api-potential-downstream-breakages/81089
---
 .../GlobalISel/GlobalISelMatchTable.cpp       | 12 +--
 .../Common/GlobalISel/GlobalISelMatchTable.h  | 18 ++--
 .../GlobalISelMatchTableExecutorEmitter.cpp   |  5 +-
 .../GlobalISelMatchTableExecutorEmitter.h     |  4 +-
 llvm/utils/TableGen/GlobalISelEmitter.cpp     | 86 ++++++++++---------
 5 files changed, 65 insertions(+), 60 deletions(-)

diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 2702e0ae33c775..5de5dd894f84ec 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -888,11 +888,9 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
       RM.getGISelFlags());
 }
 
-void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) {
-  if (!PhysRegOperands.contains(Reg)) {
+void RuleMatcher::definePhysRegOperand(const Record *Reg, OperandMatcher &OM) {
+  if (!PhysRegOperands.contains(Reg))
     PhysRegOperands[Reg] = &OM;
-    return;
-  }
 }
 
 InstructionMatcher &
@@ -904,7 +902,8 @@ RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const {
       ("Failed to lookup instruction " + SymbolicName).str().c_str());
 }
 
-const OperandMatcher &RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const {
+const OperandMatcher &
+RuleMatcher::getPhysRegOperandMatcher(const Record *Reg) const {
   const auto &I = PhysRegOperands.find(Reg);
 
   if (I == PhysRegOperands.end()) {
@@ -1717,7 +1716,8 @@ OperandMatcher &InstructionMatcher::getOperand(unsigned OpIdx) {
   llvm_unreachable("Failed to lookup operand");
 }
 
-OperandMatcher &InstructionMatcher::addPhysRegInput(Record *Reg, unsigned OpIdx,
+OperandMatcher &InstructionMatcher::addPhysRegInput(const Record *Reg,
+                                                    unsigned OpIdx,
                                                     unsigned TempOpIdx) {
   assert(SymbolicName.empty());
   OperandMatcher *OM = new OperandMatcher(*this, OpIdx, "", TempOpIdx);
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
index aa4eae87573a3a..315606417fc9ea 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.h
@@ -494,7 +494,7 @@ class RuleMatcher : public Matcher {
 
   /// A map of anonymous physical register operands defined by the matchers that
   /// may be referenced by the renderers.
-  DenseMap<Record *, OperandMatcher *> PhysRegOperands;
+  DenseMap<const Record *, OperandMatcher *> PhysRegOperands;
 
   /// ID for the next instruction variable defined with
   /// implicitlyDefineInsnVar()
@@ -651,7 +651,7 @@ class RuleMatcher : public Matcher {
 
   void defineOperand(StringRef SymbolicName, OperandMatcher &OM);
 
-  void definePhysRegOperand(Record *Reg, OperandMatcher &OM);
+  void definePhysRegOperand(const Record *Reg, OperandMatcher &OM);
 
   Error defineComplexSubOperand(StringRef SymbolicName,
                                 const Record *ComplexPattern,
@@ -669,7 +669,7 @@ class RuleMatcher : public Matcher {
   InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const;
   OperandMatcher &getOperandMatcher(StringRef Name);
   const OperandMatcher &getOperandMatcher(StringRef Name) const;
-  const OperandMatcher &getPhysRegOperandMatcher(Record *) const;
+  const OperandMatcher &getPhysRegOperandMatcher(const Record *) const;
 
   void optimize() override;
   void emit(MatchTable &Table) override;
@@ -1759,7 +1759,7 @@ class InstructionMatcher final : public PredicateListMatcher<PredicateMatcher> {
   /// PhysRegInputs - List list has an entry for each explicitly specified
   /// physreg input to the pattern.  The first elt is the Register node, the
   /// second is the recorded slot number the input pattern match saved it in.
-  SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs;
+  SmallVector<std::pair<const Record *, unsigned>, 2> PhysRegInputs;
 
   bool canAddNumOperandsCheck() const {
     // Add if it's allowed, and:
@@ -1799,10 +1799,10 @@ class InstructionMatcher final : public PredicateListMatcher<PredicateMatcher> {
                              unsigned AllocatedTemporariesBaseID,
                              bool IsVariadic = false);
   OperandMatcher &getOperand(unsigned OpIdx);
-  OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx,
+  OperandMatcher &addPhysRegInput(const Record *Reg, unsigned OpIdx,
                                   unsigned TempOpIdx);
 
-  ArrayRef<std::pair<Record *, unsigned>> getPhysRegInputs() const {
+  ArrayRef<std::pair<const Record *, unsigned>> getPhysRegInputs() const {
     return PhysRegInputs;
   }
 
@@ -1969,10 +1969,10 @@ class CopyRenderer : public OperandRenderer {
 class CopyPhysRegRenderer : public OperandRenderer {
 protected:
   unsigned NewInsnID;
-  Record *PhysReg;
+  const Record *PhysReg;
 
 public:
-  CopyPhysRegRenderer(unsigned NewInsnID, Record *Reg)
+  CopyPhysRegRenderer(unsigned NewInsnID, const Record *Reg)
       : OperandRenderer(OR_CopyPhysReg), NewInsnID(NewInsnID), PhysReg(Reg) {
     assert(PhysReg);
   }
@@ -1981,7 +1981,7 @@ class CopyPhysRegRenderer : public OperandRenderer {
     return R->getKind() == OR_CopyPhysReg;
   }
 
-  Record *getPhysReg() const { return PhysReg; }
+  const Record *getPhysReg() const { return PhysReg; }
 
   void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
 };
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp
index 8790dc6028ef49..b7926e21ca661f 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.cpp
@@ -103,7 +103,7 @@ void GlobalISelMatchTableExecutorEmitter::emitSubtargetFeatureBitsetImpl(
 }
 
 void GlobalISelMatchTableExecutorEmitter::emitComplexPredicates(
-    raw_ostream &OS, ArrayRef<Record *> ComplexOperandMatchers) {
+    raw_ostream &OS, ArrayRef<const Record *> ComplexOperandMatchers) {
   // Emit complex predicate table and an enum to reference them with.
   OS << "// ComplexPattern predicates.\n"
      << "enum {\n"
@@ -174,7 +174,8 @@ void GlobalISelMatchTableExecutorEmitter::emitMatchTable(
 
 void GlobalISelMatchTableExecutorEmitter::emitExecutorImpl(
     raw_ostream &OS, const MatchTable &Table, ArrayRef<LLTCodeGen> TypeObjects,
-    ArrayRef<RuleMatcher> Rules, ArrayRef<Record *> ComplexOperandMatchers,
+    ArrayRef<RuleMatcher> Rules,
+    ArrayRef<const Record *> ComplexOperandMatchers,
     ArrayRef<StringRef> CustomOperandRenderers, StringRef IfDefName) {
   OS << "#ifdef " << IfDefName << "\n";
   emitTypeObjects(OS, TypeObjects);
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h
index 6634c525480d34..862f1e83c169fe 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h
@@ -42,7 +42,7 @@ class GlobalISelMatchTableExecutorEmitter {
   /// Emits an enum + an array that stores references to
   /// \p ComplexOperandMatchers.
   void emitComplexPredicates(raw_ostream &OS,
-                             ArrayRef<Record *> ComplexOperandMatchers);
+                             ArrayRef<const Record *> ComplexOperandMatchers);
 
   /// Emits an enum + an array that stores references to
   /// \p CustomOperandRenderers.
@@ -206,7 +206,7 @@ class GlobalISelMatchTableExecutorEmitter {
   void emitExecutorImpl(raw_ostream &OS, const gi::MatchTable &Table,
                         ArrayRef<gi::LLTCodeGen> TypeObjects,
                         ArrayRef<gi::RuleMatcher> Rules,
-                        ArrayRef<Record *> ComplexOperandMatchers,
+                        ArrayRef<const Record *> ComplexOperandMatchers,
                         ArrayRef<StringRef> CustomOperandRenderers,
                         StringRef IfDefName);
   void emitPredicateBitset(raw_ostream &OS, StringRef IfDefName);
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index c345662c008e5b..c53f705a38db8f 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -314,7 +314,7 @@ static Expected<LLTCodeGen> getInstResultType(const TreePatternNode &Dst,
 
 class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
 public:
-  explicit GlobalISelEmitter(RecordKeeper &RK);
+  explicit GlobalISelEmitter(const RecordKeeper &RK);
 
   void emitAdditionalImpl(raw_ostream &OS) override;
 
@@ -335,18 +335,18 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
 private:
   std::string ClassName;
 
-  RecordKeeper &RK;
+  const RecordKeeper &RK;
   const CodeGenDAGPatterns CGP;
   const CodeGenTarget &Target;
   CodeGenRegBank &CGRegs;
 
-  std::vector<Record *> AllPatFrags;
+  ArrayRef<const Record *> AllPatFrags;
 
   /// Keep track of the equivalence between SDNodes and Instruction by mapping
   /// SDNodes to the GINodeEquiv mapping. We need to map to the GINodeEquiv to
   /// check for attributes on the relation such as CheckMMOIsNonAtomic.
   /// This is defined using 'GINodeEquiv' in the target description.
-  DenseMap<const Record *, Record *> NodeEquivs;
+  DenseMap<const Record *, const Record *> NodeEquivs;
 
   /// Keep track of the equivalence between ComplexPattern's and
   /// GIComplexOperandMatcher. Map entries are specified by subclassing
@@ -379,8 +379,8 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
   void gatherTypeIDValues();
   void gatherNodeEquivs();
 
-  Record *findNodeEquiv(const Record *N) const;
-  const CodeGenInstruction *getEquivNode(Record &Equiv,
+  const Record *findNodeEquiv(const Record *N) const;
+  const CodeGenInstruction *getEquivNode(const Record &Equiv,
                                          const TreePatternNode &N) const;
 
   Error importRulePredicates(RuleMatcher &M,
@@ -472,7 +472,7 @@ class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
                        InstructionMatcher &InsnMatcher, bool &HasAddedMatcher);
 };
 
-StringRef getPatFragPredicateEnumName(Record *R) { return R->getName(); }
+StringRef getPatFragPredicateEnumName(const Record *R) { return R->getName(); }
 
 void GlobalISelEmitter::gatherOpcodeValues() {
   InstructionOpcodeMatcher::initOpcodeValuesMap(Target);
@@ -484,32 +484,35 @@ void GlobalISelEmitter::gatherTypeIDValues() {
 
 void GlobalISelEmitter::gatherNodeEquivs() {
   assert(NodeEquivs.empty());
-  for (Record *Equiv : RK.getAllDerivedDefinitions("GINodeEquiv"))
+  for (const Record *Equiv : RK.getAllDerivedDefinitions("GINodeEquiv"))
     NodeEquivs[Equiv->getValueAsDef("Node")] = Equiv;
 
   assert(ComplexPatternEquivs.empty());
-  for (Record *Equiv : RK.getAllDerivedDefinitions("GIComplexPatternEquiv")) {
-    Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
+  for (const Record *Equiv :
+       RK.getAllDerivedDefinitions("GIComplexPatternEquiv")) {
+    const Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
     if (!SelDAGEquiv)
       continue;
     ComplexPatternEquivs[SelDAGEquiv] = Equiv;
   }
 
   assert(SDNodeXFormEquivs.empty());
-  for (Record *Equiv : RK.getAllDerivedDefinitions("GISDNodeXFormEquiv")) {
-    Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
+  for (const Record *Equiv :
+       RK.getAllDerivedDefinitions("GISDNodeXFormEquiv")) {
+    const Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
     if (!SelDAGEquiv)
       continue;
     SDNodeXFormEquivs[SelDAGEquiv] = Equiv;
   }
 }
 
-Record *GlobalISelEmitter::findNodeEquiv(const Record *N) const {
+const Record *GlobalISelEmitter::findNodeEquiv(const Record *N) const {
   return NodeEquivs.lookup(N);
 }
 
 const CodeGenInstruction *
-GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode &N) const {
+GlobalISelEmitter::getEquivNode(const Record &Equiv,
+                                const TreePatternNode &N) const {
   if (N.getNumChildren() >= 1) {
     // setcc operation maps to two different G_* instructions based on the type.
     if (!Equiv.isValueUnset("IfFloatingPoint") &&
@@ -536,7 +539,7 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode &N) const {
   return &Target.getInstruction(Equiv.getValueAsDef("I"));
 }
 
-GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK)
+GlobalISelEmitter::GlobalISelEmitter(const RecordKeeper &RK)
     : GlobalISelMatchTableExecutorEmitter(), RK(RK), CGP(RK),
       Target(CGP.getTargetInfo()), CGRegs(Target.getRegBank()) {
   ClassName = Target.getName().str() + "InstructionSelector";
@@ -721,7 +724,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
     const TreePatternNode &Src, unsigned &TempOpIdx) {
   const auto SavedFlags = Rule.setGISelFlags(Src.getGISelFlagsRecord());
 
-  Record *SrcGIEquivOrNull = nullptr;
+  const Record *SrcGIEquivOrNull = nullptr;
   const CodeGenInstruction *SrcGIOrNull = nullptr;
 
   // Start with the defined operands (i.e., the results of the root operator).
@@ -942,7 +945,7 @@ Error GlobalISelEmitter::importComplexPatternOperandMatcher(
 // Get the name to use for a pattern operand. For an anonymous physical register
 // input, this should use the register name.
 static StringRef getSrcChildName(const TreePatternNode &SrcChild,
-                                 Record *&PhysReg) {
+                                 const Record *&PhysReg) {
   StringRef SrcChildName = SrcChild.getName();
   if (SrcChildName.empty() && SrcChild.isLeaf()) {
     if (auto *ChildDefInit = dyn_cast<DefInit>(SrcChild.getLeafValue())) {
@@ -962,7 +965,7 @@ Error GlobalISelEmitter::importChildMatcher(
     const TreePatternNode &SrcChild, bool OperandIsAPointer,
     bool OperandIsImmArg, unsigned OpIdx, unsigned &TempOpIdx) {
 
-  Record *PhysReg = nullptr;
+  const Record *PhysReg = nullptr;
   std::string SrcChildName = std::string(getSrcChildName(SrcChild, PhysReg));
   if (!SrcChild.isLeaf() &&
       SrcChild.getOperator()->isSubClassOf("ComplexPattern")) {
@@ -1196,7 +1199,8 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
       auto &Child = DstChild.getChild(0);
       auto I = SDNodeXFormEquivs.find(DstChild.getOperator());
       if (I != SDNodeXFormEquivs.end()) {
-        Record *XFormOpc = DstChild.getOperator()->getValueAsDef("Opcode");
+        const Record *XFormOpc =
+            DstChild.getOperator()->getValueAsDef("Opcode");
         if (XFormOpc->getName() == "timm") {
           // If this is a TargetConstant, there won't be a corresponding
           // instruction to transform. Instead, this will refer directly to an
@@ -2290,65 +2294,65 @@ void GlobalISelEmitter::emitAdditionalImpl(raw_ostream &OS) {
 }
 
 void GlobalISelEmitter::emitMIPredicateFns(raw_ostream &OS) {
-  std::vector<Record *> MatchedRecords;
+  std::vector<const Record *> MatchedRecords;
   std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
-               std::back_inserter(MatchedRecords), [&](Record *R) {
+               std::back_inserter(MatchedRecords), [](const Record *R) {
                  return !R->getValueAsString("GISelPredicateCode").empty();
                });
-  emitMIPredicateFnsImpl<Record *>(
+  emitMIPredicateFnsImpl<const Record *>(
       OS,
       "  const MachineFunction &MF = *MI.getParent()->getParent();\n"
       "  const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
       "  const auto &Operands = State.RecordedOperands;\n"
       "  (void)Operands;\n"
       "  (void)MRI;",
-      ArrayRef<Record *>(MatchedRecords), &getPatFragPredicateEnumName,
-      [&](Record *R) { return R->getValueAsString("GISelPredicateCode"); },
+      ArrayRef<const Record *>(MatchedRecords), &getPatFragPredicateEnumName,
+      [](const Record *R) { return R->getValueAsString("GISelPredicateCode"); },
       "PatFrag predicates.");
 }
 
 void GlobalISelEmitter::emitI64ImmPredicateFns(raw_ostream &OS) {
-  std::vector<Record *> MatchedRecords;
+  std::vector<const Record *> MatchedRecords;
   std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
-               std::back_inserter(MatchedRecords), [&](Record *R) {
+               std::back_inserter(MatchedRecords), [](const Record *R) {
                  bool Unset;
                  return !R->getValueAsString("ImmediateCode").empty() &&
                         !R->getValueAsBitOrUnset("IsAPFloat", Unset) &&
                         !R->getValueAsBit("IsAPInt");
                });
-  emitImmPredicateFnsImpl<Record *>(
-      OS, "I64", "int64_t", ArrayRef<Record *>(MatchedRecords),
+  emitImmPredicateFnsImpl<const Record *>(
+      OS, "I64", "int64_t", ArrayRef<const Record *>(MatchedRecords),
       &getPatFragPredicateEnumName,
-      [&](Record *R) { return R->getValueAsString("ImmediateCode"); },
+      [](const Record *R) { return R->getValueAsString("ImmediateCode"); },
       "PatFrag predicates.");
 }
 
 void GlobalISelEmitter::emitAPFloatImmPredicateFns(raw_ostream &OS) {
-  std::vector<Record *> MatchedRecords;
+  std::vector<const Record *> MatchedRecords;
   std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
-               std::back_inserter(MatchedRecords), [&](Record *R) {
+               std::back_inserter(MatchedRecords), [](const Record *R) {
                  bool Unset;
                  return !R->getValueAsString("ImmediateCode").empty() &&
                         R->getValueAsBitOrUnset("IsAPFloat", Unset);
                });
-  emitImmPredicateFnsImpl<Record *>(
-      OS, "APFloat", "const APFloat &", ArrayRef<Record *>(MatchedRecords),
-      &getPatFragPredicateEnumName,
-      [&](Record *R) { return R->getValueAsString("ImmediateCode"); },
+  emitImmPredicateFnsImpl<const Record *>(
+      OS, "APFloat", "const APFloat &",
+      ArrayRef<const Record *>(MatchedRecords), &getPatFragPredicateEnumName,
+      [](const Record *R) { return R->getValueAsString("ImmediateCode"); },
       "PatFrag predicates.");
 }
 
 void GlobalISelEmitter::emitAPIntImmPredicateFns(raw_ostream &OS) {
-  std::vector<Record *> MatchedRecords;
+  std::vector<const Record *> MatchedRecords;
   std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
-               std::back_inserter(MatchedRecords), [&](Record *R) {
+               std::back_inserter(MatchedRecords), [](const Record *R) {
                  return !R->getValueAsString("ImmediateCode").empty() &&
                         R->getValueAsBit("IsAPInt");
                });
-  emitImmPredicateFnsImpl<Record *>(
-      OS, "APInt", "const APInt &", ArrayRef<Record *>(MatchedRecords),
+  emitImmPredicateFnsImpl<const Record *>(
+      OS, "APInt", "const APInt &", ArrayRef<const Record *>(MatchedRecords),
       &getPatFragPredicateEnumName,
-      [&](Record *R) { return R->getValueAsString("ImmediateCode"); },
+      [](const Record *R) { return R->getValueAsString("ImmediateCode"); },
       "PatFrag predicates.");
 }
 
@@ -2461,7 +2465,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
     return A->getName() < B->getName();
   };
 
-  std::vector<Record *> ComplexPredicates =
+  std::vector<const Record *> ComplexPredicates =
       RK.getAllDerivedDefinitions("GIComplexOperandMatcher");
   llvm::sort(ComplexPredicates, OrderByName);
 

>From 668628ed00a00fb9fe82b6b1d238ce9e67f12eea Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Mon, 30 Sep 2024 06:47:39 -0700
Subject: [PATCH 06/12] [IR] Avoid repeated hash lookups (NFC) (#110450)

---
 llvm/lib/IR/AsmWriter.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 70e3af941bf77b..280e347739cdb6 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1338,12 +1338,8 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
 void SlotTracker::CreateAttributeSetSlot(AttributeSet AS) {
   assert(AS.hasAttributes() && "Doesn't need a slot!");
 
-  as_iterator I = asMap.find(AS);
-  if (I != asMap.end())
-    return;
-
-  unsigned DestSlot = asNext++;
-  asMap[AS] = DestSlot;
+  if (asMap.try_emplace(AS, asNext).second)
+    ++asNext;
 }
 
 /// Create a new slot for the specified Module

>From 4968f727d11f5eda79b774e013d52822e34ba1a2 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Mon, 30 Sep 2024 06:48:00 -0700
Subject: [PATCH 07/12] [ExecutionEngine] Avoid repeated hash lookups (NFC)
 (#110451)

---
 llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
index eab0dfa47e1e7d..cdfcae86f79c28 100644
--- a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
@@ -521,10 +521,8 @@ void COFFPlatform::pushInitializersLoop(PushInitializersSendResultFn SendResult,
       }
 
       for (auto *DepJD : JDDepMap[CurJD])
-        if (!Visited.count(DepJD)) {
+        if (Visited.insert(DepJD).second)
           Worklist.push_back(DepJD);
-          Visited.insert(DepJD);
-        }
     }
   });
 

>From 0fcc929033ba4ebb4a92d779045509ea3d5e542a Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Mon, 30 Sep 2024 06:49:04 -0700
Subject: [PATCH 08/12] [MachineLICM] Avoid repeated hash lookups (NFC)
 (#110452)

---
 llvm/lib/CodeGen/MachineLICM.cpp | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 3289a692221ba6..793ad75759ccb8 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -148,13 +148,13 @@ namespace {
     DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
 
     bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
-      if (ExitBlockMap.contains(CurLoop))
-        return is_contained(ExitBlockMap[CurLoop], MBB);
-
-      SmallVector<MachineBasicBlock *, 8> ExitBlocks;
-      CurLoop->getExitBlocks(ExitBlocks);
-      ExitBlockMap[CurLoop] = ExitBlocks;
-      return is_contained(ExitBlocks, MBB);
+      auto [It, Inserted] = ExitBlockMap.try_emplace(CurLoop);
+      if (Inserted) {
+        SmallVector<MachineBasicBlock *, 8> ExitBlocks;
+        CurLoop->getExitBlocks(ExitBlocks);
+        It->second = ExitBlocks;
+      }
+      return is_contained(It->second, MBB);
     }
 
     // Track 'estimated' register pressure.
@@ -1010,12 +1010,8 @@ MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
     if (RCCost == 0)
       continue;
     const int *PS = TRI->getRegClassPressureSets(RC);
-    for (; *PS != -1; ++PS) {
-      if (!Cost.contains(*PS))
-        Cost[*PS] = RCCost;
-      else
-        Cost[*PS] += RCCost;
-    }
+    for (; *PS != -1; ++PS)
+      Cost[*PS] += RCCost;
   }
   return Cost;
 }

>From c6fc2f734036d232ad6ef9867093a63084834f4a Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Mon, 30 Sep 2024 06:49:50 -0700
Subject: [PATCH 09/12] [Analysis] Avoid repeated hash lookups (NFC) (#110453)

---
 llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index 2ca02eb1741712..6db5737ef4268b 100644
--- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -114,7 +114,7 @@ std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdviceImpl(CallBase &CB) {
   // Replay decision, if it has one
   auto Iter = InlineSitesFromRemarks.find(Combined);
   if (Iter != InlineSitesFromRemarks.end()) {
-    if (InlineSitesFromRemarks[Combined]) {
+    if (Iter->second) {
       LLVM_DEBUG(dbgs() << "Replay Inliner: Inlined " << Callee << " @ "
                         << CallSiteLoc << "\n");
       return std::make_unique<DefaultInlineAdvice>(

>From 506b19ef6d290d9d64fa1256d6db0af7f831870a Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 30 Sep 2024 21:51:12 +0800
Subject: [PATCH 10/12] [SLP][REVEC] Fix cost model for getBuildVectorCost with
 FixedVectorType ScalarTy. (#110073)

BoUpSLP::gather always uses CreateInsertVector for FixedVectorType
ScalarTy.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 10 +++-
 .../RISCV/revec-getGatherCost.ll              | 47 +++++++++++++++++--
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e45fcb2b5c790c..6b8ec55b30426c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9369,10 +9369,18 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       bool NeedShuffle =
           count(VL, *It) > 1 &&
           (VL.front() != *It || !all_of(VL.drop_front(), IsaPred<UndefValue>));
-      if (!NeedShuffle)
+      if (!NeedShuffle) {
+        if (isa<FixedVectorType>(ScalarTy)) {
+          assert(SLPReVec && "FixedVectorType is not expected.");
+          return TTI.getShuffleCost(
+              TTI::SK_InsertSubvector, VecTy, {}, CostKind,
+              std::distance(VL.begin(), It) * getNumElements(ScalarTy),
+              cast<FixedVectorType>(ScalarTy));
+        }
         return TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
                                       CostKind, std::distance(VL.begin(), It),
                                       PoisonValue::get(VecTy), *It);
+      }
 
       SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
       transform(VL, ShuffleMask.begin(), [](Value *V) {
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 887f59bbda94d6..995cd7cfbc880b 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-10 -pass-remarks-output=%t %s | FileCheck %s
+; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-20 -pass-remarks-output=%t %s | FileCheck %s
 ; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
 
 ; YAML: --- !Passed
 ; YAML: Pass:            slp-vectorizer
 ; YAML: Name:            StoresVectorized
-; YAML: Function:        test
+; YAML: Function:        test1
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
 ; YAML:   - Cost:            '6'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '5'
 
-define void @test(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
-; CHECK-LABEL: @test(
+define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <4 x float> %load17, <4 x float> %fmuladd7, <4 x float> %fmuladd16, ptr %out_ptr) {
+; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
@@ -40,3 +40,42 @@ entry:
 }
 
 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
+
+; YAML: --- !Passed
+; YAML: Pass:            slp-vectorizer
+; YAML: Name:            StoresVectorized
+; YAML: Function:        test2
+; YAML: Args:
+; YAML:   - String:          'Stores SLP vectorized with cost '
+; YAML:   - Cost:            '16'
+; YAML:   - String:          ' and with tree size '
+; YAML:   - TreeSize:        '5'
+
+define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, <8 x float> %load17, <8 x float> %fmuladd7, <8 x float> %fmuladd16, ptr %out_ptr) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VEXT165_I:%.*]] = shufflevector <8 x float> [[LOAD6:%.*]], <8 x float> [[LOAD7:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP2]], <8 x float> [[LOAD17:%.*]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP4]], <16 x float> [[TMP6]])
+; CHECK-NEXT:    store <16 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %vext165.i = shufflevector <8 x float> %load6, <8 x float> %load7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %vext309.i = shufflevector <8 x float> %load7, <8 x float> %load8, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %fmuladd8 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext165.i, <8 x float> %load17, <8 x float> %fmuladd7)
+  %fmuladd17 = tail call noundef <8 x float> @llvm.fmuladd.v8f32(<8 x float> %vext309.i, <8 x float> %load17, <8 x float> %fmuladd16)
+  %add.ptr.i.i = getelementptr inbounds i8, ptr %out_ptr, i64 32
+  store <8 x float> %fmuladd8, ptr %out_ptr, align 4
+  store <8 x float> %fmuladd17, ptr %add.ptr.i.i, align 4
+  ret void
+}
+
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)

>From 4d01c800ecfb63e1ec1532c53898584ea390e95a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= <andrzej.warzynski at arm.com>
Date: Mon, 30 Sep 2024 14:53:50 +0100
Subject: [PATCH 11/12] [mlir] Update the return type of
 `getNum{Dynamic|Scalable}Dims` (#110472)

Updates the return type of `getNumDynamicDims` and `getNumScalableDims`
from `int64_t` to `size_t`. This is for consistency with other
helpers/methods that return "size" and to reduce the number of
`static_cast`s in various places.
---
 .../mlir/Dialect/SparseTensor/IR/SparseTensorType.h      | 2 +-
 mlir/include/mlir/IR/BuiltinTypeInterfaces.td            | 2 +-
 mlir/include/mlir/IR/BuiltinTypes.td                     | 2 +-
 mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp   | 3 +--
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp                   | 3 +--
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp                 | 6 ++----
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp                 | 9 +++------
 7 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
index a154d7fa5fb6e5..620fd7c63146dd 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorType.h
@@ -293,7 +293,7 @@ class SparseTensorType {
   /// Returns the number of dimensions which have dynamic sizes.
   /// The return type is `int64_t` to maintain consistency with
   /// `ShapedType::Trait<T>::getNumDynamicDims`.
-  int64_t getNumDynamicDims() const { return rtp.getNumDynamicDims(); }
+  size_t getNumDynamicDims() const { return rtp.getNumDynamicDims(); }
 
   ArrayRef<LevelType> getLvlTypes() const { return enc.getLvlTypes(); }
   LevelType getLvlType(Level l) const {
diff --git a/mlir/include/mlir/IR/BuiltinTypeInterfaces.td b/mlir/include/mlir/IR/BuiltinTypeInterfaces.td
index db38e2e1bce22a..c9dcd546cf67c2 100644
--- a/mlir/include/mlir/IR/BuiltinTypeInterfaces.td
+++ b/mlir/include/mlir/IR/BuiltinTypeInterfaces.td
@@ -166,7 +166,7 @@ def ShapedTypeInterface : TypeInterface<"ShapedType"> {
 
     /// If this is a ranked type, return the number of dimensions with dynamic
     /// size. Otherwise, abort.
-    int64_t getNumDynamicDims() const {
+    size_t getNumDynamicDims() const {
       return llvm::count_if($_type.getShape(), ::mlir::ShapedType::isDynamic);
     }
 
diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td
index c738a8a3becc16..b2b41b16beec29 100644
--- a/mlir/include/mlir/IR/BuiltinTypes.td
+++ b/mlir/include/mlir/IR/BuiltinTypes.td
@@ -1253,7 +1253,7 @@ def Builtin_Vector : Builtin_Type<"Vector", "vector",
     }
 
     /// Get the number of scalable dimensions.
-    int64_t getNumScalableDims() const {
+    size_t getNumScalableDims() const {
       return llvm::count(getScalableDims(), true);
     }
 
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 04a8ff30ee946b..f1841b860ff81a 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -249,8 +249,7 @@ AllocTensorOp::getBufferType(Value value, const BufferizationOptions &options,
 LogicalResult AllocTensorOp::verify() {
   if (getCopy() && !getDynamicSizes().empty())
     return emitError("dynamic sizes not needed when copying a tensor");
-  if (!getCopy() && getType().getNumDynamicDims() !=
-                        static_cast<int64_t>(getDynamicSizes().size()))
+  if (!getCopy() && getType().getNumDynamicDims() != getDynamicSizes().size())
     return emitError("expected ")
            << getType().getNumDynamicDims() << " dynamic sizes";
   if (getCopy() && getCopy().getType() != getType())
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index f822c11aeec008..956877497d9338 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2045,8 +2045,7 @@ void WaitOp::getCanonicalizationPatterns(RewritePatternSet &results,
 LogicalResult AllocOp::verify() {
   auto memRefType = llvm::cast<MemRefType>(getMemref().getType());
 
-  if (static_cast<int64_t>(getDynamicSizes().size()) !=
-      memRefType.getNumDynamicDims())
+  if (getDynamicSizes().size() != memRefType.getNumDynamicDims())
     return emitOpError("dimension operand count does not equal memref "
                        "dynamic dimension count");
 
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 75b9729e63648c..d579a27359dfa0 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -205,8 +205,7 @@ static LogicalResult verifyAllocLikeOp(AllocLikeOp op) {
   if (!memRefType)
     return op.emitOpError("result must be a memref");
 
-  if (static_cast<int64_t>(op.getDynamicSizes().size()) !=
-      memRefType.getNumDynamicDims())
+  if (op.getDynamicSizes().size() != memRefType.getNumDynamicDims())
     return op.emitOpError("dimension operand count does not equal memref "
                           "dynamic dimension count");
 
@@ -283,8 +282,7 @@ struct SimplifyAllocConst : public OpRewritePattern<AllocLikeOp> {
     // Create new memref type (which will have fewer dynamic dimensions).
     MemRefType newMemRefType =
         MemRefType::Builder(memrefType).setShape(newShapeConstants);
-    assert(static_cast<int64_t>(dynamicSizes.size()) ==
-           newMemRefType.getNumDynamicDims());
+    assert(dynamicSizes.size() == newMemRefType.getNumDynamicDims());
 
     // Create and insert the alloc op for the new memref.
     auto newAlloc = rewriter.create<AllocLikeOp>(
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 1ac96756e22b5e..defac8308b9092 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -179,8 +179,7 @@ static RankedTensorType
 foldDynamicToStaticDimSizes(RankedTensorType type, ValueRange dynamicSizes,
                             SmallVector<Value> &foldedDynamicSizes) {
   SmallVector<int64_t> staticShape(type.getShape());
-  assert(type.getNumDynamicDims() ==
-             static_cast<int64_t>(dynamicSizes.size()) &&
+  assert(type.getNumDynamicDims() == dynamicSizes.size() &&
          "incorrect number of dynamic sizes");
 
   // Compute new static and dynamic sizes.
@@ -894,8 +893,7 @@ void EmptyOp::build(OpBuilder &builder, OperationState &result,
 }
 
 LogicalResult EmptyOp::verify() {
-  if (getType().getNumDynamicDims() !=
-      static_cast<int64_t>(getDynamicSizes().size()))
+  if (getType().getNumDynamicDims() != getDynamicSizes().size())
     return emitOpError("incorrect number of dynamic sizes, has ")
            << getDynamicSizes().size() << ", expected "
            << getType().getNumDynamicDims();
@@ -3672,8 +3670,7 @@ void SplatOp::getAsmResultNames(
 }
 
 LogicalResult SplatOp::verify() {
-  if (getType().getNumDynamicDims() !=
-      static_cast<int64_t>(getDynamicSizes().size()))
+  if (getType().getNumDynamicDims() != getDynamicSizes().size())
     return emitOpError("incorrect number of dynamic sizes, has ")
            << getDynamicSizes().size() << ", expected "
            << getType().getNumDynamicDims();

>From be372c3585eff241ef41c7c48c2dee27f0ca94cc Mon Sep 17 00:00:00 2001
From: Steven Perron <stevenperron at google.com>
Date: Mon, 30 Sep 2024 09:55:38 -0400
Subject: [PATCH 12/12] Fix options test

---
 clang/test/Options/enable_16bit_types_validation_spirv.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Options/enable_16bit_types_validation_spirv.hlsl b/clang/test/Options/enable_16bit_types_validation_spirv.hlsl
index aeb7a8369f4034..ddae1e050de15f 100644
--- a/clang/test/Options/enable_16bit_types_validation_spirv.hlsl
+++ b/clang/test/Options/enable_16bit_types_validation_spirv.hlsl
@@ -4,7 +4,7 @@
 // SPIRV: error: '-fnative-half-type' option requires target HLSL Version >= 2018, but HLSL Version is 'hlsl2016'
 
 // valid: "spirv-unknown-vulkan-library"
-// valid: define spir_func void @main() #0 {
+// valid: define spir_func void @"?main@@YAXXZ"() #0 {
 
 [numthreads(1,1,1)]
 void main()



More information about the cfe-commits mailing list