[clang] [HLSL] Update indexed vector elements individually (PR #169144)
Helena Kotas via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 24 18:22:58 PST 2025
https://github.com/hekota updated https://github.com/llvm/llvm-project/pull/169144
>From 7ffd077dc7132f1f9b10cee36fcc61d7b24a7dd3 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Fri, 21 Nov 2025 19:00:47 -0800
Subject: [PATCH 1/2] [HLSL] Update indexed vector elements individually
Storing to individual elements of a vector through vector indexing
needs to be handled as separate stores. We need to avoid the load/modify/store
of the whole vector to prevent overwriting other vector elements that might be
getting updated in parallel.
Fixes #160208
Fixes #167729
---
clang/lib/CodeGen/CGExpr.cpp | 24 +++++++++++
clang/test/CodeGenHLSL/BoolVector.hlsl | 15 +++----
.../builtins/VectorElementStore.hlsl | 41 +++++++++++++++++++
clang/test/CodeGenHLSL/builtins/lit.hlsl | 6 ++-
4 files changed, 75 insertions(+), 11 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f2451b16e78be..763ce6369fc51 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2575,6 +2575,30 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
bool isInit) {
if (!Dst.isSimple()) {
if (Dst.isVectorElt()) {
+ if (getLangOpts().HLSL) {
+ // In HLSL, storing to individual elements of a vector through
+ // VectorElt LValue needs to be handled as separate store. We need to
+ // avoid the load/modify/store sequence to prevent overwriting other
+ // elements that might be getting updated in parallel.
+ Address DstAddr = Dst.getVectorAddress();
+ llvm::Type *DestAddrTy = DstAddr.getElementType();
+ llvm::Type *ElemTy = DestAddrTy->getScalarType();
+ CharUnits ElemAlign = CharUnits::fromQuantity(
+ CGM.getDataLayout().getPrefTypeAlign(ElemTy));
+
+ llvm::Value *Val = Src.getScalarVal();
+ if (Val->getType()->getPrimitiveSizeInBits() <
+ ElemTy->getScalarSizeInBits())
+ Val = Builder.CreateZExt(Val, ElemTy->getScalarType());
+
+ llvm::Value *Idx = Dst.getVectorIdx();
+ llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
+ Address DstElemAddr =
+ Builder.CreateGEP(DstAddr, {Zero, Idx}, DestAddrTy, ElemAlign);
+ Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified());
+ return;
+ }
+
// Read/modify/write the vector, inserting the new element.
llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(),
Dst.isVolatileQualified());
diff --git a/clang/test/CodeGenHLSL/BoolVector.hlsl b/clang/test/CodeGenHLSL/BoolVector.hlsl
index d5054a5a92b5d..6be90e8f51ce2 100644
--- a/clang/test/CodeGenHLSL/BoolVector.hlsl
+++ b/clang/test/CodeGenHLSL/BoolVector.hlsl
@@ -69,9 +69,8 @@ bool fn4() {
// CHECK-LABEL: define hidden void {{.*}}fn5{{.*}}
// CHECK: [[Arr:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[Arr]], align 8
-// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Arr]], align 8
-// CHECK-NEXT: [[V:%.*]] = insertelement <2 x i32> [[L]], i32 0, i32 1
-// CHECK-NEXT: store <2 x i32> [[V]], ptr [[Arr]], align 8
+// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[Arr]]
+// CHECK-NEXT: store i32 0, ptr [[Ptr]], align 4
// CHECK-NEXT: ret void
void fn5() {
bool2 Arr = {true,true};
@@ -86,10 +85,9 @@ void fn5() {
// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[V]], align 4
// CHECK-NEXT: [[LV:%.*]] = trunc i32 [[Y]] to i1
// CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0
-// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[BV]], align 1
// CHECK-NEXT: [[Z:%.*]] = zext i1 [[LV]] to i32
-// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 [[Z]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[BV]], align 1
+// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[BV]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[Z]], ptr [[Ptr]], align 4
// CHECK-NEXT: ret void
void fn6() {
bool V = false;
@@ -101,9 +99,8 @@ void fn6() {
// CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 {{.*}}, i32 16, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0
-// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[Idx]], align 8
-// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 0, i32 1
-// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[Idx]], align 8
+// CHECK-NEXT: %[[Ptr:.*]] = getelementptr <2 x i32>, ptr [[Idx]], i32 0, i32 1
+// CHECK-NEXT: store i32 0, ptr %[[Ptr]], align 4
// CHECK-NEXT: ret void
void fn7() {
bool2 Arr[2] = {{true,true}, {false,false}};
diff --git a/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl
new file mode 100644
index 0000000000000..e0c3aa54aaeba
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -finclude-default-header -emit-llvm -disable-llvm-passes \
+// RUN: -triple dxil-pc-shadermodel6.3-library %s -o - | FileCheck %s
+
+// Test groupshared vector element store for uint.
+// CHECK-LABEL: test_uint4
+// CHECK: [[VAL:%.*]] = load i32, ptr %Val.addr, align 4
+// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4
+// CHECK: [[PTR:%.*]] = getelementptr <4 x i32>, ptr addrspace(3) @SMem, i32 0, i32 [[IDX]]
+// CHECK: store i32 [[VAL]], ptr addrspace(3) [[PTR]], align 4
+// CHECK-: ret void
+groupshared uint4 SMem;
+void test_uint4(uint Idx, uint Val) {
+ SMem[Idx] = Val;
+}
+
+// Test local vector element store for bool.
+// CHECK: [[COND1:%.*]] = load i32, ptr addrspace(3) @Cond, align 4
+// CHECK: [[COND2:%.*]] = trunc i32 [[COND1]] to i1
+// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4
+// CHECK: [[COND3:%.*]] = zext i1 [[COND2]] to i32
+// CHECK: [[PTR:%.*]] = getelementptr <3 x i32>, ptr %Val, i32 0, i32 [[IDX]]
+// CHECK: store i32 [[COND3]], ptr [[PTR]], align 4
+// CHECK: ret
+groupshared bool Cond;
+bool3 test_bool(uint Idx) {
+ bool3 Val = { false, false, false};
+ Val[Idx] = Cond;
+ return Val;
+}
+
+// Test resource vector element store for float.
+// CHECK: [[VAL:%.*]] = load float, ptr %Val.addr, align 4
+// CHECK: [[RES_PTR:%.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIDv4_fEixEj(ptr {{.*}} @_ZL3Buf, i32 noundef 0)
+// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4
+// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[RES_PTR]], i32 0, i32 [[IDX]]
+// CHECK: store float [[VAL]], ptr [[PTR]], align 4
+// CHECK: ret void
+RWStructuredBuffer<float4> Buf : register(u0);
+void test_float(uint Idx, float Val) {
+ Buf[0][Idx] = Val;
+}
diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl
index b7979960de9f6..364c2e8794ea2 100644
--- a/clang/test/CodeGenHLSL/builtins/lit.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl
@@ -11,7 +11,8 @@
// CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn half [[LOG]], %{{.*}}
// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half %mul.i)
// CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, half 0xH0000, half %{{.*}}
-// CHECK: %vecins.i = insertelement <4 x half> %{{.*}}, half %hlsl.select7.i, i32 2
+// CHECK: [[PTR:%.*]] = getelementptr <4 x half>, ptr %Result.i, i32 0, i32 2
+// CHECK: store half %hlsl.select7.i, ptr [[PTR]], align 2
// CHECK: ret <4 x half> %{{.*}}
half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M); }
@@ -26,6 +27,7 @@ half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M
// CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float [[LOG]], %{{.*}}
// CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(float %mul.i)
// CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float 0.000000e+00, float %{{.*}}
-// CHECK: %vecins.i = insertelement <4 x float> %{{.*}}, float %hlsl.select7.i, i32 2
+// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr %Result.i, i32 0, i32 2
+// CHECK: store float %hlsl.select7.i, ptr [[PTR]], align 4
// CHECK: ret <4 x float> %{{.*}}
float4 test_lit_float(float NDotL, float NDotH, float M) { return lit(NDotL, NDotH, M); }
>From 7c81f23667cb1205779e8ff515450388a4637f7a Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Mon, 24 Nov 2025 18:22:39 -0800
Subject: [PATCH 2/2] code review feedback - add assert and update comment
---
clang/lib/CodeGen/CGExpr.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 763ce6369fc51..f04be5bfd9daf 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2576,16 +2576,18 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
if (!Dst.isSimple()) {
if (Dst.isVectorElt()) {
if (getLangOpts().HLSL) {
- // In HLSL, storing to individual elements of a vector through
- // VectorElt LValue needs to be handled as separate store. We need to
- // avoid the load/modify/store sequence to prevent overwriting other
- // elements that might be getting updated in parallel.
+ // HLSL allows direct access to vector elements, so storing to
+ // individual elements of a vector through VectorElt is handled as
+ // separate store instructions.
Address DstAddr = Dst.getVectorAddress();
llvm::Type *DestAddrTy = DstAddr.getElementType();
llvm::Type *ElemTy = DestAddrTy->getScalarType();
CharUnits ElemAlign = CharUnits::fromQuantity(
CGM.getDataLayout().getPrefTypeAlign(ElemTy));
+ assert(ElemTy->getScalarSizeInBits() >= 8 &&
+ "vector element type must be at least byte-sized");
+
llvm::Value *Val = Src.getScalarVal();
if (Val->getType()->getPrimitiveSizeInBits() <
ElemTy->getScalarSizeInBits())
More information about the cfe-commits
mailing list