[clang] [HLSL] Update vector elements individually (PR #169090)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 21 12:02:00 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Helena Kotas (hekota)
<details>
<summary>Changes</summary>
Stores to individual elements of a vector through vector swizzle components must be emitted as separate per-element stores. We need to avoid a load/modify/store of the whole vector, because writing the whole vector back could overwrite other elements that are being updated in parallel.
Fixes #152815
---
Full diff: https://github.com/llvm/llvm-project/pull/169090.diff
4 Files Affected:
- (modified) clang/lib/CodeGen/CGExpr.cpp (+39-8)
- (modified) clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl (+9-3)
- (modified) clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl (+15-10)
- (added) clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl (+96)
``````````diff
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f2451b16e78be..45c8f4df03ab0 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2801,18 +2801,50 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
LValue Dst) {
llvm::Value *SrcVal = Src.getScalarVal();
Address DstAddr = Dst.getExtVectorAddress();
+ const llvm::Constant *Elts = Dst.getExtVectorElts();
if (DstAddr.getElementType()->getScalarSizeInBits() >
SrcVal->getType()->getScalarSizeInBits())
SrcVal = Builder.CreateZExt(
SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType()));
- // HLSL allows storing to scalar values through ExtVector component LValues.
- // To support this we need to handle the case where the destination address is
- // a scalar.
- if (!DstAddr.getElementType()->isVectorTy()) {
- assert(!Dst.getType()->isVectorType() &&
- "this should only occur for non-vector l-values");
- Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
+ if (getLangOpts().HLSL) {
+ llvm::Type *DestAddrTy = DstAddr.getElementType();
+ // HLSL allows storing to scalar values through ExtVector component LValues.
+ // To support this we need to handle the case where the destination address
+ // is a scalar.
+ if (!DestAddrTy->isVectorTy()) {
+ assert(!Dst.getType()->isVectorType() &&
+ "this should only occur for non-vector l-values");
+ Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
+ return;
+ }
+
+ // In HLSL, storing to individual elements of a vector through ExtVector
+ // components needs to be handled as separate store instructions. We need to
+ // avoid the load/modify/store sequence to prevent overwriting other
+ // elements that might be getting updated in parallel.
+ // If we are updating multiple elements, Dst and Src are vectors; for
+ // a single element update they are scalars.
+ const VectorType *VTy = Dst.getType()->getAs<VectorType>();
+ unsigned NumSrcElts = VTy ? VTy->getNumElements() : 1;
+ CharUnits ElemAlign = CharUnits::fromQuantity(
+ CGM.getDataLayout().getPrefTypeAlign(DestAddrTy->getScalarType()));
+ llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
+
+ for (unsigned I = 0; I != NumSrcElts; ++I) {
+ llvm::Value *Val = VTy ? Builder.CreateExtractElement(
+ SrcVal, llvm::ConstantInt::get(Int32Ty, I))
+ : SrcVal;
+ unsigned FieldNo = getAccessedFieldNo(I, Elts);
+ Address DstElemAddr = Address::invalid();
+ if (FieldNo == 0)
+ DstElemAddr = DstAddr.withAlignment(ElemAlign);
+ else
+ DstElemAddr = Builder.CreateGEP(
+ DstAddr, {Zero, llvm::ConstantInt::get(Int32Ty, FieldNo)},
+ DestAddrTy, ElemAlign);
+ Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified());
+ }
return;
}
@@ -2820,7 +2852,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
// value now.
llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
llvm::Type *VecTy = Vec->getType();
- const llvm::Constant *Elts = Dst.getExtVectorElts();
if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
unsigned NumSrcElts = VTy->getNumElements();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..ec03804ad1a4c 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -101,10 +101,16 @@ void funky(inout int3 X) {
// Call the function with the temporary.
// CHECK: call void {{.*}}funky{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[ArgTmp]])
-// Shuffle it back.
+// Write it back.
// CHECK: [[RetVal:%.*]] = load <3 x i32>, ptr [[ArgTmp]]
-// CHECK: [[Vxyz:%.*]] = shufflevector <3 x i32> [[RetVal]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
-// CHECK: store <3 x i32> [[Vxyz]], ptr [[V]]
+// CHECK: [[Src0:%.*]] = extractelement <3 x i32> [[RetVal]], i32 0
+// CHECK: [[PtrY:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 1
+// CHECK: store i32 [[Src0]], ptr [[PtrY]], align 4
+// CHECK: [[Src1:%.*]] = extractelement <3 x i32> [[RetVal]], i32 1
+// CHECK: [[PtrZ:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 2
+// CHECK: store i32 [[Src1]], ptr [[PtrZ]], align 4
+// CHECK: [[Src2:%.*]] = extractelement <3 x i32> [[RetVal]], i32 2
+// CHECK: store i32 [[Src2]], ptr %V, align 4
// OPT: ret <3 x i32> <i32 3, i32 1, i32 2>
export int3 case4() {
diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
index 7804239edccae..270598265c660 100644
--- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
@@ -259,9 +259,8 @@ bool AssignBool(bool V) {
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1
// CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32
-// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
-// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1
-// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8
+// CHECK-NEXT: [[C:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[D]], ptr [[C]], align 4
// CHECK-NEXT: ret void
void AssignBool2(bool V) {
bool2 X = true.xx;
@@ -277,10 +276,13 @@ void AssignBool2(bool V) {
// CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
// CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
-// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
-// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
-// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8
+// CHECK-NEXT: [[V1:%.*]] = extractelement <2 x i32> [[B]], i32 0
+// CHECK-NEXT: store i32 [[V1]], ptr [[X]], align 4
+// CHECK-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[B]], i32 1
+// CHECK-NEXT: [[X2:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[V2]], ptr [[X2]], align 4
// CHECK-NEXT: ret void
+
void AssignBool3(bool2 V) {
bool2 X = {true,true};
X.xy = V;
@@ -313,10 +315,13 @@ bool2 AccessBools() {
// CHECK-NEXT: [[L1:%.*]] = shufflevector <1 x i32> [[L0]], <1 x i32> poison, <3 x i32> zeroinitializer
// CHECK-NEXT: [[TruncV:%.*]] = trunc <3 x i32> [[L1]] to <3 x i1>
// CHECK-NEXT: [[L2:%.*]] = zext <3 x i1> [[TruncV]] to <3 x i32>
-// CHECK-NEXT: [[L3:%.*]] = load <4 x i32>, ptr [[B]], align 16
-// CHECK-NEXT: [[L4:%.*]] = shufflevector <3 x i32> [[L2]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: [[L5:%.*]] = shufflevector <4 x i32> [[L3]], <4 x i32> [[L4]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-// CHECK-NEXT: store <4 x i32> [[L5]], ptr [[B]], align 16
+// CHECK-NEXT: [[V1:%.*]] = extractelement <3 x i32> [[L2]], i32 0
+// CHECK-NEXT: store i32 [[V1]], ptr %B, align 4
+// CHECK-NEXT: [[V2:%.*]] = extractelement <3 x i32> [[L2]], i32 1
+// CHECK-NEXT: [[B2:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 1
+// CHECK-NEXT: store i32 [[V2]], ptr [[B2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = extractelement <3 x i32> [[L2]], i32 2
+// CHECK-NEXT: [[B3:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 2
void BoolSizeMismatch() {
bool4 B = {true,true,true,true};
B.xyz = false.xxx;
diff --git a/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl
new file mode 100644
index 0000000000000..c632e795098ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -finclude-default-header -fnative-half-type \
+// RUN: -triple dxil-pc-shadermodel6.3-library %s -disable-llvm-passes \
+// RUN: -emit-llvm -o - | FileCheck %s
+
+// CHECK-LABEL: Single
+
+// Setup local vars.
+// CHECK: [[VecAddr:%.*]] = alloca <3 x i64>, align 32
+// CHECK-NEXT: [[AAddr:%.*]] = alloca i64, align 8
+// CHECK-NEXT: store <3 x i64> %vec, ptr [[VecAddr]], align 32
+// CHECK-NEXT: store i64 %a, ptr [[AAddr]], align 8
+
+// Update single element of the vector.
+// CHECK-NEXT: [[A:%.*]] = load i64, ptr [[AAddr]], align 8
+// CHECK-NEXT: [[Vy:%.*]] = getelementptr <3 x i64>, ptr [[VecAddr]], i32 0, i32 1
+// CHECK-NEXT: store i64 [[A]], ptr [[Vy]], align 8
+
+// Return.
+// CHECK-NEXT: [[RetVal:%.*]] = load <3 x i64>, ptr [[VecAddr]], align 32
+// CHECK-NEXT: ret <3 x i64> [[RetVal]]
+uint64_t3 Single(uint64_t3 vec, uint64_t a){
+ vec.y = a;
+ return vec;
+}
+
+// CHECK-LABEL: Double
+
+// Setup local vars.
+// CHECK: [[VecAddr:%.*]] = alloca <3 x float>, align 16
+// CHECK-NEXT: [[AAddr:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[BAddr:%.*]] = alloca float, align 4
+// CHECK-NEXT: store <3 x float> %vec, ptr [[VecAddr]], align 16
+// CHECK-NEXT: store float %a, ptr [[AAddr]], align 4
+// CHECK-NEXT: store float %b, ptr [[BAddr]], align 4
+
+// Create temporary vector {a, b}.
+// CHECK-NEXT: [[A:%.*]] = load float, ptr [[AAddr]], align 4
+// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+// CHECK-NEXT: [[B:%.*]] = load float, ptr [[BAddr]], align 4
+// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <2 x float> [[TmpVec0]], float [[B]], i32 1
+
+// Update two elements of the vector from temporary vector.
+// CHECK-NEXT: [[TmpX:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 0
+// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 2
+// CHECK-NEXT: store float [[TmpX]], ptr [[VecZ]], align 4
+// CHECK-NEXT: [[TmpY:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 1
+// CHECK-NEXT: [[VecY:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 1
+// CHECK-NEXT: store float [[TmpY]], ptr [[VecY]], align 4
+
+// Return.
+// CHECK-NEXT: [[RetVal:%.*]] = load <3 x float>, ptr [[VecAddr]], align 16
+// CHECK-NEXT: ret <3 x float> [[RetVal]]
+float3 Double(float3 vec, float a, float b) {
+ vec.zy = {a, b};
+ return vec;
+}
+
+// CHECK-LABEL: Shuffle
+
+// Setup local vars.
+// CHECK: [[VecAddr:%.*]] = alloca <4 x half>, align 8
+// CHECK-NEXT: [[AAddr:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[BAddr:%.*]] = alloca half, align 2
+// CHECK-NEXT: store <4 x half> %vec, ptr [[VecAddr]], align 8
+// CHECK-NEXT: store half %a, ptr [[AAddr]], align 2
+// CHECK-NEXT: store half %b, ptr [[BAddr]], align 2
+
+// Create temporary vector {a, b, 13.74, a}.
+// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2
+// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
+// CHECK-NEXT: [[B:%.*]] = load half, ptr [[BAddr]], align 2
+// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <4 x half> [[TmpVec0]], half [[B]], i32 1
+// CHECK-NEXT: [[TmpVec2:%.*]] = insertelement <4 x half> %vecinit1, half 0xH4ADF, i32 2
+// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2
+// CHECK-NEXT: [[TmpVec3:%.*]] = insertelement <4 x half> [[TmpVec2]], half [[A]], i32 3
+
+// Update four elements of the vector via mixed up swizzle from the temporary vector.
+// CHECK-NEXT: [[TmpX:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 0
+// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 2
+// CHECK-NEXT: store half [[TmpX]], ptr [[VecZ]], align 2
+// CHECK-NEXT: [[TmpY:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 1
+// CHECK-NEXT: [[VecW:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 3
+// CHECK-NEXT: store half [[TmpY]], ptr [[VecW]], align 2
+// CHECK-NEXT: [[TmpZ:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 2
+// CHECK-NEXT: store half [[TmpZ]], ptr [[VecAddr]], align 2
+// CHECK-NEXT: [[TmpW:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 3
+// CHECK-NEXT: [[VecY:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 1
+// CHECK-NEXT: store half [[TmpW]], ptr [[VecY]], align 2
+
+// Return.
+// CHECK-NEXT: [[RetVal:%.*]] = load <4 x half>, ptr [[VecAddr]], align 8
+// CHECK-NEXT: ret <4 x half> [[RetVal]]
+half4 Shuffle(half4 vec, half a, half b) {
+ vec.zwxy = {a, b, 13.74, a};
+ return vec;
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/169090
More information about the cfe-commits mailing list