[clang] [HLSL] Update vector elements individually (PR #169090)

via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 21 12:02:00 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Helena Kotas (hekota)

<details>
<summary>Changes</summary>

Storing to individual elements of a vector through vector swizzle components needs to be handled as separate stores, one per element. We need to avoid a load/modify/store of the whole vector so that we do not overwrite other vector elements that might be getting updated in parallel.

Fixes #<!-- -->152815

---
Full diff: https://github.com/llvm/llvm-project/pull/169090.diff


4 Files Affected:

- (modified) clang/lib/CodeGen/CGExpr.cpp (+39-8) 
- (modified) clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl (+9-3) 
- (modified) clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl (+15-10) 
- (added) clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl (+96) 


``````````diff
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f2451b16e78be..45c8f4df03ab0 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2801,18 +2801,50 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
                                                                LValue Dst) {
   llvm::Value *SrcVal = Src.getScalarVal();
   Address DstAddr = Dst.getExtVectorAddress();
+  const llvm::Constant *Elts = Dst.getExtVectorElts();
   if (DstAddr.getElementType()->getScalarSizeInBits() >
       SrcVal->getType()->getScalarSizeInBits())
     SrcVal = Builder.CreateZExt(
         SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType()));
 
-  // HLSL allows storing to scalar values through ExtVector component LValues.
-  // To support this we need to handle the case where the destination address is
-  // a scalar.
-  if (!DstAddr.getElementType()->isVectorTy()) {
-    assert(!Dst.getType()->isVectorType() &&
-           "this should only occur for non-vector l-values");
-    Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
+  if (getLangOpts().HLSL) {
+    llvm::Type *DestAddrTy = DstAddr.getElementType();
+    // HLSL allows storing to scalar values through ExtVector component LValues.
+    // To support this we need to handle the case where the destination address
+    // is a scalar.
+    if (!DestAddrTy->isVectorTy()) {
+      assert(!Dst.getType()->isVectorType() &&
+             "this should only occur for non-vector l-values");
+      Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
+      return;
+    }
+
+    // In HLSL, storing to individual elements of a vector through ExtVector
+    // components needs to be handled as separate store instructions. We need to
+    // avoid the load/modify/store sequence to prevent overwriting other
+    // elements that might be getting updated in parallel.
+    // If we are updating multiple elements, Dst and Src are vectors; for
+    // a single element update they are scalars.
+    const VectorType *VTy = Dst.getType()->getAs<VectorType>();
+    unsigned NumSrcElts = VTy ? VTy->getNumElements() : 1;
+    CharUnits ElemAlign = CharUnits::fromQuantity(
+        CGM.getDataLayout().getPrefTypeAlign(DestAddrTy->getScalarType()));
+    llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
+
+    for (unsigned I = 0; I != NumSrcElts; ++I) {
+      llvm::Value *Val = VTy ? Builder.CreateExtractElement(
+                                   SrcVal, llvm::ConstantInt::get(Int32Ty, I))
+                             : SrcVal;
+      unsigned FieldNo = getAccessedFieldNo(I, Elts);
+      Address DstElemAddr = Address::invalid();
+      if (FieldNo == 0)
+        DstElemAddr = DstAddr.withAlignment(ElemAlign);
+      else
+        DstElemAddr = Builder.CreateGEP(
+            DstAddr, {Zero, llvm::ConstantInt::get(Int32Ty, FieldNo)},
+            DestAddrTy, ElemAlign);
+      Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified());
+    }
     return;
   }
 
@@ -2820,7 +2852,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
   // value now.
   llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
   llvm::Type *VecTy = Vec->getType();
-  const llvm::Constant *Elts = Dst.getExtVectorElts();
 
   if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
     unsigned NumSrcElts = VTy->getNumElements();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..ec03804ad1a4c 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -101,10 +101,16 @@ void funky(inout int3 X) {
 // Call the function with the temporary.
 // CHECK: call void {{.*}}funky{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[ArgTmp]])
 
-// Shuffle it back.
+// Write it back.
 // CHECK:  [[RetVal:%.*]] = load <3 x i32>, ptr [[ArgTmp]]
-// CHECK:  [[Vxyz:%.*]] = shufflevector <3 x i32> [[RetVal]], <3 x i32> poison, <3 x i32> <i32 2, i32 0, i32 1>
-// CHECK:  store <3 x i32> [[Vxyz]], ptr [[V]]
+// CHECK:  [[Src0:%.*]] = extractelement <3 x i32> [[RetVal]], i32 0
+// CHECK:  [[PtrY:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 1
+// CHECK:  store i32 [[Src0]], ptr [[PtrY]], align 4
+// CHECK:  [[Src1:%.*]] = extractelement <3 x i32> [[RetVal]], i32 1
+// CHECK:  [[PtrZ:%.*]] = getelementptr <3 x i32>, ptr %V, i32 0, i32 2
+// CHECK:  store i32 [[Src1]], ptr [[PtrZ]], align 4
+// CHECK:  [[Src2:%.*]] = extractelement <3 x i32> [[RetVal]], i32 2
+// CHECK:  store i32 [[Src2]], ptr %V, align 4
 
 // OPT: ret <3 x i32> <i32 3, i32 1, i32 2>
 export int3 case4() {
diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
index 7804239edccae..270598265c660 100644
--- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
@@ -259,9 +259,8 @@ bool AssignBool(bool V) {
 // CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
 // CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1
 // CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32
-// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
-// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1
-// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8
+// CHECK-NEXT: [[C:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[D]], ptr [[C]], align 4
 // CHECK-NEXT: ret void
 void AssignBool2(bool V) {
   bool2 X = true.xx;
@@ -277,10 +276,13 @@ void AssignBool2(bool V) {
 // CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8
 // CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
 // CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
-// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
-// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
-// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8
+// CHECK-NEXT: [[V1:%.*]] = extractelement <2 x i32> [[B]], i32 0
+// CHECK-NEXT: store i32 [[V1]], ptr [[X]], align 4
+// CHECK-NEXT: [[V2:%.*]] = extractelement <2 x i32> [[B]], i32 1
+// CHECK-NEXT: [[X2:%.*]] = getelementptr <2 x i32>, ptr [[X]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[V2]], ptr [[X2]], align 4
 // CHECK-NEXT: ret void
+
 void AssignBool3(bool2 V) {
   bool2 X = {true,true};
   X.xy = V;
@@ -313,10 +315,13 @@ bool2 AccessBools() {
 // CHECK-NEXT: [[L1:%.*]] = shufflevector <1 x i32> [[L0]], <1 x i32> poison, <3 x i32> zeroinitializer
 // CHECK-NEXT: [[TruncV:%.*]] = trunc <3 x i32> [[L1]] to <3 x i1>
 // CHECK-NEXT: [[L2:%.*]] = zext <3 x i1> [[TruncV]] to <3 x i32>
-// CHECK-NEXT: [[L3:%.*]] = load <4 x i32>, ptr [[B]], align 16
-// CHECK-NEXT: [[L4:%.*]] = shufflevector <3 x i32> [[L2]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT: [[L5:%.*]] = shufflevector <4 x i32> [[L3]], <4 x i32> [[L4]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-// CHECK-NEXT: store <4 x i32> [[L5]], ptr [[B]], align 16
+// CHECK-NEXT: [[V1:%.*]] = extractelement <3 x i32> [[L2]], i32 0
+// CHECK-NEXT: store i32 [[V1]], ptr %B, align 4
+// CHECK-NEXT: [[V2:%.*]] = extractelement <3 x i32> [[L2]], i32 1
+// CHECK-NEXT: [[B2:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 1
+// CHECK-NEXT: store i32 [[V2]], ptr [[B2]], align 4
+// CHECK-NEXT: [[V3:%.*]] = extractelement <3 x i32> [[L2]], i32 2
+// CHECK-NEXT: [[B3:%.*]] = getelementptr <4 x i32>, ptr %B, i32 0, i32 2
 void BoolSizeMismatch() {
   bool4 B = {true,true,true,true};
   B.xyz = false.xxx;
diff --git a/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl
new file mode 100644
index 0000000000000..c632e795098ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/VectorSwizzles.hlsl
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -finclude-default-header -fnative-half-type  \
+// RUN:   -triple dxil-pc-shadermodel6.3-library %s -disable-llvm-passes \
+// RUN:   -emit-llvm -o - | FileCheck %s
+
+// CHECK-LABEL: Single
+
+// Setup local vars.
+// CHECK: [[VecAddr:%.*]] = alloca <3 x i64>, align 32
+// CHECK-NEXT: [[AAddr:%.*]] = alloca i64, align 8
+// CHECK-NEXT: store <3 x i64> %vec, ptr [[VecAddr]], align 32
+// CHECK-NEXT: store i64 %a, ptr [[AAddr]], align 8
+
+// Update single element of the vector.
+// CHECK-NEXT: [[A:%.*]] = load i64, ptr [[AAddr]], align 8
+// CHECK-NEXT: [[Vy:%.*]] = getelementptr <3 x i64>, ptr [[VecAddr]], i32 0, i32 1
+// CHECK-NEXT: store i64 [[A]], ptr [[Vy]], align 8
+
+// Return.
+// CHECK-NEXT: [[RetVal:%.*]] = load <3 x i64>, ptr [[VecAddr]], align 32
+// CHECK-NEXT: ret <3 x i64> [[RetVal]]
+uint64_t3 Single(uint64_t3 vec, uint64_t a){
+    vec.y = a;
+    return vec;
+}
+
+// CHECK-LABEL: Double
+
+// Setup local vars.
+// CHECK: [[VecAddr:%.*]] = alloca <3 x float>, align 16
+// CHECK-NEXT: [[AAddr:%.*]] = alloca float, align 4
+// CHECK-NEXT: [[BAddr:%.*]] = alloca float, align 4
+// CHECK-NEXT: store <3 x float> %vec, ptr [[VecAddr]], align 16
+// CHECK-NEXT: store float %a, ptr [[AAddr]], align 4
+// CHECK-NEXT: store float %b, ptr [[BAddr]], align 4
+
+// Create temporary vector {a, b}.
+// CHECK-NEXT: [[A:%.*]] = load float, ptr [[AAddr]], align 4
+// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <2 x float> poison, float [[A]], i32 0
+// CHECK-NEXT: [[B:%.*]] = load float, ptr [[BAddr]], align 4
+// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <2 x float> [[TmpVec0]], float [[B]], i32 1
+
+// Update two elements of the vector from temporary vector.
+// CHECK-NEXT: [[TmpX:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 0
+// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 2
+// CHECK-NEXT: store float [[TmpX]], ptr [[VecZ]], align 4
+// CHECK-NEXT: [[TmpY:%.*]] = extractelement <2 x float> [[TmpVec1]], i32 1
+// CHECK-NEXT: [[VecY:%.*]] = getelementptr <3 x float>, ptr [[VecAddr]], i32 0, i32 1
+// CHECK-NEXT: store float [[TmpY]], ptr [[VecY]], align 4
+
+// Return.
+// CHECK-NEXT: [[RetVal:%.*]] = load <3 x float>, ptr [[VecAddr]], align 16
+// CHECK-NEXT: ret <3 x float> [[RetVal]]
+float3 Double(float3 vec, float a, float b) {
+    vec.zy = {a, b};
+    return vec;
+}
+
+// CHECK-LABEL: Shuffle
+
+// Setup local vars.
+// CHECK: [[VecAddr:%.*]] = alloca <4 x half>, align 8
+// CHECK-NEXT: [[AAddr:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[BAddr:%.*]] = alloca half, align 2
+// CHECK-NEXT: store <4 x half> %vec, ptr [[VecAddr]], align 8
+// CHECK-NEXT: store half %a, ptr [[AAddr]], align 2
+// CHECK-NEXT: store half %b, ptr [[BAddr]], align 2
+
+// Create temporary vector {a, b, 13.74, a}.
+// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2
+// CHECK-NEXT: [[TmpVec0:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
+// CHECK-NEXT: [[B:%.*]] = load half, ptr [[BAddr]], align 2
+// CHECK-NEXT: [[TmpVec1:%.*]] = insertelement <4 x half> [[TmpVec0]], half [[B]], i32 1
+// CHECK-NEXT: [[TmpVec2:%.*]] = insertelement <4 x half> %vecinit1, half 0xH4ADF, i32 2
+// CHECK-NEXT: [[A:%.*]] = load half, ptr [[AAddr]], align 2
+// CHECK-NEXT: [[TmpVec3:%.*]] = insertelement <4 x half> [[TmpVec2]], half [[A]], i32 3
+
+// Update four elements of the vector via mixed up swizzle from the temporary vector.
+// CHECK-NEXT: [[TmpX:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 0
+// CHECK-NEXT: [[VecZ:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 2
+// CHECK-NEXT: store half [[TmpX]], ptr [[VecZ]], align 2
+// CHECK-NEXT: [[TmpY:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 1
+// CHECK-NEXT: [[VecW:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 3
+// CHECK-NEXT: store half [[TmpY]], ptr [[VecW]], align 2
+// CHECK-NEXT: [[TmpZ:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 2
+// CHECK-NEXT: store half [[TmpZ]], ptr [[VecAddr]], align 2
+// CHECK-NEXT: [[TmpW:%.*]] = extractelement <4 x half> [[TmpVec3]], i32 3
+// CHECK-NEXT: [[VecY:%.*]] = getelementptr <4 x half>, ptr [[VecAddr]], i32 0, i32 1
+// CHECK-NEXT: store half [[TmpW]], ptr [[VecY]], align 2
+
+// Return.
+// CHECK-NEXT: [[RetVal:%.*]] = load <4 x half>, ptr [[VecAddr]], align 8
+// CHECK-NEXT: ret <4 x half> [[RetVal]]
+half4 Shuffle(half4 vec, half a, half b) {
+    vec.zwxy = {a, b, 13.74, a};
+    return vec;
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/169090


More information about the cfe-commits mailing list