[clang] [HLSL][Sema] Fix Struct Size Calculation containing 16/32 bit scalars (PR #128086)

Fri Feb 21 15:51:06 PST 2025

https://github.com/V-FEXrt updated https://github.com/llvm/llvm-project/pull/128086

>From bebfc20fac6b27d02bca9af328d0568018672c71 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Thu, 20 Feb 2025 16:16:16 -0700
Subject: [PATCH 1/3] [hlsl][Sema] Fix Struct Size Calculation containing 16
 and 32 bit wide scalars

---
 clang/lib/Sema/SemaHLSL.cpp               | 26 ++++++++--
 clang/test/CodeGenHLSL/cbuffer_align.hlsl | 60 +++++++++++++++++++++++
 2 files changed, 81 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/cbuffer_align.hlsl

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index d26d85d5861b1..6f99b7899891d 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -172,6 +172,26 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
   return Result;
 }
 
+static unsigned calculateLegacyCbufferFieldAlign(const ASTContext &Context,
+                                           QualType T) {
+  // Aggregate types are always aligned to new buffer rows
+  if (T->isAggregateType())
+    return 16;
+
+  assert(Context.getTypeSize(T) <= 64 && "Scalar bit widths larger than 64 not supported");
+
+  // 64 bit types such as double and uint64_t align to 8 bytes
+  if (Context.getTypeSize(T) == 64)
+    return 8;
+
+  // Half types align to 2 bytes only if native half is available
+  if (T->isHalfType() && Context.getLangOpts().NativeHalfType)
+    return 2;
+
+  // Everything else aligns to 4 bytes
+  return 4;
+}
+
 // Calculate the size of a legacy cbuffer type in bytes based on
 // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
 static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
@@ -183,11 +203,7 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
     for (const FieldDecl *Field : RD->fields()) {
       QualType Ty = Field->getType();
       unsigned FieldSize = calculateLegacyCbufferSize(Context, Ty);
-      // FIXME: This is not the correct alignment, it does not work for 16-bit
-      // types. See llvm/llvm-project#119641.
-      unsigned FieldAlign = 4;
-      if (Ty->isAggregateType())
-        FieldAlign = CBufferAlign;
+      unsigned FieldAlign = calculateLegacyCbufferFieldAlign(Context, Ty);
       Size = llvm::alignTo(Size, FieldAlign);
       Size += FieldSize;
     }
diff --git a/clang/test/CodeGenHLSL/cbuffer_align.hlsl b/clang/test/CodeGenHLSL/cbuffer_align.hlsl
new file mode 100644
index 0000000000000..31db2e0726020
--- /dev/null
+++ b/clang/test/CodeGenHLSL/cbuffer_align.hlsl
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=CHECK-HALF
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=CHECK-FLOAT
+
+
+struct S1 {
+  half a;   // 2 bytes + 2 bytes pad or 4 bytes
+  float b;  // 4 bytes
+  half c;   // 2 bytes + 2 bytes pad or 4 bytes
+  float d;  // 4 bytes
+  double e; // 8 bytes
+};
+
+struct S2 {
+  half a;   // 2 bytes or 4 bytes
+  half b;   // 2 bytes or 4 bytes
+  float e;  // 4 bytes or 4 bytes + 4 padding
+  double f; // 8 bytes
+};
+
+struct S3 {
+  half a;     // 2 bytes + 6 bytes pad or 4 bytes + 4 bytes pad
+  uint64_t b; // 8 bytes
+};
+
+struct S4 {
+  float a;  // 4 bytes
+  half b;   // 2 bytes or 4 bytes
+  half c;   // 2 bytes or 4 bytes + 4 bytes pad
+  double d; // 8 bytes
+};
+
+
+cbuffer CB0 {
+  // CHECK-HALF: @CB0.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB0, 24, 0))
+  // CHECK-FLOAT: @CB0.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB0, 24, 0))
+  S1 s1;
+}
+
+cbuffer CB1 {
+  // CHECK-HALF: @CB1.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 16, 0))
+  // CHECK-FLOAT: @CB1.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0))
+  S2 s2;
+}
+
+cbuffer CB2 {
+  // CHECK-HALF: @CB2.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 16, 0))
+  // CHECK-FLOAT: @CB2.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 16, 0))
+  S3 s3;
+}
+
+cbuffer CB3 {
+  // CHECK-HALF: @CB3.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB3, 16, 0))
+  // CHECK-FLOAT: @CB3.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB3, 24, 0))
+  S4 s4;
+}

>From e8b3032cbf09e2e3170a212b35ac0ad2f755ca9f Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Thu, 20 Feb 2025 16:16:47 -0700
Subject: [PATCH 2/3] format

---
 clang/lib/Sema/SemaHLSL.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 6f99b7899891d..cccfd4aec7eb2 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -173,12 +173,13 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
 }
 
 static unsigned calculateLegacyCbufferFieldAlign(const ASTContext &Context,
-                                           QualType T) {
+                                                 QualType T) {
   // Aggregate types are always aligned to new buffer rows
   if (T->isAggregateType())
     return 16;
 
-  assert(Context.getTypeSize(T) <= 64 && "Scalar bit widths larger than 64 not supported");
+  assert(Context.getTypeSize(T) <= 64 &&
+         "Scalar bit widths larger than 64 not supported");
 
   // 64 bit types such as double and uint64_t align to 8 bytes
   if (Context.getTypeSize(T) == 64)

>From c6f3871c472d91faa6e052d4f13bd9aa0041db53 Mon Sep 17 00:00:00 2001
From: Ashley Coleman <ascoleman at microsoft.com>
Date: Fri, 21 Feb 2025 16:49:20 -0700
Subject: [PATCH 3/3] Address comments

---
 clang/lib/Sema/SemaHLSL.cpp               | 28 ++++---
 clang/test/CodeGenHLSL/cbuffer_align.hlsl | 97 +++++++++++++----------
 2 files changed, 70 insertions(+), 55 deletions(-)

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index cccfd4aec7eb2..e4a446f3fe37e 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -174,23 +174,19 @@ Decl *SemaHLSL::ActOnStartBuffer(Scope *BufferScope, bool CBuffer,
 
 static unsigned calculateLegacyCbufferFieldAlign(const ASTContext &Context,
                                                  QualType T) {
-  // Aggregate types are always aligned to new buffer rows
-  if (T->isAggregateType())
+  // Arrays and Structs are always aligned to new buffer rows
+  if (T->isArrayType() || T->isStructureType())
     return 16;
 
+  // Vectors are aligned to the type they contain
+  if (const VectorType *VT = T->getAs<VectorType>())
+    return calculateLegacyCbufferFieldAlign(Context, VT->getElementType());
+
   assert(Context.getTypeSize(T) <= 64 &&
          "Scalar bit widths larger than 64 not supported");
 
-  // 64 bit types such as double and uint64_t align to 8 bytes
-  if (Context.getTypeSize(T) == 64)
-    return 8;
-
-  // Half types align to 2 bytes only if native half is available
-  if (T->isHalfType() && Context.getLangOpts().NativeHalfType)
-    return 2;
-
-  // Everything else aligns to 4 bytes
-  return 4;
+  // Scalar types are aligned to their byte width
+  return Context.getTypeSize(T) / 8;
 }
 
 // Calculate the size of a legacy cbuffer type in bytes based on
@@ -205,6 +201,14 @@ static unsigned calculateLegacyCbufferSize(const ASTContext &Context,
       QualType Ty = Field->getType();
       unsigned FieldSize = calculateLegacyCbufferSize(Context, Ty);
       unsigned FieldAlign = calculateLegacyCbufferFieldAlign(Context, Ty);
+
+      // If the field crosses the row boundary after alignment it drops to the
+      // next row
+      unsigned AlignSize = llvm::alignTo(Size, FieldAlign);
+      if ((AlignSize % CBufferAlign) + FieldSize > CBufferAlign) {
+        FieldAlign = CBufferAlign;
+      }
+
       Size = llvm::alignTo(Size, FieldAlign);
       Size += FieldSize;
     }
diff --git a/clang/test/CodeGenHLSL/cbuffer_align.hlsl b/clang/test/CodeGenHLSL/cbuffer_align.hlsl
index 31db2e0726020..77f6ee8919429 100644
--- a/clang/test/CodeGenHLSL/cbuffer_align.hlsl
+++ b/clang/test/CodeGenHLSL/cbuffer_align.hlsl
@@ -1,60 +1,71 @@
 // RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=CHECK-HALF
+// RUN:   -fsyntax-only -verify -verify-ignore-unexpected=warning
 
-// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=CHECK-FLOAT
+struct S0 {
+  half a;
+  half b;
+  half c;
+  half d;
+  half e;
+  half f;
+  half g;
+  half h;
+};
+
+cbuffer CB0Pass {
+  S0 s0p : packoffset(c0.x);
+  float f0p : packoffset(c1.x);
+}
 
+cbuffer CB0Fail {
+  S0 s0f : packoffset(c0.x);
+  float f0f : packoffset(c0.w);
+  // expected-error at -1 {{packoffset overlap between 'f0f', 's0f'}}
+}
 
 struct S1 {
-  half a;   // 2 bytes + 2 bytes pad or 4 bytes
-  float b;  // 4 bytes
-  half c;   // 2 bytes + 2 bytes pad or 4 bytes
-  float d;  // 4 bytes
-  double e; // 8 bytes
+  float a;
+  double b;
+  float c;
 };
 
-struct S2 {
-  half a;   // 2 bytes or 4 bytes
-  half b;   // 2 bytes or 4 bytes
-  float e;  // 4 bytes or 4 bytes + 4 padding
-  double f; // 8 bytes
-};
+cbuffer CB1Pass {
+  S1 s1p : packoffset(c0.x);
+  float f1p : packoffset(c1.y);
+}
 
-struct S3 {
-  half a;     // 2 bytes + 6 bytes pad or 4 bytes + 4 bytes pad
-  uint64_t b; // 8 bytes
-};
+cbuffer CB1Fail {
+  S1 s1f : packoffset(c0.x);
+  float f1f : packoffset(c1.x);
+  // expected-error at -1 {{packoffset overlap between 'f1f', 's1f'}}
+}
 
-struct S4 {
-  float a;  // 4 bytes
-  half b;   // 2 bytes or 4 bytes
-  half c;   // 2 bytes or 4 bytes + 4 bytes pad
-  double d; // 8 bytes
+struct S2 {
+  float3 a;
+  float2 b;
 };
 
-
-cbuffer CB0 {
-  // CHECK-HALF: @CB0.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB0, 24, 0))
-  // CHECK-FLOAT: @CB0.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB0, 24, 0))
-  S1 s1;
+cbuffer CB2Pass {
+  S2 s2p : packoffset(c0.x);
+  float f2p : packoffset(c1.z);
 }
 
-cbuffer CB1 {
-  // CHECK-HALF: @CB1.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 16, 0))
-  // CHECK-FLOAT: @CB1.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB1, 24, 0))
-  S2 s2;
+cbuffer CB2Fail {
+  S2 s2f : packoffset(c0.x);
+  float f2f : packoffset(c1.y);
+  // expected-error at -1 {{packoffset overlap between 'f2f', 's2f'}}
 }
 
-cbuffer CB2 {
-  // CHECK-HALF: @CB2.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 16, 0))
-  // CHECK-FLOAT: @CB2.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB2, 16, 0))
-  S3 s3;
-}
 
-cbuffer CB3 {
-  // CHECK-HALF: @CB3.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB3, 16, 0))
-  // CHECK-FLOAT: @CB3.cb = external constant target("dx.CBuffer", target("dx.Layout", %__cblayout_CB3, 24, 0))
-  S4 s4;
-}
+#if 0
+struct S3 {
+  float3 a;
+  float b;
+};
+
+struct S4 {
+  float2 a;
+  float2 b;
+};
+#endif