[llvm] [DirectX] Move the scalarizer pass to before dxil-flatten-arrays (PR #146800)

Deric C. via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 14 16:46:25 PDT 2025


https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/146800

>From fad3c2ffe348a19afba2e7d8fda2e1fdfe8d4bf3 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 1 Jul 2025 02:55:04 +0000
Subject: [PATCH 1/2] Move scalarizer to before dxil-flatten-arrays

---
 .../Target/DirectX/DXILDataScalarization.cpp  |   7 +-
 .../Target/DirectX/DirectXTargetMachine.cpp   |   2 +-
 llvm/test/CodeGen/DirectX/llc-pipeline.ll     |   4 +-
 .../DirectX/llc-vector-load-scalarize.ll      | 157 ++++++++++++++----
 llvm/test/CodeGen/DirectX/scalar-store.ll     |  30 +++-
 5 files changed, 160 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index c97c604fdbf77..cfe270d432144 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -302,7 +302,7 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
 
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   Value *PtrOperand = GEPI.getPointerOperand();
-  Type *OrigGEPType = GEPI.getPointerOperandType();
+  Type *OrigGEPType = GEPI.getSourceElementType();
   Type *NewGEPType = OrigGEPType;
   bool NeedsTransform = false;
 
@@ -319,6 +319,11 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     }
   }
 
+  // Scalar geps should remain scalars geps. The dxil-flatten-arrays pass will
+  // convert these scalar geps into flattened array geps
+  if (!isa<ArrayType>(OrigGEPType))
+    NewGEPType = OrigGEPType;
+
   // Note: We bail if this isn't a gep touched via alloca or global
   // transformations
   if (!NeedsTransform)
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 40fe6c6e639e4..84751d2db2266 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -107,10 +107,10 @@ class DirectXPassConfig : public TargetPassConfig {
     addPass(createDXILIntrinsicExpansionLegacyPass());
     addPass(createDXILCBufferAccessLegacyPass());
     addPass(createDXILDataScalarizationLegacyPass());
-    addPass(createDXILFlattenArraysLegacyPass());
     ScalarizerPassOptions DxilScalarOptions;
     DxilScalarOptions.ScalarizeLoadStore = true;
     addPass(createScalarizerPass(DxilScalarOptions));
+    addPass(createDXILFlattenArraysLegacyPass());
     addPass(createDXILForwardHandleAccessesLegacyPass());
     addPass(createDXILLegalizeLegacyPass());
     addPass(createDXILResourceImplicitBindingLegacyPass());
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 36fed88fc52d6..151603a7161c5 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -19,10 +19,12 @@
 ; CHECK-NEXT:   DXIL Intrinsic Expansion
 ; CHECK-NEXT:   DXIL CBuffer Access
 ; CHECK-NEXT:   DXIL Data Scalarization
-; CHECK-NEXT:   DXIL Array Flattener
 ; CHECK-NEXT:   FunctionPass Manager
 ; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     Scalarize vector operations
+; CHECK-NEXT:   DXIL Array Flattener
+; CHECK-NEXT:   FunctionPass Manager
+; CHECK-NEXT:     Dominator Tree Construction
 ; CHECK-NEXT:     DXIL Forward Handle Accesses
 ; CHECK-NEXT:     DXIL Legalizer
 ; CHECK-NEXT:   DXIL Resource Binding Analysis
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 78550adbe424a..1e2ff60c56684 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -8,30 +8,19 @@
 @staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
 @"groupshared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x <4 x i32>]] zeroinitializer, align 16
 
-; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [8 x i32] zeroinitializer, align 16
-; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
-; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
-; CHECK: @groupshared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
 
-; CHECK-NOT: @arrayofVecData
-; CHECK-NOT: @arrayofVecData.scalarized
-; CHECK-NOT: @vecData
-; CHECK-NOT: @staticArrayOfVecData
-; CHECK-NOT: @staticArrayOfVecData.scalarized
-; CHECK-NOT: @groupshared2dArrayofVectors
-; CHECK-NOT: @groupshared2dArrayofVectors.scalarized
 
 define <4 x i32> @load_array_vec_test() #0 {
 ; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 2), align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), i32 1), align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), i32 2), align 4
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), i32 3), align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 6), align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 7), align 4
 ; CHECK-NEXT:    [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
 ; CHECK-NEXT:    [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
 ; CHECK-NEXT:    [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
@@ -53,9 +42,9 @@ define <4 x i32> @load_vec_test() #0 {
 ; CHECK-LABEL: define <4 x i32> @load_vec_test(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4
 ; CHECK-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP2]], i32 1
 ; CHECK-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP3]], i32 2
@@ -66,6 +55,42 @@ define <4 x i32> @load_vec_test() #0 {
   ret <4 x i32> %1
 }
 
+define <4 x i32> @load_vec_from_scalar_gep_test() #0 {
+; CHECK-LABEL: define <4 x i32> @load_vec_from_scalar_gep_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DOTI04:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds nuw ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[DOTI116:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 4
+; CHECK-NEXT:    [[DOTI228:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 6), align 4
+; CHECK-NEXT:    [[DOTI3310:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 7), align 4
+; CHECK-NEXT:    [[DOTUPTO011:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI04]], i32 0
+; CHECK-NEXT:    [[DOTUPTO112:%.*]] = insertelement <4 x i32> [[DOTUPTO011]], i32 [[DOTI116]], i32 1
+; CHECK-NEXT:    [[DOTUPTO213:%.*]] = insertelement <4 x i32> [[DOTUPTO112]], i32 [[DOTI228]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO213]], i32 [[DOTI3310]], i32 3
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = getelementptr inbounds nuw i32, ptr addrspace(3) @"arrayofVecData", i32 4
+  %2 = load <4 x i32>, ptr addrspace(3) %1, align 4
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @load_vec_from_i8_gep_test() #0 {
+; CHECK-LABEL: define <4 x i32> @load_vec_from_i8_gep_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DOTI04:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds nuw ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
+; CHECK-NEXT:    [[DOTI116:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 4
+; CHECK-NEXT:    [[DOTI228:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 6), align 4
+; CHECK-NEXT:    [[DOTI3310:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 7), align 4
+; CHECK-NEXT:    [[DOTUPTO011:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI04]], i32 0
+; CHECK-NEXT:    [[DOTUPTO112:%.*]] = insertelement <4 x i32> [[DOTUPTO011]], i32 [[DOTI116]], i32 1
+; CHECK-NEXT:    [[DOTUPTO213:%.*]] = insertelement <4 x i32> [[DOTUPTO112]], i32 [[DOTI228]], i32 2
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO213]], i32 [[DOTI3310]], i32 3
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = getelementptr inbounds nuw i8, ptr addrspace(3) @"arrayofVecData", i32 16
+  %2 = load <4 x i32>, ptr addrspace(3) %1, align 4
+  ret <4 x i32> %2
+}
+
 define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
 ; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
 ; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
@@ -73,11 +98,17 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP3]]
 ; CHECK-NEXT:    [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP2]]
 ; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, ptr [[DOTFLAT]], align 4
-; CHECK-NEXT:    [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 1, [[TMP4]]
+; CHECK-NEXT:    [[DOTFLAT_I1:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP5]]
 ; CHECK-NEXT:    [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
-; CHECK-NEXT:    [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 2, [[TMP6]]
+; CHECK-NEXT:    [[DOTFLAT_I2:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP7]]
 ; CHECK-NEXT:    [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
-; CHECK-NEXT:    [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 3, [[TMP8]]
+; CHECK-NEXT:    [[DOTFLAT_I3:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP9]]
 ; CHECK-NEXT:    [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
 ; CHECK-NEXT:    [[DOTUPTO01:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
 ; CHECK-NEXT:    [[DOTUPTO12:%.*]] = insertelement <4 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i32 1
@@ -90,17 +121,85 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
   ret <4 x i32> %4
 }
 
+define <4 x i32> @load_static_array_of_vec_from_scalar_gep_test(i32 %index) #0 {
+; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_from_scalar_gep_test(
+; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 0, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP3]]
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 1, [[TMP5]]
+; CHECK-NEXT:    [[DOTI14:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP6]]
+; CHECK-NEXT:    [[DOTI11:%.*]] = load i32, ptr [[DOTI14]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 2, [[TMP7]]
+; CHECK-NEXT:    [[DOTI25:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP8]]
+; CHECK-NEXT:    [[DOTI22:%.*]] = load i32, ptr [[DOTI25]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 3, [[TMP9]]
+; CHECK-NEXT:    [[DOTI36:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP10]]
+; CHECK-NEXT:    [[DOTI33:%.*]] = load i32, ptr [[DOTI36]], align 4
+; CHECK-NEXT:    [[DOTUPTO07:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    [[DOTUPTO18:%.*]] = insertelement <4 x i32> [[DOTUPTO07]], i32 [[DOTI11]], i32 1
+; CHECK-NEXT:    [[DOTUPTO29:%.*]] = insertelement <4 x i32> [[DOTUPTO18]], i32 [[DOTI22]], i32 2
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[DOTUPTO29]], i32 [[DOTI33]], i32 3
+; CHECK-NEXT:    ret <4 x i32> [[TMP11]]
+;
+  %2 = mul i32 %index, 4
+  %3 = getelementptr inbounds i32, ptr @staticArrayOfVecData, i32 %2
+  %4 = load <4 x i32>, <4 x i32>* %3, align 4
+  ret <4 x i32> %4
+}
+
+define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(i32 %index) #0 {
+; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(
+; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[INDEX]], 12
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr i32 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 0, [[TMP12]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP3]]
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr i32 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 1, [[TMP14]]
+; CHECK-NEXT:    [[DOTI14:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP6]]
+; CHECK-NEXT:    [[DOTI11:%.*]] = load i32, ptr [[DOTI14]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr i32 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 2, [[TMP15]]
+; CHECK-NEXT:    [[DOTI25:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP8]]
+; CHECK-NEXT:    [[DOTI22:%.*]] = load i32, ptr [[DOTI25]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr i32 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 3, [[TMP13]]
+; CHECK-NEXT:    [[DOTI36:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP10]]
+; CHECK-NEXT:    [[DOTI33:%.*]] = load i32, ptr [[DOTI36]], align 4
+; CHECK-NEXT:    [[DOTUPTO07:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    [[DOTUPTO18:%.*]] = insertelement <4 x i32> [[DOTUPTO07]], i32 [[DOTI11]], i32 1
+; CHECK-NEXT:    [[DOTUPTO29:%.*]] = insertelement <4 x i32> [[DOTUPTO18]], i32 [[DOTI22]], i32 2
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[DOTUPTO29]], i32 [[DOTI33]], i32 3
+; CHECK-NEXT:    ret <4 x i32> [[TMP11]]
+;
+  %2 = mul i32 %index, 12
+  %3 = getelementptr inbounds i8, ptr @staticArrayOfVecData, i32 %2
+  %4 = load <4 x i32>, <4 x i32>* %3, align 4
+  ret <4 x i32> %4
+}
+
 define <4 x i32> @multid_load_test() #0 {
 ; CHECK-LABEL: define <4 x i32> @multid_load_test(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 1), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 2), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 3), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 2), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 3), align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), align 4
-; CHECK-NEXT:    [[DOTI13:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), i32 1), align 4
-; CHECK-NEXT:    [[DOTI25:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), i32 2), align 4
-; CHECK-NEXT:    [[DOTI37:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), i32 3), align 4
+; CHECK-NEXT:    [[DOTI13:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 17), align 4
+; CHECK-NEXT:    [[DOTI25:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 18), align 4
+; CHECK-NEXT:    [[DOTI37:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 19), align 4
 ; CHECK-NEXT:    [[DOTI08:%.*]] = add i32 [[TMP1]], [[TMP5]]
 ; CHECK-NEXT:    [[DOTI19:%.*]] = add i32 [[TMP2]], [[DOTI13]]
 ; CHECK-NEXT:    [[DOTI210:%.*]] = add i32 [[TMP3]], [[DOTI25]]
diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll
index 7e9fe0e330661..a124c665ad15e 100644
--- a/llvm/test/CodeGen/DirectX/scalar-store.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-store.ll
@@ -14,17 +14,31 @@
 
 ; CHECK-LABEL: store_array_vec_test
 define void @store_array_vec_test () local_unnamed_addr #0 {
-    ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.1dim.*|%.*)}}, align {{4|8|16}}
-    ; CHECK-NEXT: ret void
-    store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16 
-    store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3)   getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16 
-    ret void
- } 
+; CHECK-NEXT:    store float 1.000000e+00, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 16
+; CHECK-NEXT:    store float 2.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 8
+; CHECK-NEXT:    store float 2.000000e+00, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 16
+; CHECK-NEXT:    store float 4.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
+; CHECK-NEXT:    store float 6.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 8
+; CHECK-NEXT:    ret void
+;
+  store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
+  store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 12), align 16
+  ret void
+}
 
 ; CHECK-LABEL: store_vec_test
 define void @store_vec_test(<4 x i32> %inputVec) #0 {
-  ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4 
-  ; CHECK-NEXT: ret void
+; CHECK-NEXT:    [[INPUTVEC_I01:%.*]] = extractelement <4 x i32> %inputVec, i32 0
+; CHECK-NEXT:    store i32 [[INPUTVEC_I01]], ptr addrspace(3) @vecData.scalarized, align 4
+; CHECK-NEXT:    [[INPUTVEC_I12:%.*]] = extractelement <4 x i32> %inputVec, i32 1
+; CHECK-NEXT:    store i32 [[INPUTVEC_I12]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[INPUTVEC_I23:%.*]] = extractelement <4 x i32> %inputVec, i32 2
+; CHECK-NEXT:    store i32 [[INPUTVEC_I23]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4
+; CHECK-NEXT:    [[INPUTVEC_I34:%.*]] = extractelement <4 x i32> %inputVec, i32 3
+; CHECK-NEXT:    store i32 [[INPUTVEC_I34]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4
+; CHECK-NEXT:    ret void
+;
   store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
   ret void
 }

>From 1ea2cc54a8f3d95f33c8e61d50881b1d74dbf225 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 3 Jul 2025 00:04:37 +0000
Subject: [PATCH 2/2] Re-add CHECK lines that were removed by script

---
 .../test/CodeGen/DirectX/llc-vector-load-scalarize.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 1e2ff60c56684..27a892591a867 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -8,7 +8,18 @@
 @staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
 @"groupshared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x <4 x i32>]] zeroinitializer, align 16
 
+; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [8 x i32] zeroinitializer, align 16
+; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
+; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
+; CHECK: @groupshared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
 
+; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @arrayofVecData.scalarized
+; CHECK-NOT: @vecData
+; CHECK-NOT: @staticArrayOfVecData
+; CHECK-NOT: @staticArrayOfVecData.scalarized
+; CHECK-NOT: @groupshared2dArrayofVectors
+; CHECK-NOT: @groupshared2dArrayofVectors.scalarized
 
 define <4 x i32> @load_array_vec_test() #0 {
 ; CHECK-LABEL: define <4 x i32> @load_array_vec_test(



More information about the llvm-commits mailing list