[llvm] [DirectX] Move the scalarizer pass to before dxil-flatten-arrays (PR #146800)
Deric C. via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 16:46:25 PDT 2025
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/146800
>From fad3c2ffe348a19afba2e7d8fda2e1fdfe8d4bf3 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 1 Jul 2025 02:55:04 +0000
Subject: [PATCH 1/2] Move scalarizer to before dxil-flatten-arrays
---
.../Target/DirectX/DXILDataScalarization.cpp | 7 +-
.../Target/DirectX/DirectXTargetMachine.cpp | 2 +-
llvm/test/CodeGen/DirectX/llc-pipeline.ll | 4 +-
.../DirectX/llc-vector-load-scalarize.ll | 157 ++++++++++++++----
llvm/test/CodeGen/DirectX/scalar-store.ll | 30 +++-
5 files changed, 160 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index c97c604fdbf77..cfe270d432144 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -302,7 +302,7 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
Value *PtrOperand = GEPI.getPointerOperand();
- Type *OrigGEPType = GEPI.getPointerOperandType();
+ Type *OrigGEPType = GEPI.getSourceElementType();
Type *NewGEPType = OrigGEPType;
bool NeedsTransform = false;
@@ -319,6 +319,11 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
}
}
+ // Scalar geps should remain scalar geps. The dxil-flatten-arrays pass will
+ // convert these scalar geps into flattened array geps.
+ if (!isa<ArrayType>(OrigGEPType))
+ NewGEPType = OrigGEPType;
+
// Note: We bail if this isn't a gep touched via alloca or global
// transformations
if (!NeedsTransform)
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 40fe6c6e639e4..84751d2db2266 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -107,10 +107,10 @@ class DirectXPassConfig : public TargetPassConfig {
addPass(createDXILIntrinsicExpansionLegacyPass());
addPass(createDXILCBufferAccessLegacyPass());
addPass(createDXILDataScalarizationLegacyPass());
- addPass(createDXILFlattenArraysLegacyPass());
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
addPass(createScalarizerPass(DxilScalarOptions));
+ addPass(createDXILFlattenArraysLegacyPass());
addPass(createDXILForwardHandleAccessesLegacyPass());
addPass(createDXILLegalizeLegacyPass());
addPass(createDXILResourceImplicitBindingLegacyPass());
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 36fed88fc52d6..151603a7161c5 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -19,10 +19,12 @@
; CHECK-NEXT: DXIL Intrinsic Expansion
; CHECK-NEXT: DXIL CBuffer Access
; CHECK-NEXT: DXIL Data Scalarization
-; CHECK-NEXT: DXIL Array Flattener
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Scalarize vector operations
+; CHECK-NEXT: DXIL Array Flattener
+; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: DXIL Forward Handle Accesses
; CHECK-NEXT: DXIL Legalizer
; CHECK-NEXT: DXIL Resource Binding Analysis
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 78550adbe424a..1e2ff60c56684 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -8,30 +8,19 @@
@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
@"groupshared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x <4 x i32>]] zeroinitializer, align 16
-; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [8 x i32] zeroinitializer, align 16
-; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
-; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
-; CHECK: @groupshared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
-; CHECK-NOT: @arrayofVecData
-; CHECK-NOT: @arrayofVecData.scalarized
-; CHECK-NOT: @vecData
-; CHECK-NOT: @staticArrayOfVecData
-; CHECK-NOT: @staticArrayOfVecData.scalarized
-; CHECK-NOT: @groupshared2dArrayofVectors
-; CHECK-NOT: @groupshared2dArrayofVectors.scalarized
define <4 x i32> @load_array_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), align 4
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 2), align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3), align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), i32 1), align 4
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), i32 2), align 4
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), i32 3), align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 6), align 4
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 7), align 4
; CHECK-NEXT: [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
; CHECK-NEXT: [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
; CHECK-NEXT: [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
@@ -53,9 +42,9 @@ define <4 x i32> @load_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_vec_test(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1), align 4
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2), align 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP3]], i32 2
@@ -66,6 +55,42 @@ define <4 x i32> @load_vec_test() #0 {
ret <4 x i32> %1
}
+define <4 x i32> @load_vec_from_scalar_gep_test() #0 {
+; CHECK-LABEL: define <4 x i32> @load_vec_from_scalar_gep_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DOTI04:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds nuw ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
+; CHECK-NEXT: [[DOTI116:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 4
+; CHECK-NEXT: [[DOTI228:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 6), align 4
+; CHECK-NEXT: [[DOTI3310:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 7), align 4
+; CHECK-NEXT: [[DOTUPTO011:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI04]], i32 0
+; CHECK-NEXT: [[DOTUPTO112:%.*]] = insertelement <4 x i32> [[DOTUPTO011]], i32 [[DOTI116]], i32 1
+; CHECK-NEXT: [[DOTUPTO213:%.*]] = insertelement <4 x i32> [[DOTUPTO112]], i32 [[DOTI228]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO213]], i32 [[DOTI3310]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = getelementptr inbounds nuw i32, ptr addrspace(3) @"arrayofVecData", i32 4
+ %2 = load <4 x i32>, ptr addrspace(3) %1, align 4
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @load_vec_from_i8_gep_test() #0 {
+; CHECK-LABEL: define <4 x i32> @load_vec_from_i8_gep_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DOTI04:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds nuw ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
+; CHECK-NEXT: [[DOTI116:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 4
+; CHECK-NEXT: [[DOTI228:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 6), align 4
+; CHECK-NEXT: [[DOTI3310:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 7), align 4
+; CHECK-NEXT: [[DOTUPTO011:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI04]], i32 0
+; CHECK-NEXT: [[DOTUPTO112:%.*]] = insertelement <4 x i32> [[DOTUPTO011]], i32 [[DOTI116]], i32 1
+; CHECK-NEXT: [[DOTUPTO213:%.*]] = insertelement <4 x i32> [[DOTUPTO112]], i32 [[DOTI228]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO213]], i32 [[DOTI3310]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = getelementptr inbounds nuw i8, ptr addrspace(3) @"arrayofVecData", i32 16
+ %2 = load <4 x i32>, ptr addrspace(3) %1, align 4
+ ret <4 x i32> %2
+}
+
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
@@ -73,11 +98,17 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP3]]
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP2]]
; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[DOTFLAT]], align 4
-; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 1, [[TMP4]]
+; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP5]]
; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
-; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 2, [[TMP6]]
+; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP7]]
; CHECK-NEXT: [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
-; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 3
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 3, [[TMP8]]
+; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP9]]
; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
; CHECK-NEXT: [[DOTUPTO01:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
; CHECK-NEXT: [[DOTUPTO12:%.*]] = insertelement <4 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i32 1
@@ -90,17 +121,85 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
ret <4 x i32> %4
}
+define <4 x i32> @load_static_array_of_vec_from_scalar_gep_test(i32 %index) #0 {
+; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_from_scalar_gep_test(
+; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 0, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP3]]
+; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 1, [[TMP5]]
+; CHECK-NEXT: [[DOTI14:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP6]]
+; CHECK-NEXT: [[DOTI11:%.*]] = load i32, ptr [[DOTI14]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 2, [[TMP7]]
+; CHECK-NEXT: [[DOTI25:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP8]]
+; CHECK-NEXT: [[DOTI22:%.*]] = load i32, ptr [[DOTI25]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 3, [[TMP9]]
+; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP10]]
+; CHECK-NEXT: [[DOTI33:%.*]] = load i32, ptr [[DOTI36]], align 4
+; CHECK-NEXT: [[DOTUPTO07:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT: [[DOTUPTO18:%.*]] = insertelement <4 x i32> [[DOTUPTO07]], i32 [[DOTI11]], i32 1
+; CHECK-NEXT: [[DOTUPTO29:%.*]] = insertelement <4 x i32> [[DOTUPTO18]], i32 [[DOTI22]], i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[DOTUPTO29]], i32 [[DOTI33]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP11]]
+;
+ %2 = mul i32 %index, 4
+ %3 = getelementptr inbounds i32, ptr @staticArrayOfVecData, i32 %2
+ %4 = load <4 x i32>, <4 x i32>* %3, align 4
+ ret <4 x i32> %4
+}
+
+define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(i32 %index) #0 {
+; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(
+; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[INDEX]], 12
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = lshr i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 0, [[TMP12]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP3]]
+; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = lshr i32 [[TMP5]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 1, [[TMP14]]
+; CHECK-NEXT: [[DOTI14:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP6]]
+; CHECK-NEXT: [[DOTI11:%.*]] = load i32, ptr [[DOTI14]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = lshr i32 [[TMP7]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = add i32 2, [[TMP15]]
+; CHECK-NEXT: [[DOTI25:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP8]]
+; CHECK-NEXT: [[DOTI22:%.*]] = load i32, ptr [[DOTI25]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP9]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 3, [[TMP13]]
+; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[TMP10]]
+; CHECK-NEXT: [[DOTI33:%.*]] = load i32, ptr [[DOTI36]], align 4
+; CHECK-NEXT: [[DOTUPTO07:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT: [[DOTUPTO18:%.*]] = insertelement <4 x i32> [[DOTUPTO07]], i32 [[DOTI11]], i32 1
+; CHECK-NEXT: [[DOTUPTO29:%.*]] = insertelement <4 x i32> [[DOTUPTO18]], i32 [[DOTI22]], i32 2
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[DOTUPTO29]], i32 [[DOTI33]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP11]]
+;
+ %2 = mul i32 %index, 12
+ %3 = getelementptr inbounds i8, ptr @staticArrayOfVecData, i32 %2
+ %4 = load <4 x i32>, <4 x i32>* %3, align 4
+ ret <4 x i32> %4
+}
+
define <4 x i32> @multid_load_test() #0 {
; CHECK-LABEL: define <4 x i32> @multid_load_test(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 1), align 4
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 2), align 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 3), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 2), align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 3), align 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), align 4
-; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), i32 1), align 4
-; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), i32 2), align 4
-; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 16), i32 3), align 4
+; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 17), align 4
+; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 18), align 4
+; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 19), align 4
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP2]], [[DOTI13]]
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP3]], [[DOTI25]]
diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll
index 7e9fe0e330661..a124c665ad15e 100644
--- a/llvm/test/CodeGen/DirectX/scalar-store.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-store.ll
@@ -14,17 +14,31 @@
; CHECK-LABEL: store_array_vec_test
define void @store_array_vec_test () local_unnamed_addr #0 {
- ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.1dim.*|%.*)}}, align {{4|8|16}}
- ; CHECK-NEXT: ret void
- store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
- store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
- ret void
- }
+; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 16
+; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
+; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 8
+; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 16
+; CHECK-NEXT: store float 4.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 4), align 4
+; CHECK-NEXT: store float 6.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 5), align 8
+; CHECK-NEXT: ret void
+;
+ store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
+ store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 12), align 16
+ ret void
+}
; CHECK-LABEL: store_vec_test
define void @store_vec_test(<4 x i32> %inputVec) #0 {
- ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
- ; CHECK-NEXT: ret void
+; CHECK-NEXT: [[INPUTVEC_I01:%.*]] = extractelement <4 x i32> %inputVec, i32 0
+; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) @vecData.scalarized, align 4
+; CHECK-NEXT: [[INPUTVEC_I12:%.*]] = extractelement <4 x i32> %inputVec, i32 1
+; CHECK-NEXT: store i32 [[INPUTVEC_I12]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
+; CHECK-NEXT: [[INPUTVEC_I23:%.*]] = extractelement <4 x i32> %inputVec, i32 2
+; CHECK-NEXT: store i32 [[INPUTVEC_I23]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4
+; CHECK-NEXT: [[INPUTVEC_I34:%.*]] = extractelement <4 x i32> %inputVec, i32 3
+; CHECK-NEXT: store i32 [[INPUTVEC_I34]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4
+; CHECK-NEXT: ret void
+;
store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
ret void
}
>From 1ea2cc54a8f3d95f33c8e61d50881b1d74dbf225 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 3 Jul 2025 00:04:37 +0000
Subject: [PATCH 2/2] Re-add CHECK lines that were removed by script
---
.../test/CodeGen/DirectX/llc-vector-load-scalarize.ll | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 1e2ff60c56684..27a892591a867 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -8,7 +8,18 @@
@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
@"groupshared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x <4 x i32>]] zeroinitializer, align 16
+; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [8 x i32] zeroinitializer, align 16
+; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
+; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
+; CHECK: @groupshared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
+; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @arrayofVecData.scalarized
+; CHECK-NOT: @vecData
+; CHECK-NOT: @staticArrayOfVecData
+; CHECK-NOT: @staticArrayOfVecData.scalarized
+; CHECK-NOT: @groupshared2dArrayofVectors
+; CHECK-NOT: @groupshared2dArrayofVectors.scalarized
define <4 x i32> @load_array_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
More information about the llvm-commits mailing list