[llvm] [DirectX] Scalarize `extractelement` and `insertelement` with dynamic indices (PR #141676)
Deric C. via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 21:04:45 PDT 2025
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/141676
>From d3f1a51cef21e74b281ba1dbf38bff16410b20e1 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 27 May 2025 21:28:12 +0000
Subject: [PATCH 1/7] Scalarize extractelement with dynamic index
---
.../Target/DirectX/DXILDataScalarization.cpp | 66 ++++++++++++++-----
.../DirectX/scalarize-dynamic-vector-index.ll | 38 +++++++++++
2 files changed, 86 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 06708cec00cec..7bd0539c6bfe0 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -27,6 +27,19 @@ static const int MaxVecSize = 4;
using namespace llvm;
+// Recursively creates an array-like version of a given vector type.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+ if (auto *VecTy = dyn_cast<VectorType>(T))
+ return ArrayType::get(VecTy->getElementType(),
+ dyn_cast<FixedVectorType>(VecTy)->getNumElements());
+ if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+ Type *NewElementType = equivalentArrayTypeFromVector(ArrayTy->getElementType());
+ return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+ }
+ // If it's not a vector or array, return the original type.
+ return T;
+}
+
class DXILDataScalarizationLegacy : public ModulePass {
public:
@@ -55,7 +68,7 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
bool visitCastInst(CastInst &CI) { return false; }
bool visitBitCastInst(BitCastInst &BCI) { return false; }
bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
- bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
+ bool visitExtractElementInst(ExtractElementInst &EEI);
bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
bool visitPHINode(PHINode &PHI) { return false; }
bool visitLoadInst(LoadInst &LI);
@@ -90,20 +103,6 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
return nullptr; // Not found
}
-// Recursively creates an array version of the given vector type.
-static Type *replaceVectorWithArray(Type *T, LLVMContext &Ctx) {
- if (auto *VecTy = dyn_cast<VectorType>(T))
- return ArrayType::get(VecTy->getElementType(),
- dyn_cast<FixedVectorType>(VecTy)->getNumElements());
- if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
- Type *NewElementType =
- replaceVectorWithArray(ArrayTy->getElementType(), Ctx);
- return ArrayType::get(NewElementType, ArrayTy->getNumElements());
- }
- // If it's not a vector or array, return the original type.
- return T;
-}
-
static bool isArrayOfVectors(Type *T) {
if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
return isa<VectorType>(ArrType->getElementType());
@@ -116,8 +115,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType());
IRBuilder<> Builder(&AI);
- LLVMContext &Ctx = AI.getContext();
- Type *NewType = replaceVectorWithArray(ArrType, Ctx);
+ Type *NewType = equivalentArrayTypeFromVector(ArrType);
AllocaInst *ArrAlloca =
Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
ArrAlloca->setAlignment(AI.getAlign());
@@ -173,6 +171,38 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
return false;
}
+bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
+ // If the index is a constant then we don't need to scalarize it
+ Value *Index = EEI.getIndexOperand();
+ Type *IndexTy = Index->getType();
+ if (isa<ConstantInt>(Index))
+ return false;
+
+ IRBuilder<> Builder(&EEI);
+ VectorType *VecTy = EEI.getVectorOperandType();
+ assert(VecTy->getElementCount().isFixed() &&
+ "Vector operand of ExtractElement must have a fixed size");
+
+ Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
+ Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+
+ for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+ Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
+ Value *GEP = Builder.CreateInBoundsGEP(
+ ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
+ Builder.CreateStore(EE, GEP);
+ }
+
+ Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), Index});
+ Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
+
+ EEI.replaceAllUsesWith(Load);
+ EEI.eraseFromParent();
+ return true;
+}
+
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
unsigned NumOperands = GEPI.getNumOperands();
@@ -257,7 +287,7 @@ static bool findAndReplaceVectors(Module &M) {
for (GlobalVariable &G : M.globals()) {
Type *OrigType = G.getValueType();
- Type *NewType = replaceVectorWithArray(OrigType, Ctx);
+ Type *NewType = equivalentArrayTypeFromVector(OrigType);
if (OrigType != NewType) {
// Create a new global variable with the updated type
// Note: Initializer is set via transformInitializer
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
new file mode 100644
index 0000000000000..74e9202b540c1
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define float @extract_float_vec_dynamic(<4 x float> %0, i32 %1) {
+; CHECK-LABEL: define float @extract_float_vec_dynamic(
+; CHECK-SAME: <4 x float> [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT: [[TMP3:%.*]] = alloca [4 x float], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 0
+; CHECK-NEXT: store float [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 1
+; CHECK-NEXT: store float [[TMP6]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 2
+; CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 3
+; CHECK-NEXT: store float [[TMP10]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
+; CHECK-NEXT: ret float [[TMP13]]
+;
+ %e = extractelement <4 x float> %0, i32 %1
+ ret float %e
+}
+
+; An extractelement with a constant index should not be converted to array form
+define i16 @extract_i16_vec_constant(<4 x i16> %0) {
+; CHECK-LABEL: define i16 @extract_i16_vec_constant(
+; CHECK-SAME: <4 x i16> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
+; CHECK-NEXT: ret i16 [[E]]
+;
+ %e = extractelement <4 x i16> %0, i32 1
+ ret i16 %e
+}
+
>From cda7e887104eafcb6b16705df39d0e7fb4df02be Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 27 May 2025 21:52:03 +0000
Subject: [PATCH 2/7] Apply clang-format
---
llvm/lib/Target/DirectX/DXILDataScalarization.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 7bd0539c6bfe0..eb8c941c1f348 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -33,7 +33,8 @@ static Type *equivalentArrayTypeFromVector(Type *T) {
return ArrayType::get(VecTy->getElementType(),
dyn_cast<FixedVectorType>(VecTy)->getNumElements());
if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
- Type *NewElementType = equivalentArrayTypeFromVector(ArrayTy->getElementType());
+ Type *NewElementType =
+ equivalentArrayTypeFromVector(ArrayTy->getElementType());
return ArrayType::get(NewElementType, ArrayTy->getNumElements());
}
// If it's not a vector or array, return the original type.
>From be5d425ad04f461372849e4a4ce4f2dad19570e1 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 27 May 2025 22:41:03 +0000
Subject: [PATCH 3/7] Remove unnecessary assert
---
llvm/lib/Target/DirectX/DXILDataScalarization.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index eb8c941c1f348..17d8b6fc0d7ef 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -181,9 +181,7 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
IRBuilder<> Builder(&EEI);
VectorType *VecTy = EEI.getVectorOperandType();
- assert(VecTy->getElementCount().isFixed() &&
- "Vector operand of ExtractElement must have a fixed size");
-
+
Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
>From 15cf98cfec409af748ca1d3f4012a7efbb3ce43f Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 27 May 2025 23:38:08 +0000
Subject: [PATCH 4/7] Scalarize dynamically-indexed insertelement
---
.../Target/DirectX/DXILDataScalarization.cpp | 34 +++++++-
.../DirectX/scalarize-dynamic-vector-index.ll | 78 +++++++++++++------
2 files changed, 88 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 17d8b6fc0d7ef..04080055c2f75 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -68,7 +68,7 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
bool visitCastInst(CastInst &CI) { return false; }
bool visitBitCastInst(BitCastInst &BCI) { return false; }
- bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
+ bool visitInsertElementInst(InsertElementInst &IEI);
bool visitExtractElementInst(ExtractElementInst &EEI);
bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
bool visitPHINode(PHINode &PHI) { return false; }
@@ -172,6 +172,38 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
return false;
}
+bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+ Value *Vec = IEI.getOperand(0);
+ Value *Val = IEI.getOperand(1);
+ Value *Index = IEI.getOperand(2);
+ Type *IndexTy = Index->getType();
+
+ // If the index is a constant then we don't need to scalarize it
+ if (isa<ConstantInt>(Index))
+ return false;
+
+ IRBuilder<> Builder(&IEI);
+ Type *VecTy = Vec->getType();
+
+ Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
+ Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+
+ for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+ Value *EE = Builder.CreateExtractElement(Vec, I);
+ Value *GEP = Builder.CreateInBoundsGEP(
+ ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
+ Builder.CreateStore(EE, GEP);
+ }
+
+ Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), Index});
+ Builder.CreateStore(Val, GEP);
+
+ IEI.eraseFromParent();
+ return true;
+}
+
bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
// If the index is a constant then we don't need to scalarize it
Value *Index = EEI.getIndexOperand();
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
index 74e9202b540c1..b1191d903fd49 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -1,38 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
-define float @extract_float_vec_dynamic(<4 x float> %0, i32 %1) {
+define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
; CHECK-LABEL: define float @extract_float_vec_dynamic(
-; CHECK-SAME: <4 x float> [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
-; CHECK-NEXT: [[TMP3:%.*]] = alloca [4 x float], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 0
+; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca [4 x float], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[V]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 0
+; CHECK-NEXT: store float [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[V]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 1
; CHECK-NEXT: store float [[TMP4]], ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[V]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 2
; CHECK-NEXT: store float [[TMP6]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[V]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 3
; CHECK-NEXT: store float [[TMP8]], ptr [[TMP9]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 3
-; CHECK-NEXT: store float [[TMP10]], ptr [[TMP11]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP3]], i32 0, i32 [[TMP1]]
-; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
-; CHECK-NEXT: ret float [[TMP13]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP1]], i32 0, i32 [[I]]
+; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4
+; CHECK-NEXT: ret float [[TMP11]]
;
- %e = extractelement <4 x float> %0, i32 %1
- ret float %e
+ %ee = extractelement <4 x float> %v, i32 %i
+ ret float %ee
+}
+
+define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
+; CHECK-LABEL: define void @insert_i32_vec_dynamic(
+; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[V]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 0
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x i32> [[V]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 1
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x i32> [[V]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 2
+; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 [[I]]
+; CHECK-NEXT: store i32 [[A]], ptr [[TMP8]], align 4
+; CHECK-NEXT: ret void
+;
+ insertelement <3 x i32> %v, i32 %a, i32 %i
+ ret void
}
; An extractelement with a constant index should not be converted to array form
-define i16 @extract_i16_vec_constant(<4 x i16> %0) {
+define i16 @extract_i16_vec_constant(<4 x i16> %v) {
; CHECK-LABEL: define i16 @extract_i16_vec_constant(
-; CHECK-SAME: <4 x i16> [[TMP0:%.*]]) {
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
-; CHECK-NEXT: ret i16 [[E]]
+; CHECK-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-NEXT: [[EE:%.*]] = extractelement <4 x i16> [[V]], i32 1
+; CHECK-NEXT: ret i16 [[EE]]
+;
+ %ee = extractelement <4 x i16> %v, i32 1
+ ret i16 %ee
+}
+
+; An insertelement with a constant index should not be converted to array form
+define void @insert_half_vec_constant(<2 x half> %v, half %a) {
+; CHECK-LABEL: define void @insert_half_vec_constant(
+; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
+; CHECK-NEXT: ret void
;
- %e = extractelement <4 x i16> %0, i32 1
- ret i16 %e
+ insertelement <2 x half> %v, half %a, i32 1
+ ret void
}
>From 4c28344d9684b2c861db003eb6485349710ecafd Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 27 May 2025 23:53:46 +0000
Subject: [PATCH 5/7] Subroutine creating an array from a vector
---
.../Target/DirectX/DXILDataScalarization.cpp | 48 +++++++++----------
1 file changed, 23 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 04080055c2f75..a2e27e4f4ff8b 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -172,6 +172,22 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
return false;
}
+// Allocates and populates an array equivalent to the vector operand Vec.
+// Returns the array and the type of the array.
+static std::pair<Value *, Type *>
+allocaArrayFromVector(IRBuilder<> &Builder, Value *Vec, Type *IdxTy) {
+ Type *ArrTy = equivalentArrayTypeFromVector(Vec->getType());
+ Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+ for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+ Value *EE = Builder.CreateExtractElement(Vec, I);
+ Value *GEP = Builder.CreateInBoundsGEP(
+ ArrTy, ArrAlloca,
+ {ConstantInt::get(IdxTy, 0), ConstantInt::get(IdxTy, I)});
+ Builder.CreateStore(EE, GEP);
+ }
+ return std::make_pair(ArrAlloca, ArrTy);
+}
+
bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
Value *Vec = IEI.getOperand(0);
Value *Val = IEI.getOperand(1);
@@ -183,19 +199,9 @@ bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
return false;
IRBuilder<> Builder(&IEI);
- Type *VecTy = Vec->getType();
-
- Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
- Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
-
- for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
- Value *EE = Builder.CreateExtractElement(Vec, I);
- Value *GEP = Builder.CreateInBoundsGEP(
- ArrTy, ArrAlloca,
- {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
- Builder.CreateStore(EE, GEP);
- }
-
+ std::pair<Value *, Type *> Arr = allocaArrayFromVector(Builder, Vec, IndexTy);
+ Value *ArrAlloca = Arr.first;
+ Type *ArrTy = Arr.second;
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
{ConstantInt::get(IndexTy, 0), Index});
Builder.CreateStore(Val, GEP);
@@ -212,18 +218,10 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
return false;
IRBuilder<> Builder(&EEI);
- VectorType *VecTy = EEI.getVectorOperandType();
-
- Type *ArrTy = equivalentArrayTypeFromVector(VecTy);
- Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
-
- for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
- Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
- Value *GEP = Builder.CreateInBoundsGEP(
- ArrTy, ArrAlloca,
- {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
- Builder.CreateStore(EE, GEP);
- }
+ std::pair<Value *, Type *> Arr =
+ allocaArrayFromVector(Builder, EEI.getVectorOperand(), IndexTy);
+ Value *ArrAlloca = Arr.first;
+ Type *ArrTy = Arr.second;
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
{ConstantInt::get(IndexTy, 0), Index});
>From 989d82e5e7cd047f8ec864f233a66f2536828daf Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Tue, 27 May 2025 23:58:57 +0000
Subject: [PATCH 6/7] Refactor visitExtractElementInst and visitInsertElementInst
---
.../Target/DirectX/DXILDataScalarization.cpp | 30 ++++++++++++-------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index a2e27e4f4ff8b..8164850927e2a 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -188,36 +188,38 @@ allocaArrayFromVector(IRBuilder<> &Builder, Value *Vec, Type *IdxTy) {
return std::make_pair(ArrAlloca, ArrTy);
}
-bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+static bool replaceDynamicInsertElementInst(InsertElementInst &IEI) {
+ IRBuilder<> Builder(&IEI);
+
Value *Vec = IEI.getOperand(0);
Value *Val = IEI.getOperand(1);
Value *Index = IEI.getOperand(2);
Type *IndexTy = Index->getType();
- // If the index is a constant then we don't need to scalarize it
- if (isa<ConstantInt>(Index))
- return false;
-
- IRBuilder<> Builder(&IEI);
std::pair<Value *, Type *> Arr = allocaArrayFromVector(Builder, Vec, IndexTy);
Value *ArrAlloca = Arr.first;
Type *ArrTy = Arr.second;
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
{ConstantInt::get(IndexTy, 0), Index});
Builder.CreateStore(Val, GEP);
-
IEI.eraseFromParent();
return true;
}
-bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
+bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
// If the index is a constant then we don't need to scalarize it
- Value *Index = EEI.getIndexOperand();
- Type *IndexTy = Index->getType();
+ Value *Index = IEI.getOperand(2);
if (isa<ConstantInt>(Index))
return false;
+ return replaceDynamicInsertElementInst(IEI);
+}
+static bool replaceDynamicExtractElementInst(ExtractElementInst &EEI) {
IRBuilder<> Builder(&EEI);
+
+ Value *Index = EEI.getIndexOperand();
+ Type *IndexTy = Index->getType();
+
std::pair<Value *, Type *> Arr =
allocaArrayFromVector(Builder, EEI.getVectorOperand(), IndexTy);
Value *ArrAlloca = Arr.first;
@@ -232,6 +234,14 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
return true;
}
+bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
+ // If the index is a constant then we don't need to scalarize it
+ Value *Index = EEI.getIndexOperand();
+ if (isa<ConstantInt>(Index))
+ return false;
+ return replaceDynamicExtractElementInst(EEI);
+}
+
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
unsigned NumOperands = GEPI.getNumOperands();
>From bfa93eac0112f859c7813acba37c307fe70624ec Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 29 May 2025 04:03:58 +0000
Subject: [PATCH 7/7] Complete scalarization of insertelement with dynamic index
---
.../Target/DirectX/DXILDataScalarization.cpp | 58 +++++++++++--------
.../DirectX/scalarize-dynamic-vector-index.ll | 26 +++++----
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 8164850927e2a..9ef43c938d9b5 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -172,36 +172,41 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
return false;
}
-// Allocates and populates an array equivalent to the vector operand Vec.
-// Returns the array and the type of the array.
-static std::pair<Value *, Type *>
-allocaArrayFromVector(IRBuilder<> &Builder, Value *Vec, Type *IdxTy) {
+static bool replaceDynamicInsertElementInst(InsertElementInst &IEI) {
+ IRBuilder<> Builder(&IEI);
+
+ Value *Vec = IEI.getOperand(0);
+ Value *Val = IEI.getOperand(1);
+ Value *Index = IEI.getOperand(2);
+ Type *IndexTy = Index->getType();
+
Type *ArrTy = equivalentArrayTypeFromVector(Vec->getType());
Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
- for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+ const uint64_t ArrNumElems = ArrTy->getArrayNumElements();
+
+ SmallVector<Value *, 4> GEPs(ArrNumElems);
+ for (unsigned I = 0; I < ArrNumElems; ++I) {
Value *EE = Builder.CreateExtractElement(Vec, I);
Value *GEP = Builder.CreateInBoundsGEP(
ArrTy, ArrAlloca,
- {ConstantInt::get(IdxTy, 0), ConstantInt::get(IdxTy, I)});
+ {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
Builder.CreateStore(EE, GEP);
+ GEPs[I] = GEP;
}
- return std::make_pair(ArrAlloca, ArrTy);
-}
-static bool replaceDynamicInsertElementInst(InsertElementInst &IEI) {
- IRBuilder<> Builder(&IEI);
+ Value *GEPForStore = Builder.CreateInBoundsGEP(
+ ArrTy, ArrAlloca, {ConstantInt::get(IndexTy, 0), Index});
+ Builder.CreateStore(Val, GEPForStore);
- Value *Vec = IEI.getOperand(0);
- Value *Val = IEI.getOperand(1);
- Value *Index = IEI.getOperand(2);
- Type *IndexTy = Index->getType();
+ Value *NewIEI = PoisonValue::get(Vec->getType());
+ for (unsigned I = 0; I < ArrNumElems; ++I) {
+ Value *GEP = GEPs[I];
+ Value *Load = Builder.CreateLoad(ArrTy->getArrayElementType(), GEP);
+ NewIEI =
+ Builder.CreateInsertElement(NewIEI, Load, ConstantInt::get(IndexTy, I));
+ }
- std::pair<Value *, Type *> Arr = allocaArrayFromVector(Builder, Vec, IndexTy);
- Value *ArrAlloca = Arr.first;
- Type *ArrTy = Arr.second;
- Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
- {ConstantInt::get(IndexTy, 0), Index});
- Builder.CreateStore(Val, GEP);
+ IEI.replaceAllUsesWith(NewIEI);
IEI.eraseFromParent();
return true;
}
@@ -220,10 +225,15 @@ static bool replaceDynamicExtractElementInst(ExtractElementInst &EEI) {
Value *Index = EEI.getIndexOperand();
Type *IndexTy = Index->getType();
- std::pair<Value *, Type *> Arr =
- allocaArrayFromVector(Builder, EEI.getVectorOperand(), IndexTy);
- Value *ArrAlloca = Arr.first;
- Type *ArrTy = Arr.second;
+ Type *ArrTy = equivalentArrayTypeFromVector(EEI.getVectorOperandType());
+ Value *ArrAlloca = Builder.CreateAlloca(ArrTy);
+ for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+ Value *EE = Builder.CreateExtractElement(EEI.getVectorOperand(), I);
+ Value *GEP = Builder.CreateInBoundsGEP(
+ ArrTy, ArrAlloca,
+ {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, I)});
+ Builder.CreateStore(EE, GEP);
+ }
Value *GEP = Builder.CreateInBoundsGEP(ArrTy, ArrAlloca,
{ConstantInt::get(IndexTy, 0), Index});
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
index b1191d903fd49..1fe9868b88f65 100644
--- a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
+++ b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -25,8 +25,8 @@ define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
ret float %ee
}
-define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
-; CHECK-LABEL: define void @insert_i32_vec_dynamic(
+define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
+; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = alloca [3 x i32], align 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[V]], i64 0
@@ -40,10 +40,16 @@ define void @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i32], ptr [[TMP1]], i32 0, i32 [[I]]
; CHECK-NEXT: store i32 [[A]], ptr [[TMP8]], align 4
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i32> poison, i32 [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x i32> [[TMP10]], i32 [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x i32> [[TMP12]], i32 [[TMP13]], i32 2
+; CHECK-NEXT: ret <3 x i32> [[TMP14]]
;
- insertelement <3 x i32> %v, i32 %a, i32 %i
- ret void
+ %ie = insertelement <3 x i32> %v, i32 %a, i32 %i
+ ret <3 x i32> %ie
}
; An extractelement with a constant index should not be converted to array form
@@ -58,13 +64,13 @@ define i16 @extract_i16_vec_constant(<4 x i16> %v) {
}
; An insertelement with a constant index should not be converted to array form
-define void @insert_half_vec_constant(<2 x half> %v, half %a) {
-; CHECK-LABEL: define void @insert_half_vec_constant(
+define <2 x half> @insert_half_vec_constant(<2 x half> %v, half %a) {
+; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
-; CHECK-NEXT: ret void
+; CHECK-NEXT: ret <2 x half> [[TMP1]]
;
- insertelement <2 x half> %v, half %a, i32 1
- ret void
+ %ie = insertelement <2 x half> %v, half %a, i32 1
+ ret <2 x half> %ie
}
More information about the llvm-commits
mailing list