[llvm] [SPIRV] Improve vector legalization and type deduction (PR #175067)
Steven Perron via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 8 13:08:55 PST 2026
https://github.com/s-perron created https://github.com/llvm/llvm-project/pull/175067
This patch adds support for scalarizing vector loads in the legalizer and
implements legalization for the spv_const_composite intrinsic. It also
refactors stack temporary creation for vector operations to ensure correct
SPIR-V types are assigned. Additionally, type deduction in the
PostLegalizer is improved to handle GEP and Load instructions.
Fixes https://github.com/llvm/llvm-project/issues/170534
From b998b55df9c9ff5c8855f80b6f0eb8ac3001f269 Mon Sep 17 00:00:00 2001
From: Steven Perron <stevenperron at google.com>
Date: Thu, 8 Jan 2026 13:22:27 -0500
Subject: [PATCH] [SPIRV] Improve vector legalization and type deduction
This patch adds support for scalarizing vector loads in the legalizer and
implements legalization for the spv_const_composite intrinsic. It also
refactors stack temporary creation for vector operations to ensure correct
SPIR-V types are assigned. Additionally, type deduction in the
PostLegalizer is improved to handle GEP and Load instructions.
Fixes https://github.com/llvm/llvm-project/issues/170534
---
llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp | 159 +++++++++++++----
llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp | 85 ++++++++-
.../spv-extractelt-legalization.ll | 14 +-
.../vector-index-scalarization.ll | 166 ++++++++++++++++++
4 files changed, 385 insertions(+), 39 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index f6587ba068c0e..03d846cb90b4c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -535,6 +535,66 @@ static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType,
return ConvReg;
}
+static bool needsVectorLegalization(const LLT &Ty, const SPIRVSubtarget &ST) {
+ if (!Ty.isVector())
+ return false;
+ unsigned NumElements = Ty.getNumElements();
+ unsigned MaxVectorSize = ST.isShader() ? 4 : 16;
+ return (NumElements > 4 && !isPowerOf2_32(NumElements)) ||
+ NumElements > MaxVectorSize;
+}
+
+static bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI,
+ SPIRVGlobalRegistry *GR) {
+ MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ if (!DstTy.isVector())
+ return true;
+
+ const SPIRVSubtarget &ST = MI.getMF()->getSubtarget<SPIRVSubtarget>();
+ if (!needsVectorLegalization(DstTy, ST))
+ return true;
+
+ SmallVector<Register, 8> SplitRegs;
+ LLT EltTy = DstTy.getElementType();
+ unsigned NumElts = DstTy.getNumElements();
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto Zero = MIRBuilder.buildConstant(LLT::scalar(32), 0);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ auto Idx = MIRBuilder.buildConstant(LLT::scalar(32), i);
+ Register EltPtr = MRI.createGenericVirtualRegister(PtrTy);
+
+ MIRBuilder.buildIntrinsic(Intrinsic::spv_gep, ArrayRef<Register>{EltPtr})
+ .addImm(1) // InBounds
+ .addUse(PtrReg)
+ .addUse(Zero.getReg(0))
+ .addUse(Idx.getReg(0));
+
+ MachinePointerInfo EltPtrInfo;
+ Align EltAlign = Align(1);
+ if (!MI.memoperands_empty()) {
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ EltPtrInfo =
+ MMO->getPointerInfo().getWithOffset(i * EltTy.getSizeInBytes());
+ EltAlign = commonAlignment(MMO->getAlign(), i * EltTy.getSizeInBytes());
+ }
+
+ Register EltReg = MRI.createGenericVirtualRegister(EltTy);
+ MIRBuilder.buildLoad(EltReg, EltPtr, EltPtrInfo, EltAlign);
+ SplitRegs.push_back(EltReg);
+ }
+
+ MIRBuilder.buildBuildVector(DstReg, SplitRegs);
+ MI.eraseFromParent();
+ return true;
+}
+
static bool legalizeStore(LegalizerHelper &Helper, MachineInstr &MI,
SPIRVGlobalRegistry *GR) {
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
@@ -623,18 +683,40 @@ bool SPIRVLegalizerInfo::legalizeCustom(
}
return true;
}
+ case TargetOpcode::G_LOAD:
+ return legalizeLoad(Helper, MI, GR);
case TargetOpcode::G_STORE:
return legalizeStore(Helper, MI, GR);
}
}
-static bool needsVectorLegalization(const LLT &Ty, const SPIRVSubtarget &ST) {
- if (!Ty.isVector())
- return false;
- unsigned NumElements = Ty.getNumElements();
- unsigned MaxVectorSize = ST.isShader() ? 4 : 16;
- return (NumElements > 4 && !isPowerOf2_32(NumElements)) ||
- NumElements > MaxVectorSize;
+static MachineInstrBuilder
+createStackTemporaryForVector(LegalizerHelper &Helper, SPIRVGlobalRegistry *GR,
+ Register SrcReg, LLT SrcTy,
+ MachinePointerInfo &PtrInfo, Align &VecAlign) {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+ VecAlign = Helper.getStackTemporaryAlignment(SrcTy);
+ auto StackTemp = Helper.createStackTemporary(
+ TypeSize::getFixed(SrcTy.getSizeInBytes()), VecAlign, PtrInfo);
+
+ // Set the type of StackTemp to a pointer to an array of the element type.
+ SPIRVType *SpvSrcTy = GR->getSPIRVTypeForVReg(SrcReg);
+ SPIRVType *EltSpvTy = GR->getScalarOrVectorComponentType(SpvSrcTy);
+ const Type *LLVMEltTy = GR->getTypeForSPIRVType(EltSpvTy);
+ const Type *LLVMArrTy =
+ ArrayType::get(const_cast<Type *>(LLVMEltTy), SrcTy.getNumElements());
+ SPIRVType *ArrSpvTy = GR->getOrCreateSPIRVType(
+ LLVMArrTy, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, true);
+ SPIRVType *PtrToArrSpvTy = GR->getOrCreateSPIRVPointerType(
+ ArrSpvTy, MIRBuilder, SPIRV::StorageClass::Function);
+
+ Register StackReg = StackTemp.getReg(0);
+ MRI.setRegClass(StackReg, GR->getRegClass(PtrToArrSpvTy));
+ GR->assignSPIRVTypeToVReg(PtrToArrSpvTy, StackReg, MIRBuilder.getMF());
+
+ return StackTemp;
}
static bool legalizeSpvBitcast(LegalizerHelper &Helper, MachineInstr &MI,
@@ -697,11 +779,10 @@ static bool legalizeSpvInsertElt(LegalizerHelper &Helper, MachineInstr &MI,
}
LLT EltTy = SrcTy.getElementType();
- Align VecAlign = Helper.getStackTemporaryAlignment(SrcTy);
-
+ Align VecAlign;
MachinePointerInfo PtrInfo;
- auto StackTemp = Helper.createStackTemporary(
- TypeSize::getFixed(SrcTy.getSizeInBytes()), VecAlign, PtrInfo);
+ auto StackTemp = createStackTemporaryForVector(Helper, GR, SrcReg, SrcTy,
+ PtrInfo, VecAlign);
MIRBuilder.buildStore(SrcReg, StackTemp, PtrInfo, VecAlign);
@@ -763,26 +844,10 @@ static bool legalizeSpvExtractElt(LegalizerHelper &Helper, MachineInstr &MI,
}
LLT EltTy = SrcTy.getElementType();
- Align VecAlign = Helper.getStackTemporaryAlignment(SrcTy);
-
+ Align VecAlign;
MachinePointerInfo PtrInfo;
- auto StackTemp = Helper.createStackTemporary(
- TypeSize::getFixed(SrcTy.getSizeInBytes()), VecAlign, PtrInfo);
-
- // Set the type of StackTemp to a pointer to an array of the element type.
- SPIRVType *SpvSrcTy = GR->getSPIRVTypeForVReg(SrcReg);
- SPIRVType *EltSpvTy = GR->getScalarOrVectorComponentType(SpvSrcTy);
- const Type *LLVMEltTy = GR->getTypeForSPIRVType(EltSpvTy);
- const Type *LLVMArrTy =
- ArrayType::get(const_cast<Type *>(LLVMEltTy), SrcTy.getNumElements());
- SPIRVType *ArrSpvTy = GR->getOrCreateSPIRVType(
- LLVMArrTy, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, true);
- SPIRVType *PtrToArrSpvTy = GR->getOrCreateSPIRVPointerType(
- ArrSpvTy, MIRBuilder, SPIRV::StorageClass::Function);
-
- Register StackReg = StackTemp.getReg(0);
- MRI.setRegClass(StackReg, GR->getRegClass(PtrToArrSpvTy));
- GR->assignSPIRVTypeToVReg(PtrToArrSpvTy, StackReg, *MI.getMF());
+ auto StackTemp = createStackTemporaryForVector(Helper, GR, SrcReg, SrcTy,
+ PtrInfo, VecAlign);
MIRBuilder.buildStore(SrcReg, StackTemp, PtrInfo, VecAlign);
@@ -807,6 +872,38 @@ static bool legalizeSpvExtractElt(LegalizerHelper &Helper, MachineInstr &MI,
return true;
}
+static bool legalizeSpvConstComposite(LegalizerHelper &Helper, MachineInstr &MI,
+ SPIRVGlobalRegistry *GR) {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ const SPIRVSubtarget &ST = MI.getMF()->getSubtarget<SPIRVSubtarget>();
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ if (!needsVectorLegalization(DstTy, ST))
+ return true;
+
+ SmallVector<Register, 8> SrcRegs;
+ if (MI.getNumOperands() == 2) {
+ // The "null" case: no values are attached.
+ LLT EltTy = DstTy.getElementType();
+ auto Zero = MIRBuilder.buildConstant(EltTy, 0);
+ SPIRVType *SpvDstTy = GR->getSPIRVTypeForVReg(DstReg);
+ SPIRVType *SpvEltTy = GR->getScalarOrVectorComponentType(SpvDstTy);
+ GR->assignSPIRVTypeToVReg(SpvEltTy, Zero.getReg(0), MIRBuilder.getMF());
+ for (unsigned i = 0; i < DstTy.getNumElements(); ++i)
+ SrcRegs.push_back(Zero.getReg(0));
+ } else {
+ for (unsigned i = 2; i < MI.getNumOperands(); ++i) {
+ SrcRegs.push_back(MI.getOperand(i).getReg());
+ }
+ }
+ MIRBuilder.buildBuildVector(DstReg, SrcRegs);
+ MI.eraseFromParent();
+ return true;
+}
+
bool SPIRVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
LLVM_DEBUG(dbgs() << "legalizeIntrinsic: " << MI);
@@ -818,6 +915,8 @@ bool SPIRVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeSpvInsertElt(Helper, MI, GR);
case Intrinsic::spv_extractelt:
return legalizeSpvExtractElt(Helper, MI, GR);
+ case Intrinsic::spv_const_composite:
+ return legalizeSpvConstComposite(Helper, MI, GR);
}
return true;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
index 5b4ddc267c9b8..40a5cac20698f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
@@ -195,6 +195,78 @@ static SPIRVType *deduceTypeFromUses(Register Reg, MachineFunction &MF,
return nullptr;
}
+static SPIRVType *deduceGEPType(MachineInstr *I, SPIRVGlobalRegistry *GR,
+ MachineIRBuilder &MIB) {
+ LLVM_DEBUG(dbgs() << "Deducing GEP type for: " << *I);
+ Register PtrReg = I->getOperand(3).getReg();
+ SPIRVType *PtrType = GR->getSPIRVTypeForVReg(PtrReg);
+ if (!PtrType) {
+ LLVM_DEBUG(dbgs() << " Could not get type for pointer operand.\n");
+ return nullptr;
+ }
+
+ SPIRVType *PointeeType = GR->getPointeeType(PtrType);
+ if (!PointeeType) {
+ LLVM_DEBUG(dbgs() << " Could not get pointee type from pointer type.\n");
+ return nullptr;
+ }
+
+ MachineRegisterInfo *MRI = MIB.getMRI();
+
+ // The first index (operand 4) steps over the pointer, so the type doesn't
+ // change.
+ for (unsigned i = 5; i < I->getNumOperands(); ++i) {
+ LLVM_DEBUG(dbgs() << " Traversing index " << i
+ << ", current type: " << *PointeeType);
+ switch (PointeeType->getOpcode()) {
+ case SPIRV::OpTypeArray:
+ case SPIRV::OpTypeRuntimeArray:
+ case SPIRV::OpTypeVector: {
+ Register ElemTypeReg = PointeeType->getOperand(1).getReg();
+ PointeeType = GR->getSPIRVTypeForVReg(ElemTypeReg);
+ break;
+ }
+ case SPIRV::OpTypeStruct: {
+ MachineOperand &IdxOp = I->getOperand(i);
+ if (!IdxOp.isReg()) {
+ LLVM_DEBUG(dbgs() << " Index is not a register.\n");
+ return nullptr;
+ }
+ MachineInstr *Def = MRI->getVRegDef(IdxOp.getReg());
+ if (!Def) {
+ LLVM_DEBUG(
+ dbgs() << " Could not find definition for index register.\n");
+ return nullptr;
+ }
+
+ uint64_t IndexVal = foldImm(IdxOp, MRI);
+ if (IndexVal >= PointeeType->getNumOperands() - 1) {
+ LLVM_DEBUG(dbgs() << " Struct index out of bounds.\n");
+ return nullptr;
+ }
+
+ Register MemberTypeReg = PointeeType->getOperand(IndexVal + 1).getReg();
+ PointeeType = GR->getSPIRVTypeForVReg(MemberTypeReg);
+ break;
+ }
+ default:
+ LLVM_DEBUG(dbgs() << " Unknown type opcode for GEP traversal.\n");
+ return nullptr;
+ }
+
+ if (!PointeeType) {
+ LLVM_DEBUG(dbgs() << " Could not resolve next pointee type.\n");
+ return nullptr;
+ }
+ }
+ LLVM_DEBUG(dbgs() << " Final pointee type: " << *PointeeType);
+
+ SPIRV::StorageClass::StorageClass SC = GR->getPointerStorageClass(PtrType);
+ SPIRVType *Res = GR->getOrCreateSPIRVPointerType(PointeeType, MIB, SC);
+ LLVM_DEBUG(dbgs() << " Deduced GEP type: " << *Res);
+ return Res;
+}
+
static SPIRVType *deduceResultTypeFromOperands(MachineInstr *I,
SPIRVGlobalRegistry *GR,
MachineIRBuilder &MIB) {
@@ -207,12 +279,23 @@ static SPIRVType *deduceResultTypeFromOperands(MachineInstr *I,
return deduceTypeFromOperandRange(I, MIB, GR, 1, I->getNumOperands());
case TargetOpcode::G_SHUFFLE_VECTOR:
return deduceTypeFromOperandRange(I, MIB, GR, 1, 3);
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+ case TargetOpcode::G_INTRINSIC: {
+ auto IntrinsicID = cast<GIntrinsic>(I)->getIntrinsicID();
+ if (IntrinsicID == Intrinsic::spv_gep)
+ return deduceGEPType(I, GR, MIB);
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ SPIRVType *PtrType = deduceTypeFromSingleOperand(I, MIB, GR, 1);
+ return PtrType ? GR->getPointeeType(PtrType) : nullptr;
+ }
default:
if (I->getNumDefs() == 1 && I->getNumOperands() > 1 &&
I->getOperand(1).isReg())
return deduceTypeFromSingleOperand(I, MIB, GR, 1);
- return nullptr;
}
+ return nullptr;
}
static bool deduceAndAssignTypeForGUnmerge(MachineInstr *I, MachineFunction &MF,
diff --git a/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll b/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll
index 3188f8b31aac5..cdae023a54557 100644
--- a/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll
+++ b/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll
@@ -1,4 +1,5 @@
; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val --target-env vulkan1.3 %}
; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0
; CHECK-DAG: %[[#Const0:]] = OpConstant %[[#Int]] 0
@@ -15,15 +16,16 @@
; CHECK-DAG: %[[#Const60:]] = OpConstant %[[#Int]] 60
; CHECK-DAG: %[[#Arr:]] = OpTypeArray %[[#Int]] %[[#]]
; CHECK-DAG: %[[#PtrArr:]] = OpTypePointer Function %[[#Arr]]
+; CHECK-DAG: %[[#PtrPriv:]] = OpTypePointer Private %[[#Int]]
- at G = addrspace(1) global i32 0, align 4
+ at G = internal addrspace(10) global i32 0, align 4
define void @main() #0 {
entry:
; CHECK: %[[#Var:]] = OpVariable %[[#PtrArr]] Function
; CHECK: %[[#Idx:]] = OpLoad %[[#Int]]
- %idx = load i32, ptr addrspace(1) @G, align 4
+ %idx = load i32, ptr addrspace(10) @G, align 4
; CHECK: %[[#PtrElt0:]] = OpInBoundsAccessChain %[[#]] %[[#Var]] %[[#Const0]]
@@ -55,12 +57,8 @@ entry:
%res = extractelement <6 x i32> %vec6, i32 %idx
; CHECK: OpStore {{.*}} %[[#Ld]]
- store i32 %res, ptr addrspace(1) @G, align 4
+ store i32 %res, ptr addrspace(10) @G, align 4
ret void
}
-attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
-
-
-
-
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll b/llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll
new file mode 100644
index 0000000000000..23176465daa19
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll
@@ -0,0 +1,166 @@
+; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val --target-env vulkan1.3 %}
+
+; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#c6:]] = OpConstant %[[#int]] 6
+; CHECK-DAG: %[[#arr6int:]] = OpTypeArray %[[#int]] %[[#c6]]
+; CHECK-DAG: %[[#ptr_arr6int:]] = OpTypePointer Function %[[#arr6int]]
+; CHECK-DAG: %[[#ptr_f_int:]] = OpTypePointer Function %[[#int]]
+; CHECK-DAG: %[[#ptr_p_int:]] = OpTypePointer Private %[[#int]]
+; CHECK-DAG: %[[#c0:]] = OpConstant %[[#int]] 0
+; CHECK-DAG: %[[#c1:]] = OpConstant %[[#int]] 1
+; CHECK-DAG: %[[#c2:]] = OpConstant %[[#int]] 2
+; CHECK-DAG: %[[#c3:]] = OpConstant %[[#int]] 3
+; CHECK-DAG: %[[#c4:]] = OpConstant %[[#int]] 4
+; CHECK-DAG: %[[#c5:]] = OpConstant %[[#int]] 5
+; CHECK-DAG: %[[#undef:]] = OpUndef %[[#int]]
+
+; CHECK-DAG: %[[#out:]] = OpVariable %[[#ptr_p_int]] Private
+
+
+ at out = internal addrspace(10) global i32 0
+ at idx = internal addrspace(10) global i32 0
+ at idx2 = internal addrspace(10) global i32 0
+ at val = internal addrspace(10) global i32 0
+
+; CHECK: %[[#test_full:]] = OpFunction %[[#]] None %[[#]]
+define void @test_full() #0 {
+ ; CHECK: %[[#label:]] = OpLabel
+ ; CHECK-DAG: %[[#V_INS:]] = OpVariable %[[#ptr_arr6int]] Function
+ ; CHECK-DAG: %[[#V_EXT:]] = OpVariable %[[#ptr_arr6int]] Function
+ ; CHECK-DAG: %[[#]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK-DAG: %[[#]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK-DAG: %[[#VAL:]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK-DAG: %[[#IDX64:]] = OpUConvert %[[#]] %[[#]]
+ ; CHECK-DAG: %[[#IDX2_64:]] = OpUConvert %[[#]] %[[#]]
+
+
+ %idx = load i32, ptr addrspace(10) @idx
+ %idx2 = load i32, ptr addrspace(10) @idx2
+ %val = load i32, ptr addrspace(10) @val
+
+ %ptr = alloca <6 x i32>
+ %loaded = load <6 x i32>, ptr %ptr
+ %idx64 = zext i32 %idx to i64
+ %idx2_64 = zext i32 %idx2 to i64
+
+ ; Insertelement with dynamic index spills to stack
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c0]]
+; CHECK: OpStore %[[#]] %[[#undef]] Aligned 32
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c1]]
+; CHECK: OpStore %[[#]] %[[#undef]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c2]]
+; CHECK: OpStore %[[#]] %[[#undef]] Aligned 8
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c3]]
+; CHECK: OpStore %[[#]] %[[#undef]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c4]]
+; CHECK: OpStore %[[#]] %[[#undef]] Aligned 16
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c5]]
+; CHECK: OpStore %[[#]] %[[#undef]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#IDX64]]
+; CHECK: OpStore %[[#]] %[[#VAL]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c0]]
+; CHECK: %[[#V0:]] = OpLoad %[[#int]] %[[#]] Aligned 32
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c1]]
+; CHECK: %[[#V1:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c2]]
+; CHECK: %[[#V2:]] = OpLoad %[[#int]] %[[#]] Aligned 8
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c3]]
+; CHECK: %[[#V3:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c4]]
+; CHECK: %[[#V4:]] = OpLoad %[[#int]] %[[#]] Aligned 16
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c5]]
+; CHECK: %[[#V5:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+ %inserted = insertelement <6 x i32> %loaded, i32 %val, i64 %idx64
+
+ ; Extractelement with dynamic index spills to stack
+
+
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c0]]
+; CHECK: OpStore %[[#]] %[[#V0]] Aligned 32
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c1]]
+; CHECK: OpStore %[[#]] %[[#V1]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c2]]
+; CHECK: OpStore %[[#]] %[[#V2]] Aligned 8
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c3]]
+; CHECK: OpStore %[[#]] %[[#V3]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c4]]
+; CHECK: OpStore %[[#]] %[[#V4]] Aligned 16
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c5]]
+; CHECK: OpStore %[[#]] %[[#V5]] Aligned 4
+; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#IDX2_64]]
+; CHECK: %[[#EXTRACTED:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+ %extracted = extractelement <6 x i32> %inserted, i64 %idx2_64
+
+ ; CHECK: OpStore %[[#out]] %[[#EXTRACTED]]
+ store i32 %extracted, ptr addrspace(10) @out
+ ret void
+}
+
+; CHECK: %[[#test_undef:]] = OpFunction %[[#]] None %[[#]]
+define void @test_undef() #0 {
+ ; CHECK: %[[#label:]] = OpLabel
+ ; CHECK: %[[#V_UNDEF:]] = OpVariable %[[#ptr_arr6int]] Function
+ ; CHECK-DAG: %[[#IDX:]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK-DAG: %[[#VAL:]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK: %[[#IDX64:]] = OpUConvert %[[#]] %[[#IDX]]
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c0]]
+ ; CHECK: OpStore %[[#]] %[[#undef]] Aligned 32
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c1]]
+ ; CHECK: OpStore %[[#]] %[[#undef]] Aligned 4
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c2]]
+ ; CHECK: OpStore %[[#]] %[[#undef]] Aligned 8
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c3]]
+ ; CHECK: OpStore %[[#]] %[[#undef]] Aligned 4
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c4]]
+ ; CHECK: OpStore %[[#]] %[[#undef]] Aligned 16
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c5]]
+ ; CHECK: OpStore %[[#]] %[[#undef]] Aligned 4
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#IDX64]]
+ ; CHECK: OpStore %[[#]] %[[#VAL]] Aligned 4
+ ; CHECK: %[[#PTR0:]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c0]]
+ ; CHECK: %[[#RES:]] = OpLoad %[[#int]] %[[#PTR0]] Aligned 32
+ ; CHECK: OpStore %[[#out]] %[[#RES]]
+ %idx = load i32, ptr addrspace(10) @idx
+ %val = load i32, ptr addrspace(10) @val
+ %idx64 = zext i32 %idx to i64
+ %inserted = insertelement <6 x i32> undef, i32 %val, i64 %idx64
+ %extracted = extractelement <6 x i32> %inserted, i64 0
+ store i32 %extracted, ptr addrspace(10) @out
+ ret void
+}
+
+; CHECK: %[[#test_zero:]] = OpFunction %[[#]] None %[[#]]
+define void @test_zero() #0 {
+ ; CHECK: %[[#label:]] = OpLabel
+ ; CHECK: %[[#V_ZERO:]] = OpVariable %[[#ptr_arr6int]] Function
+ ; CHECK-DAG: %[[#IDX:]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK-DAG: %[[#VAL:]] = OpLoad %[[#int]] %[[#]]
+ ; CHECK: %[[#IDX64:]] = OpUConvert %[[#]] %[[#IDX]]
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c0]]
+ ; CHECK: OpStore %[[#]] %[[#c0]] Aligned 32
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c1]]
+ ; CHECK: OpStore %[[#]] %[[#c0]] Aligned 4
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c2]]
+ ; CHECK: OpStore %[[#]] %[[#c0]] Aligned 8
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c3]]
+ ; CHECK: OpStore %[[#]] %[[#c0]] Aligned 4
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c4]]
+ ; CHECK: OpStore %[[#]] %[[#c0]] Aligned 16
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c5]]
+ ; CHECK: OpStore %[[#]] %[[#c0]] Aligned 4
+ ; CHECK: %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#IDX64]]
+ ; CHECK: OpStore %[[#]] %[[#VAL]] Aligned 4
+ ; CHECK: %[[#PTR0:]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c0]]
+ ; CHECK: %[[#RES:]] = OpLoad %[[#int]] %[[#PTR0]] Aligned 32
+ ; CHECK: OpStore %[[#out]] %[[#RES]]
+ %idx = load i32, ptr addrspace(10) @idx
+ %val = load i32, ptr addrspace(10) @val
+ %idx64 = zext i32 %idx to i64
+ %inserted = insertelement <6 x i32> zeroinitializer, i32 %val, i64 %idx64
+ %extracted = extractelement <6 x i32> %inserted, i64 0
+ store i32 %extracted, ptr addrspace(10) @out
+ ret void
+}
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
More information about the llvm-commits mailing list