[llvm] [SPIRV] Improve vector legalization and type deduction (PR #175067)

Steven Perron via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 8 13:08:55 PST 2026


https://github.com/s-perron created https://github.com/llvm/llvm-project/pull/175067

This patch adds support for scalarizing vector loads in the legalizer and
implements legalization for the spv_const_composite intrinsic. It also
refactors stack temporary creation for vector operations to ensure correct
SPIR-V types are assigned. Additionally, type deduction in the
PostLegalizer is improved to handle GEP and Load instructions.

Fixes https://github.com/llvm/llvm-project/issues/170534


From b998b55df9c9ff5c8855f80b6f0eb8ac3001f269 Mon Sep 17 00:00:00 2001
From: Steven Perron <stevenperron at google.com>
Date: Thu, 8 Jan 2026 13:22:27 -0500
Subject: [PATCH] [SPIRV] Improve vector legalization and type deduction

This patch adds support for scalarizing vector loads in the legalizer and
implements legalization for the spv_const_composite intrinsic. It also
refactors stack temporary creation for vector operations to ensure correct
SPIR-V types are assigned. Additionally, type deduction in the
PostLegalizer is improved to handle GEP and Load instructions.

Fixes https://github.com/llvm/llvm-project/issues/170534
---
 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp  | 159 +++++++++++++----
 llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp  |  85 ++++++++-
 .../spv-extractelt-legalization.ll            |  14 +-
 .../vector-index-scalarization.ll             | 166 ++++++++++++++++++
 4 files changed, 385 insertions(+), 39 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index f6587ba068c0e..03d846cb90b4c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -535,6 +535,66 @@ static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType,
   return ConvReg;
 }
 
+static bool needsVectorLegalization(const LLT &Ty, const SPIRVSubtarget &ST) {
+  if (!Ty.isVector())
+    return false;
+  unsigned NumElements = Ty.getNumElements();
+  unsigned MaxVectorSize = ST.isShader() ? 4 : 16;
+  return (NumElements > 4 && !isPowerOf2_32(NumElements)) ||
+         NumElements > MaxVectorSize;
+}
+
+static bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI,
+                         SPIRVGlobalRegistry *GR) {
+  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register PtrReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+
+  if (!DstTy.isVector())
+    return true;
+
+  const SPIRVSubtarget &ST = MI.getMF()->getSubtarget<SPIRVSubtarget>();
+  if (!needsVectorLegalization(DstTy, ST))
+    return true;
+
+  SmallVector<Register, 8> SplitRegs;
+  LLT EltTy = DstTy.getElementType();
+  unsigned NumElts = DstTy.getNumElements();
+
+  LLT PtrTy = MRI.getType(PtrReg);
+  auto Zero = MIRBuilder.buildConstant(LLT::scalar(32), 0);
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    auto Idx = MIRBuilder.buildConstant(LLT::scalar(32), i);
+    Register EltPtr = MRI.createGenericVirtualRegister(PtrTy);
+
+    MIRBuilder.buildIntrinsic(Intrinsic::spv_gep, ArrayRef<Register>{EltPtr})
+        .addImm(1) // InBounds
+        .addUse(PtrReg)
+        .addUse(Zero.getReg(0))
+        .addUse(Idx.getReg(0));
+
+    MachinePointerInfo EltPtrInfo;
+    Align EltAlign = Align(1);
+    if (!MI.memoperands_empty()) {
+      MachineMemOperand *MMO = *MI.memoperands_begin();
+      EltPtrInfo =
+          MMO->getPointerInfo().getWithOffset(i * EltTy.getSizeInBytes());
+      EltAlign = commonAlignment(MMO->getAlign(), i * EltTy.getSizeInBytes());
+    }
+
+    Register EltReg = MRI.createGenericVirtualRegister(EltTy);
+    MIRBuilder.buildLoad(EltReg, EltPtr, EltPtrInfo, EltAlign);
+    SplitRegs.push_back(EltReg);
+  }
+
+  MIRBuilder.buildBuildVector(DstReg, SplitRegs);
+  MI.eraseFromParent();
+  return true;
+}
+
 static bool legalizeStore(LegalizerHelper &Helper, MachineInstr &MI,
                           SPIRVGlobalRegistry *GR) {
   MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
@@ -623,18 +683,40 @@ bool SPIRVLegalizerInfo::legalizeCustom(
     }
     return true;
   }
+  case TargetOpcode::G_LOAD:
+    return legalizeLoad(Helper, MI, GR);
   case TargetOpcode::G_STORE:
     return legalizeStore(Helper, MI, GR);
   }
 }
 
-static bool needsVectorLegalization(const LLT &Ty, const SPIRVSubtarget &ST) {
-  if (!Ty.isVector())
-    return false;
-  unsigned NumElements = Ty.getNumElements();
-  unsigned MaxVectorSize = ST.isShader() ? 4 : 16;
-  return (NumElements > 4 && !isPowerOf2_32(NumElements)) ||
-         NumElements > MaxVectorSize;
+static MachineInstrBuilder
+createStackTemporaryForVector(LegalizerHelper &Helper, SPIRVGlobalRegistry *GR,
+                              Register SrcReg, LLT SrcTy,
+                              MachinePointerInfo &PtrInfo, Align &VecAlign) {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+  VecAlign = Helper.getStackTemporaryAlignment(SrcTy);
+  auto StackTemp = Helper.createStackTemporary(
+      TypeSize::getFixed(SrcTy.getSizeInBytes()), VecAlign, PtrInfo);
+
+  // Set the type of StackTemp to a pointer to an array of the element type.
+  SPIRVType *SpvSrcTy = GR->getSPIRVTypeForVReg(SrcReg);
+  SPIRVType *EltSpvTy = GR->getScalarOrVectorComponentType(SpvSrcTy);
+  const Type *LLVMEltTy = GR->getTypeForSPIRVType(EltSpvTy);
+  const Type *LLVMArrTy =
+      ArrayType::get(const_cast<Type *>(LLVMEltTy), SrcTy.getNumElements());
+  SPIRVType *ArrSpvTy = GR->getOrCreateSPIRVType(
+      LLVMArrTy, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, true);
+  SPIRVType *PtrToArrSpvTy = GR->getOrCreateSPIRVPointerType(
+      ArrSpvTy, MIRBuilder, SPIRV::StorageClass::Function);
+
+  Register StackReg = StackTemp.getReg(0);
+  MRI.setRegClass(StackReg, GR->getRegClass(PtrToArrSpvTy));
+  GR->assignSPIRVTypeToVReg(PtrToArrSpvTy, StackReg, MIRBuilder.getMF());
+
+  return StackTemp;
 }
 
 static bool legalizeSpvBitcast(LegalizerHelper &Helper, MachineInstr &MI,
@@ -697,11 +779,10 @@ static bool legalizeSpvInsertElt(LegalizerHelper &Helper, MachineInstr &MI,
     }
 
     LLT EltTy = SrcTy.getElementType();
-    Align VecAlign = Helper.getStackTemporaryAlignment(SrcTy);
-
+    Align VecAlign;
     MachinePointerInfo PtrInfo;
-    auto StackTemp = Helper.createStackTemporary(
-        TypeSize::getFixed(SrcTy.getSizeInBytes()), VecAlign, PtrInfo);
+    auto StackTemp = createStackTemporaryForVector(Helper, GR, SrcReg, SrcTy,
+                                                   PtrInfo, VecAlign);
 
     MIRBuilder.buildStore(SrcReg, StackTemp, PtrInfo, VecAlign);
 
@@ -763,26 +844,10 @@ static bool legalizeSpvExtractElt(LegalizerHelper &Helper, MachineInstr &MI,
     }
 
     LLT EltTy = SrcTy.getElementType();
-    Align VecAlign = Helper.getStackTemporaryAlignment(SrcTy);
-
+    Align VecAlign;
     MachinePointerInfo PtrInfo;
-    auto StackTemp = Helper.createStackTemporary(
-        TypeSize::getFixed(SrcTy.getSizeInBytes()), VecAlign, PtrInfo);
-
-    // Set the type of StackTemp to a pointer to an array of the element type.
-    SPIRVType *SpvSrcTy = GR->getSPIRVTypeForVReg(SrcReg);
-    SPIRVType *EltSpvTy = GR->getScalarOrVectorComponentType(SpvSrcTy);
-    const Type *LLVMEltTy = GR->getTypeForSPIRVType(EltSpvTy);
-    const Type *LLVMArrTy =
-        ArrayType::get(const_cast<Type *>(LLVMEltTy), SrcTy.getNumElements());
-    SPIRVType *ArrSpvTy = GR->getOrCreateSPIRVType(
-        LLVMArrTy, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, true);
-    SPIRVType *PtrToArrSpvTy = GR->getOrCreateSPIRVPointerType(
-        ArrSpvTy, MIRBuilder, SPIRV::StorageClass::Function);
-
-    Register StackReg = StackTemp.getReg(0);
-    MRI.setRegClass(StackReg, GR->getRegClass(PtrToArrSpvTy));
-    GR->assignSPIRVTypeToVReg(PtrToArrSpvTy, StackReg, *MI.getMF());
+    auto StackTemp = createStackTemporaryForVector(Helper, GR, SrcReg, SrcTy,
+                                                   PtrInfo, VecAlign);
 
     MIRBuilder.buildStore(SrcReg, StackTemp, PtrInfo, VecAlign);
 
@@ -807,6 +872,38 @@ static bool legalizeSpvExtractElt(LegalizerHelper &Helper, MachineInstr &MI,
   return true;
 }
 
+static bool legalizeSpvConstComposite(LegalizerHelper &Helper, MachineInstr &MI,
+                                      SPIRVGlobalRegistry *GR) {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  const SPIRVSubtarget &ST = MI.getMF()->getSubtarget<SPIRVSubtarget>();
+
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+
+  if (!needsVectorLegalization(DstTy, ST))
+    return true;
+
+  SmallVector<Register, 8> SrcRegs;
+  if (MI.getNumOperands() == 2) {
+    // The "null" case: no values are attached.
+    LLT EltTy = DstTy.getElementType();
+    auto Zero = MIRBuilder.buildConstant(EltTy, 0);
+    SPIRVType *SpvDstTy = GR->getSPIRVTypeForVReg(DstReg);
+    SPIRVType *SpvEltTy = GR->getScalarOrVectorComponentType(SpvDstTy);
+    GR->assignSPIRVTypeToVReg(SpvEltTy, Zero.getReg(0), MIRBuilder.getMF());
+    for (unsigned i = 0; i < DstTy.getNumElements(); ++i)
+      SrcRegs.push_back(Zero.getReg(0));
+  } else {
+    for (unsigned i = 2; i < MI.getNumOperands(); ++i) {
+      SrcRegs.push_back(MI.getOperand(i).getReg());
+    }
+  }
+  MIRBuilder.buildBuildVector(DstReg, SrcRegs);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool SPIRVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                            MachineInstr &MI) const {
   LLVM_DEBUG(dbgs() << "legalizeIntrinsic: " << MI);
@@ -818,6 +915,8 @@ bool SPIRVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     return legalizeSpvInsertElt(Helper, MI, GR);
   case Intrinsic::spv_extractelt:
     return legalizeSpvExtractElt(Helper, MI, GR);
+  case Intrinsic::spv_const_composite:
+    return legalizeSpvConstComposite(Helper, MI, GR);
   }
   return true;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
index 5b4ddc267c9b8..40a5cac20698f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp
@@ -195,6 +195,78 @@ static SPIRVType *deduceTypeFromUses(Register Reg, MachineFunction &MF,
   return nullptr;
 }
 
+static SPIRVType *deduceGEPType(MachineInstr *I, SPIRVGlobalRegistry *GR,
+                                MachineIRBuilder &MIB) {
+  LLVM_DEBUG(dbgs() << "Deducing GEP type for: " << *I);
+  Register PtrReg = I->getOperand(3).getReg();
+  SPIRVType *PtrType = GR->getSPIRVTypeForVReg(PtrReg);
+  if (!PtrType) {
+    LLVM_DEBUG(dbgs() << "  Could not get type for pointer operand.\n");
+    return nullptr;
+  }
+
+  SPIRVType *PointeeType = GR->getPointeeType(PtrType);
+  if (!PointeeType) {
+    LLVM_DEBUG(dbgs() << "  Could not get pointee type from pointer type.\n");
+    return nullptr;
+  }
+
+  MachineRegisterInfo *MRI = MIB.getMRI();
+
+  // The first index (operand 4) steps over the pointer, so the type doesn't
+  // change.
+  for (unsigned i = 5; i < I->getNumOperands(); ++i) {
+    LLVM_DEBUG(dbgs() << "  Traversing index " << i
+                      << ", current type: " << *PointeeType);
+    switch (PointeeType->getOpcode()) {
+    case SPIRV::OpTypeArray:
+    case SPIRV::OpTypeRuntimeArray:
+    case SPIRV::OpTypeVector: {
+      Register ElemTypeReg = PointeeType->getOperand(1).getReg();
+      PointeeType = GR->getSPIRVTypeForVReg(ElemTypeReg);
+      break;
+    }
+    case SPIRV::OpTypeStruct: {
+      MachineOperand &IdxOp = I->getOperand(i);
+      if (!IdxOp.isReg()) {
+        LLVM_DEBUG(dbgs() << "  Index is not a register.\n");
+        return nullptr;
+      }
+      MachineInstr *Def = MRI->getVRegDef(IdxOp.getReg());
+      if (!Def) {
+        LLVM_DEBUG(
+            dbgs() << "  Could not find definition for index register.\n");
+        return nullptr;
+      }
+
+      uint64_t IndexVal = foldImm(IdxOp, MRI);
+      if (IndexVal >= PointeeType->getNumOperands() - 1) {
+        LLVM_DEBUG(dbgs() << "  Struct index out of bounds.\n");
+        return nullptr;
+      }
+
+      Register MemberTypeReg = PointeeType->getOperand(IndexVal + 1).getReg();
+      PointeeType = GR->getSPIRVTypeForVReg(MemberTypeReg);
+      break;
+    }
+    default:
+      LLVM_DEBUG(dbgs() << "  Unknown type opcode for GEP traversal.\n");
+      return nullptr;
+    }
+
+    if (!PointeeType) {
+      LLVM_DEBUG(dbgs() << "  Could not resolve next pointee type.\n");
+      return nullptr;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "  Final pointee type: " << *PointeeType);
+
+  SPIRV::StorageClass::StorageClass SC = GR->getPointerStorageClass(PtrType);
+  SPIRVType *Res = GR->getOrCreateSPIRVPointerType(PointeeType, MIB, SC);
+  LLVM_DEBUG(dbgs() << "  Deduced GEP type: " << *Res);
+  return Res;
+}
+
 static SPIRVType *deduceResultTypeFromOperands(MachineInstr *I,
                                                SPIRVGlobalRegistry *GR,
                                                MachineIRBuilder &MIB) {
@@ -207,12 +279,23 @@ static SPIRVType *deduceResultTypeFromOperands(MachineInstr *I,
     return deduceTypeFromOperandRange(I, MIB, GR, 1, I->getNumOperands());
   case TargetOpcode::G_SHUFFLE_VECTOR:
     return deduceTypeFromOperandRange(I, MIB, GR, 1, 3);
+  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
+  case TargetOpcode::G_INTRINSIC: {
+    auto IntrinsicID = cast<GIntrinsic>(I)->getIntrinsicID();
+    if (IntrinsicID == Intrinsic::spv_gep)
+      return deduceGEPType(I, GR, MIB);
+    break;
+  }
+  case TargetOpcode::G_LOAD: {
+    SPIRVType *PtrType = deduceTypeFromSingleOperand(I, MIB, GR, 1);
+    return PtrType ? GR->getPointeeType(PtrType) : nullptr;
+  }
   default:
     if (I->getNumDefs() == 1 && I->getNumOperands() > 1 &&
         I->getOperand(1).isReg())
       return deduceTypeFromSingleOperand(I, MIB, GR, 1);
-    return nullptr;
   }
+  return nullptr;
 }
 
 static bool deduceAndAssignTypeForGUnmerge(MachineInstr *I, MachineFunction &MF,
diff --git a/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll b/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll
index 3188f8b31aac5..cdae023a54557 100644
--- a/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll
+++ b/llvm/test/CodeGen/SPIRV/legalization/spv-extractelt-legalization.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val --target-env vulkan1.3 %}
 
 ; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0
 ; CHECK-DAG: %[[#Const0:]] = OpConstant %[[#Int]] 0
@@ -15,15 +16,16 @@
 ; CHECK-DAG: %[[#Const60:]] = OpConstant %[[#Int]] 60
 ; CHECK-DAG: %[[#Arr:]] = OpTypeArray %[[#Int]] %[[#]]
 ; CHECK-DAG: %[[#PtrArr:]] = OpTypePointer Function %[[#Arr]]
+; CHECK-DAG: %[[#PtrPriv:]] = OpTypePointer Private %[[#Int]]
 
-@G = addrspace(1) global i32 0, align 4
+@G = internal addrspace(10) global i32 0, align 4
 
 define void @main() #0 {
 entry:
 ; CHECK: %[[#Var:]] = OpVariable %[[#PtrArr]] Function
 
 ; CHECK: %[[#Idx:]] = OpLoad %[[#Int]]
-  %idx = load i32, ptr addrspace(1) @G, align 4
+  %idx = load i32, ptr addrspace(10) @G, align 4
 
 
 ; CHECK: %[[#PtrElt0:]] = OpInBoundsAccessChain %[[#]] %[[#Var]] %[[#Const0]]
@@ -55,12 +57,8 @@ entry:
   %res = extractelement <6 x i32> %vec6, i32 %idx
   
 ; CHECK: OpStore {{.*}} %[[#Ld]]
-  store i32 %res, ptr addrspace(1) @G, align 4
+  store i32 %res, ptr addrspace(10) @G, align 4
   ret void
 }
 
-attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
-
-
-
-
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll b/llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll
new file mode 100644
index 0000000000000..23176465daa19
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/legalization/vector-index-scalarization.ll
@@ -0,0 +1,166 @@
+; RUN: llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val --target-env vulkan1.3 %}
+
+; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#c6:]] = OpConstant %[[#int]] 6
+; CHECK-DAG: %[[#arr6int:]] = OpTypeArray %[[#int]] %[[#c6]]
+; CHECK-DAG: %[[#ptr_arr6int:]] = OpTypePointer Function %[[#arr6int]]
+; CHECK-DAG: %[[#ptr_f_int:]] = OpTypePointer Function %[[#int]]
+; CHECK-DAG: %[[#ptr_p_int:]] = OpTypePointer Private %[[#int]]
+; CHECK-DAG: %[[#c0:]] = OpConstant %[[#int]] 0
+; CHECK-DAG: %[[#c1:]] = OpConstant %[[#int]] 1
+; CHECK-DAG: %[[#c2:]] = OpConstant %[[#int]] 2
+; CHECK-DAG: %[[#c3:]] = OpConstant %[[#int]] 3
+; CHECK-DAG: %[[#c4:]] = OpConstant %[[#int]] 4
+; CHECK-DAG: %[[#c5:]] = OpConstant %[[#int]] 5
+; CHECK-DAG: %[[#undef:]] = OpUndef %[[#int]]
+
+; CHECK-DAG: %[[#out:]] = OpVariable %[[#ptr_p_int]] Private
+
+
+@out = internal addrspace(10) global i32 0
+@idx = internal addrspace(10) global i32 0
+@idx2 = internal addrspace(10) global i32 0
+@val = internal addrspace(10) global i32 0
+
+; CHECK: %[[#test_full:]] = OpFunction %[[#]] None %[[#]]
+define void @test_full() #0 {
+  ; CHECK:      %[[#label:]] = OpLabel
+  ; CHECK-DAG:  %[[#V_INS:]] = OpVariable %[[#ptr_arr6int]] Function
+  ; CHECK-DAG:  %[[#V_EXT:]] = OpVariable %[[#ptr_arr6int]] Function
+  ; CHECK-DAG:  %[[#]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK-DAG:  %[[#]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK-DAG:  %[[#VAL:]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK-DAG:  %[[#IDX64:]] = OpUConvert %[[#]] %[[#]]
+  ; CHECK-DAG:  %[[#IDX2_64:]] = OpUConvert %[[#]] %[[#]]
+
+
+  %idx = load i32, ptr addrspace(10) @idx
+  %idx2 = load i32, ptr addrspace(10) @idx2
+  %val = load i32, ptr addrspace(10) @val
+
+  %ptr = alloca <6 x i32>
+  %loaded = load <6 x i32>, ptr %ptr
+  %idx64 = zext i32 %idx to i64
+  %idx2_64 = zext i32 %idx2 to i64
+
+  ; Insertelement with dynamic index spills to stack
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c0]]
+; CHECK:  OpStore %[[#]] %[[#undef]] Aligned 32
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c1]]
+; CHECK:  OpStore %[[#]] %[[#undef]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c2]]
+; CHECK:  OpStore %[[#]] %[[#undef]] Aligned 8
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c3]]
+; CHECK:  OpStore %[[#]] %[[#undef]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c4]]
+; CHECK:  OpStore %[[#]] %[[#undef]] Aligned 16
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c5]]
+; CHECK:  OpStore %[[#]] %[[#undef]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#IDX64]]
+; CHECK:  OpStore %[[#]] %[[#VAL]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c0]]
+; CHECK:  %[[#V0:]] = OpLoad %[[#int]] %[[#]] Aligned 32
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c1]]
+; CHECK:  %[[#V1:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c2]]
+; CHECK:  %[[#V2:]] = OpLoad %[[#int]] %[[#]] Aligned 8
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c3]]
+; CHECK:  %[[#V3:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c4]]
+; CHECK:  %[[#V4:]] = OpLoad %[[#int]] %[[#]] Aligned 16
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_INS]] %[[#c5]]
+; CHECK:  %[[#V5:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+  %inserted = insertelement <6 x i32> %loaded, i32 %val, i64 %idx64
+
+  ; Extractelement with dynamic index spills to stack
+
+
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c0]]
+; CHECK:  OpStore %[[#]] %[[#V0]] Aligned 32
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c1]]
+; CHECK:  OpStore %[[#]] %[[#V1]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c2]]
+; CHECK:  OpStore %[[#]] %[[#V2]] Aligned 8
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c3]]
+; CHECK:  OpStore %[[#]] %[[#V3]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c4]]
+; CHECK:  OpStore %[[#]] %[[#V4]] Aligned 16
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#c5]]
+; CHECK:  OpStore %[[#]] %[[#V5]] Aligned 4
+; CHECK:  %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_EXT]] %[[#IDX2_64]]
+; CHECK:  %[[#EXTRACTED:]] = OpLoad %[[#int]] %[[#]] Aligned 4
+  %extracted = extractelement <6 x i32> %inserted, i64 %idx2_64
+
+  ; CHECK: OpStore %[[#out]] %[[#EXTRACTED]]
+  store i32 %extracted, ptr addrspace(10) @out
+  ret void
+}
+
+; CHECK: %[[#test_undef:]] = OpFunction %[[#]] None %[[#]]
+define void @test_undef() #0 {
+  ; CHECK:      %[[#label:]] = OpLabel
+  ; CHECK:      %[[#V_UNDEF:]] = OpVariable %[[#ptr_arr6int]] Function
+  ; CHECK-DAG:  %[[#IDX:]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK-DAG:  %[[#VAL:]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK:      %[[#IDX64:]] = OpUConvert %[[#]] %[[#IDX]]
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c0]]
+  ; CHECK:      OpStore %[[#]] %[[#undef]] Aligned 32
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c1]]
+  ; CHECK:      OpStore %[[#]] %[[#undef]] Aligned 4
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c2]]
+  ; CHECK:      OpStore %[[#]] %[[#undef]] Aligned 8
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c3]]
+  ; CHECK:      OpStore %[[#]] %[[#undef]] Aligned 4
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c4]]
+  ; CHECK:      OpStore %[[#]] %[[#undef]] Aligned 16
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c5]]
+  ; CHECK:      OpStore %[[#]] %[[#undef]] Aligned 4
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#IDX64]]
+  ; CHECK:      OpStore %[[#]] %[[#VAL]] Aligned 4
+  ; CHECK:      %[[#PTR0:]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_UNDEF]] %[[#c0]]
+  ; CHECK:      %[[#RES:]] = OpLoad %[[#int]] %[[#PTR0]] Aligned 32
+  ; CHECK:      OpStore %[[#out]] %[[#RES]]
+  %idx = load i32, ptr addrspace(10) @idx
+  %val = load i32, ptr addrspace(10) @val
+  %idx64 = zext i32 %idx to i64
+  %inserted = insertelement <6 x i32> undef, i32 %val, i64 %idx64
+  %extracted = extractelement <6 x i32> %inserted, i64 0
+  store i32 %extracted, ptr addrspace(10) @out
+  ret void
+}
+
+; CHECK: %[[#test_zero:]] = OpFunction %[[#]] None %[[#]]
+define void @test_zero() #0 {
+  ; CHECK:      %[[#label:]] = OpLabel
+  ; CHECK:      %[[#V_ZERO:]] = OpVariable %[[#ptr_arr6int]] Function
+  ; CHECK-DAG:  %[[#IDX:]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK-DAG:  %[[#VAL:]] = OpLoad %[[#int]] %[[#]]
+  ; CHECK:      %[[#IDX64:]] = OpUConvert %[[#]] %[[#IDX]]
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c0]]
+  ; CHECK:      OpStore %[[#]] %[[#c0]] Aligned 32
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c1]]
+  ; CHECK:      OpStore %[[#]] %[[#c0]] Aligned 4
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c2]]
+  ; CHECK:      OpStore %[[#]] %[[#c0]] Aligned 8
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c3]]
+  ; CHECK:      OpStore %[[#]] %[[#c0]] Aligned 4
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c4]]
+  ; CHECK:      OpStore %[[#]] %[[#c0]] Aligned 16
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c5]]
+  ; CHECK:      OpStore %[[#]] %[[#c0]] Aligned 4
+  ; CHECK:      %[[#]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#IDX64]]
+  ; CHECK:      OpStore %[[#]] %[[#VAL]] Aligned 4
+  ; CHECK:      %[[#PTR0:]] = OpInBoundsAccessChain %[[#ptr_f_int]] %[[#V_ZERO]] %[[#c0]]
+  ; CHECK:      %[[#RES:]] = OpLoad %[[#int]] %[[#PTR0]] Aligned 32
+  ; CHECK:      OpStore %[[#out]] %[[#RES]]
+  %idx = load i32, ptr addrspace(10) @idx
+  %val = load i32, ptr addrspace(10) @val
+  %idx64 = zext i32 %idx to i64
+  %inserted = insertelement <6 x i32> zeroinitializer, i32 %val, i64 %idx64
+  %extracted = extractelement <6 x i32> %inserted, i64 0
+  store i32 %extracted, ptr addrspace(10) @out
+  ret void
+}
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }



More information about the llvm-commits mailing list