[llvm] e09f98a - AMDGPU: Fix LiveVariables error after optimizing VGPR ranges
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 17 06:38:40 PST 2022
Author: Matt Arsenault
Date: 2022-01-17T09:38:35-05:00
New Revision: e09f98a69a8d223168f0766a3574c4a09b167b81
URL: https://github.com/llvm/llvm-project/commit/e09f98a69a8d223168f0766a3574c4a09b167b81
DIFF: https://github.com/llvm/llvm-project/commit/e09f98a69a8d223168f0766a3574c4a09b167b81.diff
LOG: AMDGPU: Fix LiveVariables error after optimizing VGPR ranges
This was not removing the block from the live set depending on the
specific depth first visit order. Fixes a verifier error in the OpenCL
conformance tests.
Added:
llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir
Modified:
llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index cea63d72de81f..e13e33ed54578 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -371,47 +371,42 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
// Re-calculate the liveness of \p Reg in the THEN-region
void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
-
- SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming;
-
- MachineBasicBlock *ThenEntry = nullptr;
- for (auto *Succ : If->successors()) {
- if (Succ != Flow) {
- ThenEntry = Succ;
- break;
+ SetVector<MachineBasicBlock *> Blocks;
+ SmallVector<MachineBasicBlock *> WorkList({If});
+
+ // Collect all successors until we see the flow block, where we should
+ // reconverge.
+ while (!WorkList.empty()) {
+ auto *MBB = WorkList.pop_back_val();
+ for (auto *Succ : MBB->successors()) {
+ if (Succ != Flow && !Blocks.contains(Succ)) {
+ WorkList.push_back(Succ);
+ Blocks.insert(Succ);
+ }
}
}
- assert(ThenEntry && "No successor in Then region?");
LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
- df_iterator_default_set<MachineBasicBlock *, 16> Visited;
-
- for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
- if (MBB == Flow)
- break;
-
+ for (MachineBasicBlock *MBB : Blocks) {
// Clear Live bit, as we will recalculate afterwards
LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
<< '\n');
OldVarInfo.AliveBlocks.reset(MBB->getNumber());
}
+ SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
+
// Get the blocks the Reg should be alive through
for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
++I) {
auto *UseMI = I->getParent();
if (UseMI->isPHI() && I->readsReg()) {
- if (Visited.contains(UseMI->getParent()))
+ if (Blocks.contains(UseMI->getParent()))
PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
}
}
- Visited.clear();
-
- for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) {
- if (MBB == Flow)
- break;
-
+ for (MachineBasicBlock *MBB : Blocks) {
SmallVector<MachineInstr *> Uses;
// PHI instructions has been processed before.
findNonPHIUsesInBlock(Reg, MBB, Uses);
@@ -438,7 +433,7 @@ void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
// Set the isKilled flag if we get new Kills in the THEN region.
for (auto *MI : OldVarInfo.Kills) {
- if (Visited.contains(MI->getParent()))
+ if (Blocks.contains(MI->getParent()))
MI->addRegisterKilled(Reg, TRI);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir b/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir
new file mode 100644
index 0000000000000..69cfbeeb9a491
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/block-should-not-be-in-alive-blocks.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs -start-after=unreachable-mbb-elimination -stop-after=phi-node-elimination -o - %s | FileCheck %s
+
+# FIXME: Should be able to just use run-pass, but need to keep
+# LiveVariables live after for the verifier. Also -start-before
+# doesn't work here for some reason.
+
+# LiveVariables needs to remove %bb.3 from the live blocks for %1
+# after the phi is introduced, but was previously missed due to
+# encountering the flow block first in the depth first search.
+
+---
+name: live_variable_update
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: live_variable_update
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY killed $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+ ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[COPY3]], implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term killed [[S_AND_B32_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %15, 0, implicit $exec
+ ; CHECK-NEXT: %7:vgpr_32, dead %8:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %7, %subreg.sub1
+ ; CHECK-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B]], killed [[GLOBAL_LOAD_UBYTE]], 0, 0, implicit $exec :: (store (s8), addrspace 1)
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.4, implicit undef $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; CHECK-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ ; CHECK-NEXT: S_BRANCH %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 killed [[S_XOR_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]]
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_OR_SAVEEXEC_B32_]], implicit-def $scc
+ ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed [[S_AND_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.2(0x40000000), %bb.5(0x40000000)
+ liveins: $vgpr0, $sgpr4_sgpr5
+
+ %0:sgpr_64 = COPY $sgpr4_sgpr5
+ %1:vgpr_32 = COPY $vgpr0
+ %2:sreg_32 = V_CMP_NE_U32_e64 0, %1, implicit $exec
+ %3:sreg_32 = SI_IF killed %2, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.7(0x80000000)
+
+ %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %5:vgpr_32, %6:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %4.sub0, %1, 0, implicit $exec
+ %7:vgpr_32, dead %8:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, %4.sub1, killed %6, 0, implicit $exec
+ %9:vreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
+ %10:vgpr_32 = GLOBAL_LOAD_UBYTE killed %9, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ %11:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ GLOBAL_STORE_BYTE killed %11, killed %10, 0, 0, implicit $exec :: (store (s8), addrspace 1)
+ S_BRANCH %bb.7
+
+ bb.2:
+ successors: %bb.4(0x40000000), %bb.3(0x40000000)
+
+ S_CBRANCH_SCC0 %bb.4, implicit undef $scc
+
+ bb.3:
+ successors: %bb.6(0x80000000)
+
+ S_BRANCH %bb.6
+
+ bb.4:
+ successors: %bb.6(0x80000000)
+
+ %12:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ %13:vgpr_32 = GLOBAL_LOAD_UBYTE killed %12, 0, 0, implicit $exec :: (load (s8), addrspace 1)
+ S_BRANCH %bb.6
+
+ bb.5:
+ successors: %bb.1(0x40000000), %bb.7(0x40000000)
+
+ %14:sreg_32 = SI_ELSE %3, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.6:
+ successors: %bb.5(0x80000000)
+
+ S_BRANCH %bb.5
+
+ bb.7:
+ SI_END_CF %14, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index 4061eda1b74f1..0b6645556af3a 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -443,4 +443,106 @@ end:
ret float %r2
}
+define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %src1) {
+ ; SI-LABEL: name: livevariables_update_missed_block
+ ; SI: bb.0.entry:
+ ; SI-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
+ ; SI-NEXT: liveins: $vgpr0, $sgpr0_sgpr1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY killed $sgpr0_sgpr1
+ ; SI-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0
+ ; SI-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY1]](s32), implicit $exec
+ ; SI-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NE_U32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; SI-NEXT: S_BRANCH %bb.2
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.1.if.then:
+ ; SI-NEXT: successors: %bb.7(0x80000000)
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4)
+ ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %50, 0, implicit $exec
+ ; SI-NEXT: %43:vgpr_32, dead %45:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %43, %subreg.sub1
+ ; SI-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1)
+ ; SI-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; SI-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B]], killed [[GLOBAL_LOAD_UBYTE]], 0, 0, implicit $exec :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+ ; SI-NEXT: S_BRANCH %bb.7
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.2.if.then9:
+ ; SI-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: S_CBRANCH_SCC0 %bb.4, implicit undef $scc
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.3:
+ ; SI-NEXT: successors: %bb.6(0x80000000)
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: S_BRANCH %bb.6
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.4.sw.bb:
+ ; SI-NEXT: successors: %bb.6(0x80000000)
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; SI-NEXT: [[GLOBAL_LOAD_UBYTE1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[V_MOV_B1]], 0, 0, implicit $exec :: (load (s8) from `i8 addrspace(1)* null`, addrspace 1)
+ ; SI-NEXT: S_BRANCH %bb.6
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.5.Flow:
+ ; SI-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000)
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY1]](s32), %bb.0, undef %51:vgpr_32, %bb.6
+ ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; SI-NEXT: S_BRANCH %bb.1
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.6.sw.bb18:
+ ; SI-NEXT: successors: %bb.5(0x80000000)
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI undef %39:vgpr_32, %bb.3, [[GLOBAL_LOAD_UBYTE1]], %bb.4
+ ; SI-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; SI-NEXT: GLOBAL_STORE_BYTE killed [[V_MOV_B2]], killed [[PHI1]], 0, 0, implicit $exec :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1)
+ ; SI-NEXT: S_BRANCH %bb.5
+ ; SI-NEXT: {{ $}}
+ ; SI-NEXT: bb.7.UnifiedReturnBlock:
+ ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; SI-NEXT: S_ENDPGM 0
+entry:
+ %i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %i4 = add i32 0, %i2
+ %i5 = zext i32 %i4 to i64
+ %i6 = add i64 0, %i5
+ %add = add i64 %i6, 0
+ %cmp2 = icmp ult i64 %add, 1
+ br i1 %cmp2, label %if.then, label %if.then9
+
+if.then: ; preds = %entry
+ %i9 = mul i64 %i6, 1
+ %i10 = getelementptr inbounds i8, i8 addrspace(1)* %src1, i64 %i9
+ %i11 = load i8, i8 addrspace(1)* %i10, align 1
+ %i12 = insertelement <3 x i8> zeroinitializer, i8 %i11, i64 0
+ %i13 = insertelement <3 x i8> %i12, i8 0, i64 1
+ %i14 = insertelement <3 x i8> %i13, i8 0, i64 1
+ %i15 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i14
+ %i16 = extractelement <3 x i8> %i15, i64 0
+ store i8 %i16, i8 addrspace(1)* null, align 1
+ ret void
+
+if.then9: ; preds = %entry
+ br i1 undef, label %sw.bb18, label %sw.bb
+
+sw.bb: ; preds = %if.then9
+ %i17 = load i8, i8 addrspace(1)* null, align 1
+ %i18 = insertelement <4 x i8> zeroinitializer, i8 %i17, i64 0
+ %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+ br label %sw.bb18
+
+sw.bb18: ; preds = %sw.bb, %if.then9
+ %a.sroa.0.0 = phi <4 x i8> [ %a.sroa.0.0.vecblend, %sw.bb ], [ undef, %if.then9 ]
+ %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 undef, i32 1, i32 undef>
+ %i19 = insertelement <3 x i8> %a.sroa.0.0.vec.extract61, i8 0, i64 0
+ %i20 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i19
+ %i21 = extractelement <3 x i8> %i20, i64 1
+ store i8 %i21, i8 addrspace(1)* null, align 1
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable willreturn }
More information about the llvm-commits
mailing list