[llvm] r309762 - [AMDGPU] Collapse adjacent SI_END_CF

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 16:14:32 PDT 2017


Author: rampitec
Date: Tue Aug  1 16:14:32 2017
New Revision: 309762

URL: http://llvm.org/viewvc/llvm-project?rev=309762&view=rev
Log:
[AMDGPU] Collapse adjacent SI_END_CF

Add a pass to remove redundant S_OR_B64 instructions that re-enable lanes in
the exec mask. If two SI_END_CF (lowered as S_OR_B64) come together with no
vector instructions between them, we can keep only the outer SI_END_CF:
because the CFG is structured, the exec bits restored by the outer end
statement always include those restored by the inner one.
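
For example, in a sequence like the following (a hand-written sketch with
made-up SGPR pairs, not taken from the tests below), the first S_OR_B64 is
the inner end-cf and becomes redundant:

    s_or_b64 exec, exec, s[2:3]   ; inner SI_END_CF
    s_add_u32 s0, s0, 1           ; only SALU code, no exec reads, in between
    s_or_b64 exec, exec, s[4:5]   ; outer SI_END_CF

The pass erases the inner s_or_b64 and keeps only the outer one.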

This needs to run before register allocation, so that the saved exec mask
registers can be eliminated, but after the register coalescer, so that no
vector register copies remain between the different end cf statements.

Differential Revision: https://reviews.llvm.org/D35967

Added:
    llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
    llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=309762&r1=309761&r2=309762&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Tue Aug  1 16:14:32 2017
@@ -44,6 +44,7 @@ FunctionPass *createSIShrinkInstructions
 FunctionPass *createSILoadStoreOptimizerPass();
 FunctionPass *createSIWholeQuadModePass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
+FunctionPass *createSIOptimizeExecMaskingPreRAPass();
 FunctionPass *createSIFixSGPRCopiesPass();
 FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIDebuggerInsertNopsPass();
@@ -121,6 +122,9 @@ extern char &AMDGPUUnifyMetadataID;
 void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
 extern char &SIFixControlFlowLiveIntervalsID;
 
+void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
+extern char &SIOptimizeExecMaskingPreRAID;
+
 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
 extern char &AMDGPUAnnotateUniformValuesPassID;
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=309762&r1=309761&r2=309762&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Aug  1 16:14:32 2017
@@ -142,6 +142,7 @@ extern "C" void LLVMInitializeAMDGPUTarg
   initializeSIPeepholeSDWAPass(*PR);
   initializeSIShrinkInstructionsPass(*PR);
   initializeSIFixControlFlowLiveIntervalsPass(*PR);
+  initializeSIOptimizeExecMaskingPreRAPass(*PR);
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
@@ -781,6 +782,9 @@ void GCNPassConfig::addFastRegAlloc(Func
 }
 
 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+  if (getOptLevel() > CodeGenOpt::None)
+    insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
+
   // This needs to be run directly before register allocation because earlier
   // passes might recompute live intervals.
   insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);

Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=309762&r1=309761&r2=309762&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Tue Aug  1 16:14:32 2017
@@ -97,6 +97,7 @@ add_llvm_target(AMDGPUCodeGen
   SIMachineScheduler.cpp
   SIMemoryLegalizer.cpp
   SIOptimizeExecMasking.cpp
+  SIOptimizeExecMaskingPreRA.cpp
   SIPeepholeSDWA.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp

Added: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp?rev=309762&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp Tue Aug  1 16:14:32 2017
@@ -0,0 +1,159 @@
+//===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass removes redundant S_OR_B64 instructions that re-enable
+/// lanes in the exec mask. If two SI_END_CF (lowered as S_OR_B64) come
+/// together with no vector instructions between them, we can keep only the
+/// outer SI_END_CF: because the CFG is structured, the exec bits restored
+/// by the outer end statement always include those restored by the inner one.
+///
+/// This needs to run before register allocation, so that the saved exec mask
+/// registers can be eliminated, but after the register coalescer, so that no
+/// vector register copies remain between the different end cf statements.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
+
+namespace {
+
+class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+public:
+  static char ID;
+
+public:
+  SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
+    initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "SI optimize exec mask operations pre-RA";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LiveIntervals>();
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+                      "SI optimize exec mask operations pre-RA", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+                    "SI optimize exec mask operations pre-RA", false, false)
+
+char SIOptimizeExecMaskingPreRA::ID = 0;
+
+char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
+
+FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
+  return new SIOptimizeExecMaskingPreRA();
+}
+
+static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
+  return MI.getOpcode() == AMDGPU::S_OR_B64 &&
+         MI.modifiesRegister(AMDGPU::EXEC, TRI);
+}
+
+static bool isFullExecCopy(const MachineInstr& MI) {
+  return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
+}
+
+static unsigned getOrNonExecReg(const MachineInstr &MI,
+                                const SIInstrInfo &TII) {
+  auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
+  if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
+     return Op->getReg();
+  Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
+  if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
+     return Op->getReg();
+  return AMDGPU::NoRegister;
+}
+
+static MachineInstr* getOrExecSource(const MachineInstr &MI,
+                                     const SIInstrInfo &TII,
+                                     const MachineRegisterInfo &MRI) {
+  auto SavedExec = getOrNonExecReg(MI, TII);
+  if (SavedExec == AMDGPU::NoRegister)
+    return nullptr;
+  auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
+  if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
+    return nullptr;
+  return SaveExecInst;
+}
+
+bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(*MF.getFunction()))
+    return false;
+
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    auto Lead = MBB.begin(), E = MBB.end();
+    if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
+      continue;
+
+    const MachineBasicBlock* Succ = *MBB.succ_begin();
+    if (!MBB.isLayoutSuccessor(Succ))
+      continue;
+
+    auto I = std::next(Lead);
+
+    for ( ; I != E; ++I)
+      if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
+        break;
+
+    if (I != E)
+      continue;
+
+    const auto NextLead = Succ->begin();
+    if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
+        !getOrExecSource(*NextLead, *TII, MRI))
+      continue;
+
+    DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
+
+    unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
+    LIS->RemoveMachineInstrFromMaps(*Lead);
+    Lead->eraseFromParent();
+    if (SaveExecReg) {
+      LIS->removeInterval(SaveExecReg);
+      LIS->createAndComputeVirtRegInterval(SaveExecReg);
+    }
+
+    Changed = true;
+  }
+
+  if (Changed) {
+    // Recompute liveness for both reg units of exec.
+    LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC_LO, TRI));
+    LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC_HI, TRI));
+  }
+
+  return Changed;
+}
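
The pattern matched by runOnMachineFunction above can be sketched roughly as
follows (a pseudo-MIR sketch with invented virtual register names, not literal
MIR syntax; the pass works on whatever the control flow lowering produced):

    bb.1:                                 ; exactly one successor, which is
                                          ; also the layout successor
      S_OR_B64 exec, exec, %inner_saved   ; leading end-cf: erased by the pass
      S_ADD_U32 ...                       ; trailing SALU code that does not
                                          ; read exec is allowed here
    bb.2:
      S_OR_B64 exec, exec, %outer_saved   ; %outer_saved must be a full COPY
                                          ; of exec (see getOrExecSource)

After the leading S_OR_B64 is erased, the live interval of its saved-mask
register is recomputed, and once anything has changed the cached liveness of
the exec register units is dropped so it can be recomputed later.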

Added: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll?rev=309762&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.ll Tue Aug  1 16:14:32 2017
@@ -0,0 +1,188 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}simple_nested_if:
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF]]
+; GCN:      s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[ENDIF]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[ENDIF]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp1 = icmp ugt i32 %tmp, 1
+  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end
+
+bb.outer.then:                                    ; preds = %bb
+  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
+  store i32 0, i32 addrspace(1)* %tmp4, align 4
+  %tmp5 = icmp eq i32 %tmp, 2
+  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then
+
+bb.inner.then:                                    ; preds = %bb.outer.then
+  %tmp7 = add i32 %tmp, 1
+  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
+  store i32 1, i32 addrspace(1)* %tmp9, align 4
+  br label %bb.outer.end
+
+bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
+  ret void
+}
+
+; GCN-LABEL: {{^}}uncollapsable_nested_if:
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp1 = icmp ugt i32 %tmp, 1
+  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end
+
+bb.outer.then:                                    ; preds = %bb
+  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
+  store i32 0, i32 addrspace(1)* %tmp4, align 4
+  %tmp5 = icmp eq i32 %tmp, 2
+  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then
+
+bb.inner.then:                                    ; preds = %bb.outer.then
+  %tmp7 = add i32 %tmp, 1
+  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
+  store i32 1, i32 addrspace(1)* %tmp8, align 4
+  br label %bb.inner.end
+
+bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
+  %tmp9 = add i32 %tmp, 2
+  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
+  store i32 2, i32 addrspace(1)* %tmp10, align 4
+  br label %bb.outer.end
+
+bb.outer.end:                                     ; preds = %bb.inner.end, %bb
+  ret void
+}
+
+; GCN-LABEL: {{^}}nested_if_if_else:
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
+; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
+; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[THEN_INNER]]:
+; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
+; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
+  store i32 0, i32 addrspace(1)* %tmp1, align 4
+  %tmp2 = icmp ugt i32 %tmp, 1
+  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end
+
+bb.outer.then:                                       ; preds = %bb
+  %tmp5 = icmp eq i32 %tmp, 2
+  br i1 %tmp5, label %bb.then, label %bb.else
+
+bb.then:                                             ; preds = %bb.outer.then
+  %tmp3 = add i32 %tmp, 1
+  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
+  store i32 1, i32 addrspace(1)* %tmp4, align 4
+  br label %bb.outer.end
+
+bb.else:                                             ; preds = %bb.outer.then
+  %tmp7 = add i32 %tmp, 2
+  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
+  store i32 2, i32 addrspace(1)* %tmp9, align 4
+  br label %bb.outer.end
+
+bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
+  ret void
+}
+
+; GCN-LABEL: {{^}}nested_if_else_if:
+; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
+; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
+; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
+; GCN-NEXT: {{^}}[[THEN_OUTER]]:
+; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
+; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
+; GCN-NEXT: ; mask branch [[ENDIF_INNER_OUTER_THEN:BB[0-9_]+]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
+; GCN:      store_dword
+; GCN-NEXT: {{^}}[[ENDIF_INNER_OUTER_THEN]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]]
+; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
+; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
+; GCN-NEXT: s_endpgm
+define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
+  store i32 0, i32 addrspace(1)* %tmp1, align 4
+  %cc1 = icmp ugt i32 %tmp, 1
+  br i1 %cc1, label %bb.outer.then, label %bb.outer.else
+
+bb.outer.then:
+  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
+  store i32 1, i32 addrspace(1)* %tmp2, align 4
+  %cc2 = icmp eq i32 %tmp, 2
+  br i1 %cc2, label %bb.inner.then, label %bb.outer.end
+
+bb.inner.then:
+  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
+  store i32 2, i32 addrspace(1)* %tmp3, align 4
+  br label %bb.outer.end
+
+bb.outer.else:
+  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
+  store i32 3, i32 addrspace(1)* %tmp4, align 4
+  %cc3 = icmp eq i32 %tmp, 2
+  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end
+
+bb.inner.then2:
+  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
+  store i32 4, i32 addrspace(1)* %tmp5, align 4
+  br label %bb.outer.end
+
+bb.outer.end:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+attributes #0 = { nounwind readnone speculatable }




More information about the llvm-commits mailing list