[PATCH] R600/SI: Use VALU instructions for copying i1 values
Tom Stellard
thomas.stellard at amd.com
Mon Apr 28 18:02:28 PDT 2014
We can't use SALU instructions for this since they ignore the EXEC mask
and are always executed.
This fixes several OpenCV tests.
---
lib/Target/R600/AMDGPU.h | 4 +
lib/Target/R600/AMDGPUTargetMachine.cpp | 1 +
lib/Target/R600/CMakeLists.txt | 1 +
lib/Target/R600/SIFixSGPRCopies.cpp | 3 +-
lib/Target/R600/SIISelLowering.cpp | 2 +-
lib/Target/R600/SIInstructions.td | 11 +--
lib/Target/R600/SILowerControlFlow.cpp | 4 +-
lib/Target/R600/SILowerI1Copies.cpp | 130 ++++++++++++++++++++++++++++++++
lib/Target/R600/SIRegisterInfo.td | 2 +
test/CodeGen/R600/valu-i1.ll | 39 ++++++++++
10 files changed, 188 insertions(+), 9 deletions(-)
create mode 100644 lib/Target/R600/SILowerI1Copies.cpp
create mode 100644 test/CodeGen/R600/valu-i1.ll
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 3e1848b..5d0cf81 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+void initializeSILowerI1CopiesPass(PassRegistry &);
+extern char &SILowerI1CopiesID;
+
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 6794d74..d3244f6 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ addPass(createSILowerI1CopiesPass());
return false;
}
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 93a5117..3c6fa5a 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen
SIInstrInfo.cpp
SIISelLowering.cpp
SILowerControlFlow.cpp
+ SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
SITypeRewriter.cpp
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index d524f44..f3f1f58 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -185,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- DstRC == &AMDGPU::M0RegRegClass)
+ DstRC == &AMDGPU::M0RegRegClass ||
+ MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 848e045..8be5465 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
- addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 00f9be6..27e7abe 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1398,6 +1398,12 @@ def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
let isCodeGenOnly = 1, isPseudo = 1 in {
+def V_MOV_I1 : InstSI <
+ (outs VReg_1:$dst),
+ (ins i1imm:$src),
+ "", [(set i1:$dst, (imm:$src))]
+>;
+
def LOAD_CONST : AMDGPUShaderInst <
(outs GPRF32:$dst),
(ins i32imm:$src),
@@ -1981,11 +1987,6 @@ def : Pat <
>;
def : Pat <
- (i1 imm:$imm),
- (S_MOV_B64 imm:$imm)
->;
-
-def : Pat <
(i64 InlineImm<i64>:$imm),
(S_MOV_B64 InlineImm<i64>:$imm)
>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index c2f8696..1d99b2f 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -67,7 +67,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
- const TargetRegisterInfo *TRI;
+ const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp
new file mode 100644
index 0000000..766380e
--- /dev/null
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -0,0 +1,130 @@
+//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// i1 values are usually inserted by the CFG Structurize pass and they are
+/// unique in that they can be copied from VALU to SALU registers.
+/// This is not possible for any other value type. Since there are no
+/// MOV instructions for i1, we to use V_CMP_* and V_CNDMASK to move the i1.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#define DEBUG_TYPE "si-i1-copies"
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerI1Copies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SILowerI1Copies() : MachineFunctionPass(ID) {
+ initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) override;
+
+ virtual const char *getPassName() const override {
+ return "SI Lower il Copies";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower il Copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower il Copies", false, false)
+
+char SILowerI1Copies::ID = 0;
+
+char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
+
+FunctionPass *llvm::createSILowerI1CopiesPass() {
+ return new SILowerI1Copies();
+}
+
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ MF.getTarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
+ MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
+ continue;
+ }
+
+ if (MI.getOpcode() != AMDGPU::COPY ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
+ continue;
+
+
+ const TargetRegisterClass *DstRC =
+ MRI.getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterClass *SrcRC =
+ MRI.getRegClass(MI.getOperand(1).getReg());
+
+ if (DstRC == &AMDGPU::VReg_1RegClass &&
+ TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
+ .addOperand(MI.getOperand(0))
+ .addImm(0)
+ .addImm(-1)
+ .addOperand(MI.getOperand(1))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MI.eraseFromParent();
+ } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
+ SrcRC == &AMDGPU::VReg_1RegClass) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
+ .addOperand(MI.getOperand(0))
+ .addImm(0)
+ .addOperand(MI.getOperand(1))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MI.eraseFromParent();
+ }
+
+ }
+ }
+ return false;
+}
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 6d6d8b9..f1f01de 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -189,6 +189,8 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
+
//===----------------------------------------------------------------------===//
// [SV]Src_(32|64) register classes, can have either an immediate or an register
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
new file mode 100644
index 0000000..5d5e3ff
--- /dev/null
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+
+; Make sure the i1 values created by the cfg structurizer pass are
+; moved using VALU instructions
+; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
+; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+entry:
+ switch i32 %a, label %default [
+ i32 0, label %case0
+ i32 1, label %case1
+ ]
+
+case0:
+ %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ store i32 0, i32 addrspace(1)* %arrayidx1, align 4
+ br label %end
+
+case1:
+ %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ store i32 1, i32 addrspace(1)* %arrayidx5, align 4
+ br label %end
+
+default:
+ %cmp8 = icmp eq i32 %a, 2
+ %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ br i1 %cmp8, label %if, label %else
+
+if:
+ store i32 2, i32 addrspace(1)* %arrayidx10, align 4
+ br label %end
+
+else:
+ store i32 3, i32 addrspace(1)* %arrayidx10, align 4
+ br label %end
+
+end:
+ ret void
+}
--
1.8.1.5
More information about the llvm-commits
mailing list