[llvm] r179166 - R600/SI: dynamical figure out the reg class of MIMG
Christian Konig
christian.koenig at amd.com
Wed Apr 10 01:39:17 PDT 2013
Author: ckoenig
Date: Wed Apr 10 03:39:16 2013
New Revision: 179166
URL: http://llvm.org/viewvc/llvm-project?rev=179166&view=rev
Log:
R600/SI: dynamical figure out the reg class of MIMG
Depending on the number of bits set in the writemask.
Signed-off-by: Christian König <christian.koenig at amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer at amd.com>
Modified:
llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/R600/SIISelLowering.cpp
llvm/trunk/lib/Target/R600/SIISelLowering.h
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
llvm/trunk/lib/Target/R600/SIInstrInfo.td
llvm/trunk/lib/Target/R600/SIRegisterInfo.td
llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp Wed Apr 10 03:39:16 2013
@@ -107,6 +107,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(M
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
isSGPR = false;
width = 2;
+ } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(reg)) {
isSGPR = true;
width = 4;
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Wed Apr 10 03:39:16 2013
@@ -720,7 +720,7 @@ unsigned SubIdx2Lane(unsigned Idx) {
void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
SDNode *Users[4] = { };
- unsigned Writemask = 0;
+ unsigned Writemask = 0, Lane = 0;
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
@@ -731,7 +731,7 @@ void SITargetLowering::adjustWritemask(M
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
return;
- unsigned Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+ Lane = SubIdx2Lane(I->getConstantOperandVal(1));
// Abort if we have more than one user per component
if (Users[Lane])
@@ -752,6 +752,16 @@ void SITargetLowering::adjustWritemask(M
Ops.push_back(Node->getOperand(i));
Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ // If we only got one lane, replace it with a copy
+ if (Writemask == (1U << Lane)) {
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+ SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DebugLoc(), MVT::f32,
+ SDValue(Node, 0), RC);
+ DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+ return;
+ }
+
// Update the users of the node with the new indices
for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
@@ -780,3 +790,28 @@ SDNode *SITargetLowering::PostISelFoldin
return foldOperands(Node, DAG);
}
+
+/// \brief Assign the register class depending on the number of
+/// bits set in the writemask
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+ return;
+
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned Writemask = MI->getOperand(1).getImm();
+ unsigned BitsSet = 0;
+ for (unsigned i = 0; i < 4; ++i)
+ BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+ const TargetRegisterClass *RC;
+ switch (BitsSet) {
+ default: return;
+ case 1: RC = &AMDGPU::VReg_32RegClass; break;
+ case 2: RC = &AMDGPU::VReg_64RegClass; break;
+ case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ }
+
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MRI.setRegClass(VReg, RC);
+}
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.h?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.h Wed Apr 10 03:39:16 2013
@@ -53,6 +53,8 @@ public:
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+ virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const;
int32_t analyzeImmediate(const SDNode *N) const;
};
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Wed Apr 10 03:39:16 2013
@@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlo
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
};
+ const int16_t Sub0_2[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+ };
+
const int16_t Sub0_1[] = {
AMDGPU::sub0, AMDGPU::sub1, 0
};
@@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlo
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_1;
+ } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_2;
+
} else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
AMDGPU::SReg_128RegClass.contains(SrcReg));
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.td?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.td Wed Apr 10 03:39:16 2013
@@ -346,6 +346,7 @@ class MIMG_Load_Helper <bits<7> op, stri
[]> {
let mayLoad = 1;
let mayStore = 0;
+ let hasPostISelHook = 1;
}
//===----------------------------------------------------------------------===//
@@ -379,6 +380,7 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Test if the supplied opcode is an MIMG instruction
def isMIMG : InstrMapping {
let FilterClass = "MIMG_Load_Helper";
let RowFields = ["Inst"];
Modified: llvm/trunk/lib/Target/R600/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIRegisterInfo.td?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/R600/SIRegisterInfo.td Wed Apr 10 03:39:16 2013
@@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1
[(add (trunc VGPR_32, 255)),
(add (shl VGPR_32, 1))]>;
+// VGPR 96-bit registers
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+ [(add (trunc VGPR_32, 254)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2))]>;
+
// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (trunc VGPR_32, 253)),
@@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+ let Size = 96;
+}
+
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
Modified: llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll?rev=179166&r1=179165&r2=179166&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll (original)
+++ llvm/trunk/test/CodeGen/R600/llvm.SI.sample.ll Wed Apr 10 03:39:16 2013
@@ -1,21 +1,21 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 3
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 2
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 1
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 4
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 5
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 9
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 6
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 10
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 12
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
%v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
More information about the llvm-commits
mailing list