[llvm] r176487 - R600: Turn BUILD_VECTOR into Reg_Sequence
Vincent Lejeune
vljn at ovi.com
Tue Mar 5 07:04:49 PST 2013
Author: vljn
Date: Tue Mar 5 09:04:49 2013
New Revision: 176487
URL: http://llvm.org/viewvc/llvm-project?rev=176487&view=rev
Log:
R600: Turn BUILD_VECTOR into Reg_Sequence
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
Modified:
llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
llvm/trunk/lib/Target/R600/CMakeLists.txt
llvm/trunk/test/CodeGen/R600/fdiv.v4f32.ll
Modified: llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp?rev=176487&r1=176486&r2=176487&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp Tue Mar 5 09:04:49 2013
@@ -162,6 +162,35 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNod
}
switch (Opc) {
default: break;
+ case ISD::BUILD_VECTOR: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128 bits reg copy when going through TwoAddressInstructions
+ // pass. We want to avoid 128 bits copies as much as possible because they
+ // can't be bundled by our scheduler.
+ SDValue RegSeqArgs[9] = {
+ CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
+ SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
+ };
+ bool IsRegSeq = true;
+ for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ IsRegSeq = false;
+ break;
+ }
+ RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ }
+ if (!IsRegSeq)
+ break;
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
+ RegSeqArgs, 2 * N->getNumOperands() + 1);
+ }
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
Modified: llvm/trunk/lib/Target/R600/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/CMakeLists.txt?rev=176487&r1=176486&r2=176487&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/R600/CMakeLists.txt Tue Mar 5 09:04:49 2013
@@ -37,7 +37,6 @@ add_llvm_target(R600CodeGen
R600ExpandSpecialInstrs.cpp
R600InstrInfo.cpp
R600ISelLowering.cpp
- R600LowerConstCopy.cpp
R600MachineFunctionInfo.cpp
R600RegisterInfo.cpp
SIAnnotateControlFlow.cpp
Modified: llvm/trunk/test/CodeGen/R600/fdiv.v4f32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fdiv.v4f32.ll?rev=176487&r1=176486&r2=176487&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fdiv.v4f32.ll (original)
+++ llvm/trunk/test/CodeGen/R600/fdiv.v4f32.ll Tue Mar 5 09:04:49 2013
@@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
More information about the llvm-commits
mailing list