[PATCH] R600/SI: Fix selection failure on scalar_to_vector
Tom Stellard
tom at stellard.net
Wed Jun 11 07:15:31 PDT 2014
On Wed, Jun 11, 2014 at 02:01:06AM +0000, Matt Arsenault wrote:
> There seem to be only 2 places that produce these, and it's kind of tricky to hit them.
> Also fixes a failure to bitcast between v2i16 and f32; for some reason this wasn't
> actually broken in the simple bitcast testcase, but it was in the scalar_to_vector one.
>
> http://reviews.llvm.org/D4098
>
LGTM.
> Files:
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> lib/Target/R600/SIInstructions.td
> test/CodeGen/R600/bitcast.ll
> test/CodeGen/R600/scalar_to_vector.ll
> Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -256,6 +256,7 @@
> };
> return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
> }
> + case ISD::SCALAR_TO_VECTOR:
> case ISD::BUILD_VECTOR: {
> unsigned RegClassID;
> const AMDGPURegisterInfo *TRI =
> @@ -264,7 +265,8 @@
> static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
> EVT VT = N->getValueType(0);
> unsigned NumVectorElts = VT.getVectorNumElements();
> - assert(VT.getVectorElementType().bitsEq(MVT::i32));
> + EVT EltVT = VT.getVectorElementType();
> + assert(EltVT.bitsEq(MVT::i32));
> if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> bool UseVReg = true;
> for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
> @@ -313,8 +315,7 @@
> SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
>
> if (NumVectorElts == 1) {
> - return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
> - VT.getVectorElementType(),
> + return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
> N->getOperand(0), RegClass);
> }
>
> @@ -323,11 +324,12 @@
> // 16 = Max Num Vector Elements
> // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
> // 1 = Vector Register Class
> - SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);
> + SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
>
> RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
> bool IsRegSeq = true;
> - for (unsigned i = 0; i < N->getNumOperands(); i++) {
> + unsigned NOps = N->getNumOperands();
> + for (unsigned i = 0; i < NOps; i++) {
> // XXX: Why is this here?
> if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
> IsRegSeq = false;
> @@ -337,6 +339,20 @@
> RegSeqArgs[1 + (2 * i) + 1] =
> CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
> }
> +
> + if (NOps != NumVectorElts) {
> + // Fill in the missing undef elements if this was a scalar_to_vector.
> + assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
> +
> + MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
> + SDLoc(N), EltVT);
> + for (unsigned i = NOps; i < NumVectorElts; ++i) {
> + RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
> + RegSeqArgs[1 + (2 * i) + 1] =
> + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
> + }
> + }
> +
> if (!IsRegSeq)
> break;
> return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -1881,7 +1881,8 @@
> def : BitConvert <v2i32, v2f32, VReg_64>;
> def : BitConvert <v2i32, i64, VReg_64>;
> def : BitConvert <i64, v2i32, VReg_64>;
> -
> +def : BitConvert <v2f32, i64, VReg_64>;
> +def : BitConvert <i64, v2f32, VReg_64>;
> def : BitConvert <v4f32, v4i32, VReg_128>;
> def : BitConvert <v4i32, v4f32, VReg_128>;
>
> Index: test/CodeGen/R600/bitcast.ll
> ===================================================================
> --- test/CodeGen/R600/bitcast.ll
> +++ test/CodeGen/R600/bitcast.ll
> @@ -42,3 +42,17 @@
> store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
> ret void
> }
> +
> +define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
> + %load = load float addrspace(1)* %in, align 4
> + %bc = bitcast float %load to <2 x i16>
> + store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
> + %load = load <2 x i16> addrspace(1)* %in, align 4
> + %bc = bitcast <2 x i16> %load to float
> + store float %bc, float addrspace(1)* %out, align 4
> + ret void
> +}
> Index: test/CodeGen/R600/scalar_to_vector.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/scalar_to_vector.ll
> @@ -0,0 +1,80 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +
> +
> +; FUNC-LABEL: @scalar_to_vector_v2i32
> +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
> +; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: S_ENDPGM
> +define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> + %tmp1 = load i32 addrspace(1)* %in, align 4
> + %bc = bitcast i32 %tmp1 to <2 x i16>
> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
> + store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @scalar_to_vector_v2f32
> +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
> +; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: S_ENDPGM
> +define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
> + %tmp1 = load float addrspace(1)* %in, align 4
> + %bc = bitcast float %tmp1 to <2 x i16>
> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
> + store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
> +; to produce one, but for some reason never made it to selection.
> +
> +
> +; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> +; %tmp1 = load i32 addrspace(1)* %in, align 4
> +; %bc = bitcast i32 %tmp1 to <4 x i8>
> +
> +; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> +; store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
> +; %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
> +; %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
> +; %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>
> +; store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
> +; %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
> +; %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
> +; store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
> +; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
> +; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
> +; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test6(<4 x i16> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
> +; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
> +; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
> +; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
> +; ret void
> +; }
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list