[PATCH] R600/SI: Fix selection failure on scalar_to_vector
Tom Stellard
tom at stellard.net
Wed Jun 11 07:15:31 PDT 2014
On Wed, Jun 11, 2014 at 02:01:06AM +0000, Matt Arsenault wrote:
> There seem to be only 2 places that produce these, and it's kind of tricky to hit them.
> Also fixes a failure to bitcast between v2i16 and f32; for some reason this wasn't
> actually broken in the simple bitcast testcase, but it was in the scalar_to_vector one.
>
> http://reviews.llvm.org/D4098
>
LGTM.
> Files:
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> lib/Target/R600/SIInstructions.td
> test/CodeGen/R600/bitcast.ll
> test/CodeGen/R600/scalar_to_vector.ll
> Index: lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -256,6 +256,7 @@
> };
> return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
> }
> + case ISD::SCALAR_TO_VECTOR:
> case ISD::BUILD_VECTOR: {
> unsigned RegClassID;
> const AMDGPURegisterInfo *TRI =
> @@ -264,7 +265,8 @@
> static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
> EVT VT = N->getValueType(0);
> unsigned NumVectorElts = VT.getVectorNumElements();
> - assert(VT.getVectorElementType().bitsEq(MVT::i32));
> + EVT EltVT = VT.getVectorElementType();
> + assert(EltVT.bitsEq(MVT::i32));
> if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> bool UseVReg = true;
> for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
> @@ -313,8 +315,7 @@
> SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
>
> if (NumVectorElts == 1) {
> - return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
> - VT.getVectorElementType(),
> + return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
> N->getOperand(0), RegClass);
> }
>
> @@ -323,11 +324,12 @@
> // 16 = Max Num Vector Elements
> // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
> // 1 = Vector Register Class
> - SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);
> + SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
>
> RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
> bool IsRegSeq = true;
> - for (unsigned i = 0; i < N->getNumOperands(); i++) {
> + unsigned NOps = N->getNumOperands();
> + for (unsigned i = 0; i < NOps; i++) {
> // XXX: Why is this here?
> if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
> IsRegSeq = false;
> @@ -337,6 +339,20 @@
> RegSeqArgs[1 + (2 * i) + 1] =
> CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
> }
> +
> + if (NOps != NumVectorElts) {
> + // Fill in the missing undef elements if this was a scalar_to_vector.
> + assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
> +
> + MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
> + SDLoc(N), EltVT);
> + for (unsigned i = NOps; i < NumVectorElts; ++i) {
> + RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
> + RegSeqArgs[1 + (2 * i) + 1] =
> + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
> + }
> + }
> +
> if (!IsRegSeq)
> break;
> return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -1881,7 +1881,8 @@
> def : BitConvert <v2i32, v2f32, VReg_64>;
> def : BitConvert <v2i32, i64, VReg_64>;
> def : BitConvert <i64, v2i32, VReg_64>;
> -
> +def : BitConvert <v2f32, i64, VReg_64>;
> +def : BitConvert <i64, v2f32, VReg_64>;
> def : BitConvert <v4f32, v4i32, VReg_128>;
> def : BitConvert <v4i32, v4f32, VReg_128>;
>
> Index: test/CodeGen/R600/bitcast.ll
> ===================================================================
> --- test/CodeGen/R600/bitcast.ll
> +++ test/CodeGen/R600/bitcast.ll
> @@ -42,3 +42,17 @@
> store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
> ret void
> }
> +
> +define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
> + %load = load float addrspace(1)* %in, align 4
> + %bc = bitcast float %load to <2 x i16>
> + store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
> + %load = load <2 x i16> addrspace(1)* %in, align 4
> + %bc = bitcast <2 x i16> %load to float
> + store float %bc, float addrspace(1)* %out, align 4
> + ret void
> +}
> Index: test/CodeGen/R600/scalar_to_vector.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/scalar_to_vector.ll
> @@ -0,0 +1,80 @@
> +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
> +
> +
> +; FUNC-LABEL: @scalar_to_vector_v2i32
> +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
> +; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: S_ENDPGM
> +define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> + %tmp1 = load i32 addrspace(1)* %in, align 4
> + %bc = bitcast i32 %tmp1 to <2 x i16>
> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
> + store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; FUNC-LABEL: @scalar_to_vector_v2f32
> +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
> +; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: BUFFER_STORE_SHORT [[RESULT]]
> +; SI: S_ENDPGM
> +define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
> + %tmp1 = load float addrspace(1)* %in, align 4
> + %bc = bitcast float %tmp1 to <2 x i16>
> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
> + store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
> + ret void
> +}
> +
> +; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
> +; to produce one, but for some reason never made it to selection.
> +
> +
> +; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
> +; %tmp1 = load i32 addrspace(1)* %in, align 4
> +; %bc = bitcast i32 %tmp1 to <4 x i8>
> +
> +; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> +; store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
> +; %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
> +; %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
> +; %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>
> +; store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
> +; %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
> +; %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
> +; store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
> +; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
> +; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
> +; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
> +; ret void
> +; }
> +
> +; define void @scalar_to_vector_test6(<4 x i16> addrspace(1)* %out) nounwind {
> +; %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
> +; %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
> +; %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
> +; store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
> +; ret void
> +; }
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list