[llvm] r210569 - R600: Use BCNT_INT for evergreen

Tom Stellard tom at stellard.net
Wed Jun 11 14:00:05 PDT 2014


On Wed, Jun 11, 2014 at 02:29:19PM -0400, Jan Vesely wrote:
> Hi,
> 
> This patch regresses the builtin-char-clz-1.0.generated.cl and
> builtin-uchar-clz-1.0.generated.cl piglit tests on my TURKS GPU.
> 
> All clz char16 kernels time out:
> radeon 0000:01:00.0: GPU lockup CP stall for more than 10000msec
> 
> all other vector sizes (8,4,2,1) pass ok.
> 
> Setting the action for (ISD::CTPOP, MVT::i32) to Expand unconditionally is
> enough to make the char16 tests pass again.
> 

This is fixed in r210703.

-Tom

> regards,
> Jan
> 
> On Tue, 2014-06-10 at 19:18 +0000, Matt Arsenault wrote:
> > Author: arsenm
> > Date: Tue Jun 10 14:18:28 2014
> > New Revision: 210569
> > 
> > URL: http://llvm.org/viewvc/llvm-project?rev=210569&view=rev
> > Log:
> > R600: Use BCNT_INT for evergreen
> > 
> > Modified:
> >     llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
> >     llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h
> >     llvm/trunk/lib/Target/R600/EvergreenInstructions.td
> >     llvm/trunk/lib/Target/R600/SIISelLowering.cpp
> >     llvm/trunk/test/CodeGen/R600/ctpop.ll
> > 
> > Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=210569&r1=210568&r2=210569&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Tue Jun 10 14:18:28 2014
> > @@ -220,9 +220,13 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
> >      setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
> >    }
> >  
> > +  if (!Subtarget->hasBCNT(32))
> > +    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
> > +
> > +  if (!Subtarget->hasBCNT(64))
> > +    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
> > +
> >    for (MVT VT : { MVT::i32, MVT::i64 }) {
> > -    // TODO: Evergreen has BCNT_INT for CTPOP
> > -    setOperationAction(ISD::CTPOP, VT, Expand);
> >      setOperationAction(ISD::CTTZ, VT, Expand);
> >      setOperationAction(ISD::CTLZ, VT, Expand);
> >    }
> > 
> > Modified: llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h?rev=210569&r1=210568&r2=210569&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h (original)
> > +++ llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h Tue Jun 10 14:18:28 2014
> > @@ -80,6 +80,14 @@ public:
> >      return hasBFE();
> >    }
> >  
> > +  bool hasBCNT(unsigned Size) const {
> > +    if (Size == 32)
> > +      return (getGeneration() >= EVERGREEN);
> > +
> > +    assert(Size == 64);
> > +    return (getGeneration() >= SOUTHERN_ISLANDS);
> > +  }
> > +
> >    bool hasMulU24() const {
> >      return (getGeneration() >= EVERGREEN);
> >    }
> > 
> > Modified: llvm/trunk/lib/Target/R600/EvergreenInstructions.td
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/EvergreenInstructions.td?rev=210569&r1=210568&r2=210569&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/R600/EvergreenInstructions.td (original)
> > +++ llvm/trunk/lib/Target/R600/EvergreenInstructions.td Tue Jun 10 14:18:28 2014
> > @@ -326,6 +326,8 @@ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL
> >  def DOT4_eg : DOT4_Common<0xBE>;
> >  defm CUBE_eg : CUBE_Common<0xC0>;
> >  
> > +def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop>;
> > +
> >  let hasSideEffects = 1 in {
> >    def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
> >  }
> > 
> > Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=210569&r1=210568&r2=210569&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Tue Jun 10 14:18:28 2014
> > @@ -211,9 +211,6 @@ SITargetLowering::SITargetLowering(Targe
> >      setOperationAction(ISD::FRINT, MVT::f64, Legal);
> >    }
> >  
> > -  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
> > -  setOperationAction(ISD::CTPOP, MVT::i64, Legal);
> > -
> >    setTargetDAGCombine(ISD::SELECT_CC);
> >    setTargetDAGCombine(ISD::SETCC);
> >  
> > 
> > Modified: llvm/trunk/test/CodeGen/R600/ctpop.ll
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/ctpop.ll?rev=210569&r1=210568&r2=210569&view=diff
> > ==============================================================================
> > --- llvm/trunk/test/CodeGen/R600/ctpop.ll (original)
> > +++ llvm/trunk/test/CodeGen/R600/ctpop.ll Tue Jun 10 14:18:28 2014
> > @@ -13,6 +13,8 @@ declare <16 x i32> @llvm.ctpop.v16i32(<1
> >  ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
> >  ; SI: BUFFER_STORE_DWORD [[VRESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> >    store i32 %ctpop, i32 addrspace(1)* %out, align 4
> > @@ -26,6 +28,8 @@ define void @s_ctpop_i32(i32 addrspace(1
> >  ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
> >    %val = load i32 addrspace(1)* %in, align 4
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> > @@ -42,6 +46,9 @@ define void @v_ctpop_i32(i32 addrspace(1
> >  ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> >  define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
> >    %val0 = load i32 addrspace(1)* %in0, align 4
> >    %val1 = load i32 addrspace(1)* %in1, align 4
> > @@ -56,6 +63,9 @@ define void @v_ctpop_add_chain_i32(i32 a
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> >  define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
> >    %val = load <2 x i32> addrspace(1)* %in, align 8
> >    %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
> > @@ -69,6 +79,11 @@ define void @v_ctpop_v2i32(<2 x i32> add
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> >  define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
> >    %val = load <4 x i32> addrspace(1)* %in, align 16
> >    %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
> > @@ -86,6 +101,15 @@ define void @v_ctpop_v4i32(<4 x i32> add
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> >  define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
> >    %val = load <8 x i32> addrspace(1)* %in, align 32
> >    %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
> > @@ -111,6 +135,23 @@ define void @v_ctpop_v8i32(<8 x i32> add
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: V_BCNT_U32_B32_e32
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> > +; EG: BCNT_INT
> >  define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
> >    %val = load <16 x i32> addrspace(1)* %in, align 32
> >    %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
> > @@ -123,6 +164,8 @@ define void @v_ctpop_v16i32(<16 x i32> a
> >  ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
> >    %val = load i32 addrspace(1)* %in, align 4
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> > @@ -136,6 +179,8 @@ define void @v_ctpop_i32_add_inline_cons
> >  ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
> >    %val = load i32 addrspace(1)* %in, align 4
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> > @@ -164,6 +209,8 @@ define void @v_ctpop_i32_add_literal(i32
> >  ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
> >    %val = load i32 addrspace(1)* %in, align 4
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> > @@ -178,6 +225,8 @@ define void @v_ctpop_i32_add_var(i32 add
> >  ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
> >    %val = load i32 addrspace(1)* %in, align 4
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> > @@ -192,6 +241,8 @@ define void @v_ctpop_i32_add_var_inv(i32
> >  ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
> >  ; SI: BUFFER_STORE_DWORD [[RESULT]],
> >  ; SI: S_ENDPGM
> > +
> > +; EG: BCNT_INT
> >  define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
> >    %val = load i32 addrspace(1)* %in, align 4
> >    %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
> > 
> > 
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 
> -- 
> Jan Vesely <jan.vesely at rutgers.edu>



> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list