[PATCH] R600/SI: Add new CI arithmetic instructions.

Tom Stellard tom at stellard.net
Mon Feb 24 10:16:03 PST 2014


On Mon, Feb 10, 2014 at 12:39:26PM -0800, Matt Arsenault wrote:
> Does not include the larger part required to match the v_mad_i64_i32 / v_mad_u64_u32 instructions, which should be very useful for pointer calculations.
> 

LGTM.

> 
> http://llvm-reviews.chandlerc.com/D2730
> 
> Files:
>   lib/Target/R600/AMDGPUISelLowering.cpp
>   lib/Target/R600/AMDGPUISelLowering.h
>   lib/Target/R600/SIISelLowering.cpp
>   lib/Target/R600/SIInstrInfo.td
>   lib/Target/R600/SIInstructions.td
>   test/CodeGen/R600/fceil.ll
>   test/CodeGen/R600/ffloor.ll
>   test/CodeGen/R600/ftrunc.ll

> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -45,6 +45,8 @@
>  AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
>    TargetLowering(TM, new TargetLoweringObjectFileELF()) {
>  
> +  Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
> +
>    // Initialize target lowering borrowed from AMDIL
>    InitAMDILLowering();
>  
> Index: lib/Target/R600/AMDGPUISelLowering.h
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.h
> +++ lib/Target/R600/AMDGPUISelLowering.h
> @@ -21,9 +21,13 @@
>  namespace llvm {
>  
>  class AMDGPUMachineFunction;
> +class AMDGPUSubtarget;
>  class MachineRegisterInfo;
>  
>  class AMDGPUTargetLowering : public TargetLowering {
> +protected:
> +  const AMDGPUSubtarget *Subtarget;
> +
>  private:
>    void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
>                               SmallVectorImpl<SDValue> &Args,
> Index: lib/Target/R600/SIISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/SIISelLowering.cpp
> +++ lib/Target/R600/SIISelLowering.cpp
> @@ -14,6 +14,7 @@
>  
>  #include "SIISelLowering.h"
>  #include "AMDGPU.h"
> +#include "AMDGPUSubtarget.h"
>  #include "AMDILIntrinsicInfo.h"
>  #include "SIInstrInfo.h"
>  #include "SIMachineFunctionInfo.h"
> @@ -30,7 +31,6 @@
>  
>  SITargetLowering::SITargetLowering(TargetMachine &TM) :
>      AMDGPUTargetLowering(TM) {
> -
>    addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
>    addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
>  
> @@ -147,6 +147,20 @@
>    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
>    setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
>  
> +  // Expand FTRUNC/FCEIL/FFLOOR for each f64 vector type visited by the loop.
> +  for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) {
> +    MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I);
> +    setOperationAction(ISD::FTRUNC, VT, Expand);
> +    setOperationAction(ISD::FCEIL, VT, Expand);
> +    setOperationAction(ISD::FFLOOR, VT, Expand);
> +  }
> +
> +  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
> +    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
> +    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
> +    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
> +  }
> +
>    setTargetDAGCombine(ISD::SELECT_CC);
>  
>    setTargetDAGCombine(ISD::SETCC);
> Index: lib/Target/R600/SIInstrInfo.td
> ===================================================================
> --- lib/Target/R600/SIInstrInfo.td
> +++ lib/Target/R600/SIInstrInfo.td
> @@ -379,6 +379,14 @@
>    opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>  >, VOP <opName>;
>  
> +// For instructions with 32-bit and 64-bit operands, e.g. V_MAD_I64_I32
> +class VOP3_64_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
> +  op, (outs VReg_64:$dst),
> +  (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_64:$src2,
> +   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
> +  opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
> +>, VOP <opName>;
> +
>  //===----------------------------------------------------------------------===//
>  // Vector I/O classes
>  //===----------------------------------------------------------------------===//
> Index: lib/Target/R600/SIInstructions.td
> ===================================================================
> --- lib/Target/R600/SIInstructions.td
> +++ lib/Target/R600/SIInstructions.td
> @@ -29,6 +29,9 @@
>  def isSI : Predicate<"Subtarget.getGeneration() "
>                        ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
>  
> +def isCI : Predicate<"Subtarget.getGeneration() "
> +                      ">= AMDGPUSubtarget::SEA_ISLANDS">;
> +
>  def WAIT_FLAG : InstFlag<"printWaitFlag">;
>  
>  let Predicates = [isSI] in {
> @@ -2099,6 +2102,55 @@
>  def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
>  def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
>  
> +let Predicates = [isCI] in {
> +
> +// New Sea Islands arithmetic instructions
> +let neverHasSideEffects = 1 in {
> +defm V_TRUNC_F64 : VOP1_64 <0x00000017, "V_TRUNC_F64",
> +  [(set f64:$dst, (ftrunc f64:$src0))]
> +>;
> +defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
> +  [(set f64:$dst, (fceil f64:$src0))]
> +>;
> +defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
> +  [(set f64:$dst, (ffloor f64:$src0))]
> +>;
> +
> +defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>;
> +
> +def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
> +def V_MQSAD_U16_U8 : VOP3_32 <0x00000172, "V_MQSAD_U16_U8", []>;
> +def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
> +def V_MAD_U64_U32 : VOP3_64_32 <0x00000176, "V_MAD_U64_U32", []>;
> +
> +// XXX - Does this set VCC?
> +def V_MAD_I64_I32 : VOP3_64_32 <0x00000177, "V_MAD_I64_I32", []>;
> +} // End neverHasSideEffects = 1
> +
> +// Remaining instructions:
> +// FLAT_*
> +// S_CBRANCH_CDBGUSER
> +// S_CBRANCH_CDBGSYS
> +// S_CBRANCH_CDBGSYS_OR_USER
> +// S_CBRANCH_CDBGSYS_AND_USER
> +// S_DCACHE_INV_VOL
> +// V_EXP_LEGACY_F32
> +// V_LOG_LEGACY_F32
> +// DS_NOP
> +// DS_GWS_SEMA_RELEASE_ALL
> +// DS_WRAP_RTN_B32
> +// DS_CNDXCHG32_RTN_B64
> +// DS_WRITE_B96
> +// DS_WRITE_B128
> +// DS_CONDXCHG32_RTN_B128
> +// DS_READ_B96
> +// DS_READ_B128
> +// BUFFER_LOAD_DWORDX3
> +// BUFFER_STORE_DWORDX3
> +
> +} // End Predicates = [isCI]
> +
> +
>  /********** ====================== **********/
>  /**********   Indirect adressing   **********/
>  /********** ====================== **********/
> Index: test/CodeGen/R600/fceil.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/fceil.ll
> @@ -0,0 +1,84 @@
> +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
> +
> +declare double @llvm.ceil.f64(double) nounwind readnone
> +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
> +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
> +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
> +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
> +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
> +
> +; CI-LABEL: @fceil_f64:
> +; CI: V_CEIL_F64_e32
> +define void @fceil_f64(double addrspace(1)* %out, double %x) {
> +  %y = call double @llvm.ceil.f64(double %x) nounwind readnone
> +  store double %y, double addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @fceil_v2f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
> +  %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
> +  store <2 x double> %y, <2 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME-CI-LABEL: @fceil_v3f64:
> +; FIXME-CI: V_CEIL_F64_e32
> +; FIXME-CI: V_CEIL_F64_e32
> +; FIXME-CI: V_CEIL_F64_e32
> +; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
> +;   %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
> +;   store <3 x double> %y, <3 x double> addrspace(1)* %out
> +;   ret void
> +; }
> +
> +; CI-LABEL: @fceil_v4f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
> +  %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
> +  store <4 x double> %y, <4 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @fceil_v8f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
> +  %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
> +  store <8 x double> %y, <8 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @fceil_v16f64:
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +; CI: V_CEIL_F64_e32
> +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
> +  %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
> +  store <16 x double> %y, <16 x double> addrspace(1)* %out
> +  ret void
> +}
> Index: test/CodeGen/R600/ffloor.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/ffloor.ll
> @@ -0,0 +1,84 @@
> +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
> +
> +declare double @llvm.floor.f64(double) nounwind readnone
> +declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
> +declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
> +declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
> +declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
> +declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
> +
> +; CI-LABEL: @ffloor_f64:
> +; CI: V_FLOOR_F64_e32
> +define void @ffloor_f64(double addrspace(1)* %out, double %x) {
> +  %y = call double @llvm.floor.f64(double %x) nounwind readnone
> +  store double %y, double addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @ffloor_v2f64:
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
> +  %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
> +  store <2 x double> %y, <2 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME-CI-LABEL: @ffloor_v3f64:
> +; FIXME-CI: V_FLOOR_F64_e32
> +; FIXME-CI: V_FLOOR_F64_e32
> +; FIXME-CI: V_FLOOR_F64_e32
> +; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
> +;   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
> +;   store <3 x double> %y, <3 x double> addrspace(1)* %out
> +;   ret void
> +; }
> +
> +; CI-LABEL: @ffloor_v4f64:
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
> +  %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
> +  store <4 x double> %y, <4 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @ffloor_v8f64:
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
> +  %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
> +  store <8 x double> %y, <8 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @ffloor_v16f64:
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +; CI: V_FLOOR_F64_e32
> +define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
> +  %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
> +  store <16 x double> %y, <16 x double> addrspace(1)* %out
> +  ret void
> +}
> Index: test/CodeGen/R600/ftrunc.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/ftrunc.ll
> @@ -0,0 +1,84 @@
> +; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
> +
> +declare double @llvm.trunc.f64(double) nounwind readnone
> +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
> +declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
> +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
> +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
> +declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
> +
> +; CI-LABEL: @ftrunc_f64:
> +; CI: V_TRUNC_F64_e32
> +define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
> +  %y = call double @llvm.trunc.f64(double %x) nounwind readnone
> +  store double %y, double addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @ftrunc_v2f64:
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
> +  %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
> +  store <2 x double> %y, <2 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; FIXME-CI-LABEL: @ftrunc_v3f64:
> +; FIXME-CI: V_TRUNC_F64_e32
> +; FIXME-CI: V_TRUNC_F64_e32
> +; FIXME-CI: V_TRUNC_F64_e32
> +; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
> +;   %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
> +;   store <3 x double> %y, <3 x double> addrspace(1)* %out
> +;   ret void
> +; }
> +
> +; CI-LABEL: @ftrunc_v4f64:
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
> +  %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
> +  store <4 x double> %y, <4 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @ftrunc_v8f64:
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
> +  %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
> +  store <8 x double> %y, <8 x double> addrspace(1)* %out
> +  ret void
> +}
> +
> +; CI-LABEL: @ftrunc_v16f64:
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +; CI: V_TRUNC_F64_e32
> +define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
> +  %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
> +  store <16 x double> %y, <16 x double> addrspace(1)* %out
> +  ret void
> +}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list