[llvm] r276435 - AMDGPU: Fix i1 fp_to_int

Thu Jul 28 16:37:13 PDT 2016

Merged in r277082.

On Thu, Jul 28, 2016 at 8:58 AM, Hans Wennborg <hans at chromium.org> wrote:
> Yes, go ahead.
>
> Cheers,
> Hans
>
> On Thu, Jul 28, 2016 at 7:56 AM, Tom Stellard <tom at stellard.net> wrote:
>> On Fri, Jul 22, 2016 at 05:01:21PM -0000, Matt Arsenault via llvm-commits wrote:
>>> Author: arsenm
>>> Date: Fri Jul 22 12:01:21 2016
>>> New Revision: 276435
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=276435&view=rev
>>> Log:
>>> AMDGPU: Fix i1 fp_to_int
>>>
>>> R600's i1 fp_to_uint selected but was incorrect according to
>>> what instcombine constant folds to.
>>>
>>> Modified:
>>>     llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
>>>     llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>>>     llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
>>>     llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
>>>     llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
>>>     llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.ll
>>>     llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
>>>     llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.ll
>>>
>>
>> Hi Hans,
>>
>> Is this OK to merge to the 3.9 branch?  I am the code owner, and I approve.
>>
>> -Tom
>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Fri Jul 22 12:01:21 2016
>>> @@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb;
>>>  int PI = 0x40490fdb;
>>>  int TWO_PI_INV = 0x3e22f983;
>>>  int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
>>> -int FP32_NEG_ONE = 0xbf800000;
>>>  int FP32_ONE = 0x3f800000;
>>> +int FP32_NEG_ONE = 0xbf800000;
>>>  int FP64_ONE = 0x3ff0000000000000;
>>> +int FP64_NEG_ONE = 0xbff0000000000000;
>>>  }
>>>  def CONST : Constants;
>>>
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Fri Jul 22 12:01:21 2016
>>> @@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(c
>>>    setOperationAction(ISD::SETCC, MVT::i32, Expand);
>>>    setOperationAction(ISD::SETCC, MVT::f32, Expand);
>>>    setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
>>> +  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
>>>    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
>>>    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
>>>
>>> @@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResu
>>>      return;
>>>    case ISD::FP_TO_UINT:
>>>      if (N->getValueType(0) == MVT::i1) {
>>> -      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
>>> +      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
>>>        return;
>>>      }
>>>      // Fall-through. Since we don't care about out of bounds values
>>>      // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
>>>      // considers some extra cases which are not necessary here.
>>>    case ISD::FP_TO_SINT: {
>>> +    if (N->getValueType(0) == MVT::i1) {
>>> +      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
>>> +      return;
>>> +    }
>>> +
>>>      SDValue Result;
>>>      if (expandFP_TO_SINT(N, Result, DAG))
>>>        Results.push_back(Result);
>>> @@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUB
>>>    return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
>>>  }
>>>
>>> -SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
>>> +SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
>>> +  SDLoc DL(Op);
>>> +  return DAG.getNode(
>>> +      ISD::SETCC,
>>> +      DL,
>>> +      MVT::i1,
>>> +      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
>>> +      DAG.getCondCode(ISD::SETEQ));
>>> +}
>>> +
>>> +SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
>>>    SDLoc DL(Op);
>>>    return DAG.getNode(
>>>        ISD::SETCC,
>>>        DL,
>>>        MVT::i1,
>>> -      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
>>> -      DAG.getCondCode(ISD::SETNE)
>>> -      );
>>> +      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
>>> +      DAG.getCondCode(ISD::SETEQ));
>>>  }
>>>
>>>  SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h Fri Jul 22 12:01:21 2016
>>> @@ -72,7 +72,8 @@ private:
>>>
>>>    SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
>>>    SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
>>> -  SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
>>> +  SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
>>> +  SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
>>>
>>>    SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
>>>    SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
>>>
>>> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
>>> +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Fri Jul 22 12:01:21 2016
>>> @@ -3401,6 +3401,16 @@ def : Pat <
>>>      (V_CNDMASK_B32_e64 0, -1, $src), sub1)
>>>  >;
>>>
>>> +class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
>>> +  (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
>>> +  (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
>>> +>;
>>> +
>>> +def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
>>> +def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
>>> +def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
>>> +def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
>>> +
>>>  // If we need to perform a logical operation on i1 values, we need to
>>>  // use vector comparisons since there is only one SCC register. Vector
>>>  // comparisons still write to a pair of SGPRs, so treat these as
>>>
>>> Modified: llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.f64.ll?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.f64.ll (original)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.f64.ll Fri Jul 22 12:01:21 2016
>>> @@ -1,7 +1,8 @@
>>>  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
>>>  ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>>>
>>> -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
>>> +declare i32 @llvm.amdgcn.workitem.id.x() #1
>>> +declare double @llvm.fabs.f64(double) #1
>>>
>>>  ; FUNC-LABEL: @fp_to_sint_f64_i32
>>>  ; SI: v_cvt_i32_f64_e32
>>> @@ -54,3 +55,23 @@ define void @fp_to_sint_i64_f64(i64 addr
>>>    store i64 %cast, i64 addrspace(1)* %out, align 8
>>>    ret void
>>>  }
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_sint_f64_to_i1:
>>> +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{\[[0-9]+:[0-9]+\]}}
>>> +define void @fp_to_sint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
>>> +  %conv = fptosi double %in to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_sint_fabs_f64_to_i1:
>>> +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{\[[0-9]+:[0-9]+\]}}|
>>> +define void @fp_to_sint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
>>> +  %in.fabs = call double @llvm.fabs.f64(double %in)
>>> +  %conv = fptosi double %in.fabs to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +attributes #0 = { nounwind }
>>> +attributes #1 = { nounwind readnone }
>>>
>>> Modified: llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.ll?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.ll (original)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/fp_to_sint.ll Fri Jul 22 12:01:21 2016
>>> @@ -2,7 +2,7 @@
>>>  ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
>>>  ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
>>>
>>> -declare float @llvm.fabs.f32(float) #0
>>> +declare float @llvm.fabs.f32(float) #1
>>>
>>>  ; FUNC-LABEL: {{^}}fp_to_sint_i32:
>>>  ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>>> @@ -17,7 +17,7 @@ define void @fp_to_sint_i32(i32 addrspac
>>>  ; FUNC-LABEL: {{^}}fp_to_sint_i32_fabs:
>>>  ; SI: v_cvt_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
>>>  define void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
>>> -  %in.fabs = call float @llvm.fabs.f32(float %in) #0
>>> +  %in.fabs = call float @llvm.fabs.f32(float %in)
>>>    %conv = fptosi float %in.fabs to i32
>>>    store i32 %conv, i32 addrspace(1)* %out
>>>    ret void
>>> @@ -227,4 +227,26 @@ define void @fp_to_sint_v4i64(<4 x i64>
>>>    ret void
>>>  }
>>>
>>> -attributes #0 = { nounwind readnone }
>>> +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1:
>>> +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}}
>>> +
>>> +; EG: AND_INT
>>> +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, literal.y,
>>> +; EG-NEXT: -1082130432(-1.000000e+00)
>>> +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
>>> +  %conv = fptosi float %in to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1:
>>> +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, |s{{[0-9]+}}|
>>> +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
>>> +  %in.fabs = call float @llvm.fabs.f32(float %in)
>>> +  %conv = fptosi float %in.fabs to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +attributes #0 = { nounwind }
>>> +attributes #1 = { nounwind readnone }
>>>
>>> Modified: llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.f64.ll?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.f64.ll (original)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.f64.ll Fri Jul 22 12:01:21 2016
>>> @@ -1,7 +1,8 @@
>>>  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
>>>  ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
>>>
>>> -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
>>> +declare i32 @llvm.amdgcn.workitem.id.x() #1
>>> +declare double @llvm.fabs.f64(double) #1
>>>
>>>  ; SI-LABEL: {{^}}fp_to_uint_i32_f64:
>>>  ; SI: v_cvt_u32_f64_e32
>>> @@ -68,3 +69,23 @@ define void @fp_to_uint_v4i64_v4f64(<4 x
>>>    store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32
>>>    ret void
>>>  }
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_uint_f64_to_i1:
>>> +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{\[[0-9]+:[0-9]+\]}}
>>> +define void @fp_to_uint_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
>>> +  %conv = fptoui double %in to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f64_to_i1:
>>> +; SI: v_cmp_eq_f64_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{\[[0-9]+:[0-9]+\]}}|
>>> +define void @fp_to_uint_fabs_f64_to_i1(i1 addrspace(1)* %out, double %in) #0 {
>>> +  %in.fabs = call double @llvm.fabs.f64(double %in)
>>> +  %conv = fptoui double %in.fabs to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +attributes #0 = { nounwind }
>>> +attributes #1 = { nounwind readnone }
>>>
>>> Modified: llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.ll?rev=276435&r1=276434&r2=276435&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.ll (original)
>>> +++ llvm/trunk/test/CodeGen/AMDGPU/fp_to_uint.ll Fri Jul 22 12:01:21 2016
>>> @@ -1,6 +1,8 @@
>>> -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
>>> -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
>>> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
>>>  ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
>>> +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
>>> +
>>> +declare float @llvm.fabs.f32(float) #1
>>>
>>>  ; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
>>>  ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
>>> @@ -215,3 +217,27 @@ define void @fp_to_uint_v4f32_to_v4i64(<
>>>    store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
>>>    ret void
>>>  }
>>> +
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i1:
>>> +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
>>> +
>>> +; EG: AND_INT
>>> +; EG: SETE_DX10 {{[*]?}} T{{[0-9]+}}.{{[XYZW]}}, KC0[2].Z, 1.0,
>>> +define void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
>>> +  %conv = fptoui float %in to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +; FUNC-LABEL: {{^}}fp_to_uint_fabs_f32_to_i1:
>>> +; SI: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, |s{{[0-9]+}}|
>>> +define void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
>>> +  %in.fabs = call float @llvm.fabs.f32(float %in)
>>> +  %conv = fptoui float %in.fabs to i1
>>> +  store i1 %conv, i1 addrspace(1)* %out
>>> +  ret void
>>> +}
>>> +
>>> +attributes #0 = { nounwind }
>>> +attributes #1 = { nounwind readnone }
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits