[PATCHES] R600/SI: New V_FRACT fix, intrinsic for S_FLBIT_I32, and more

Marek Olšák maraeo at gmail.com
Tue Mar 10 06:46:22 PDT 2015


Ping

On Thu, Mar 5, 2015 at 10:33 PM, Marek Olšák <maraeo at gmail.com> wrote:
> Hi Matt,
>
> New patches are attached.
>
> Marek
>
> On Tue, Mar 3, 2015 at 12:04 AM, Matt Arsenault
> <Matthew.Arsenault at amd.com> wrote:
>> On 03/02/2015 03:35 AM, Marek Olšák wrote:
>>
>> Please review.
>>
>> Marek
>>
>>
>> 0001-R600-SI-Add-an-intrinsic-for-S_FLBIT_I32-V_FFBH_I32.patch
>>
>> From 48bcdf426a83dd0ef72971ba76d20c0838fcde89 Mon Sep 17 00:00:00 2001
>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>> Date: Sun, 1 Mar 2015 19:16:50 +0100
>> Subject: [PATCH 1/3] R600/SI: Add an intrinsic for S_FLBIT_I32 / V_FFBH_I32
>>
>> Required by OpenGL (ARB_gpu_shader5).
>> ---
>>  lib/Target/R600/AMDGPUIntrinsics.td        |  1 +
>>  lib/Target/R600/SIInstrInfo.cpp            |  1 +
>>  lib/Target/R600/SIInstructions.td          |  4 +++-
>>  test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll | 28
>> ++++++++++++++++++++++++++++
>>  4 files changed, 33 insertions(+), 1 deletion(-)
>>  create mode 100644 test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll
>>
>> LGTM
>>
>>
>> 0002-R600-SI-Expand-fract-to-floor-then-only-select-V_FRA.patch
>>
>> From 3ccfcc790517718377933d57f03e070876191c9a Mon Sep 17 00:00:00 2001
>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>> Date: Sun, 1 Mar 2015 23:07:48 +0100
>> Subject: [PATCH 2/3] R600/SI: Expand fract to floor, then only select
>> V_FRACT
>>  on CI
>>
>> V_FRACT is buggy on SI.
>>
>> R600-specific code is left intact.
>> ---
>>  lib/Target/R600/AMDGPUISelLowering.cpp |  3 ---
>>  lib/Target/R600/R600ISelLowering.cpp   |  4 ++++
>>  lib/Target/R600/SIISelLowering.cpp     |  6 ++++++
>>  lib/Target/R600/SIInstructions.td      | 24 ++++++++++++++++++++++++
>>  test/CodeGen/R600/llvm.AMDGPU.fract.ll | 17 +++++++++++++----
>>  5 files changed, 47 insertions(+), 7 deletions(-)
>>
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp
>> b/lib/Target/R600/AMDGPUISelLowering.cpp
>> index 4707279..62a33fa 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -885,9 +885,6 @@ SDValue
>> AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
>>        return LowerIntrinsicIABS(Op, DAG);
>>      case AMDGPUIntrinsic::AMDGPU_lrp:
>>        return LowerIntrinsicLRP(Op, DAG);
>> -    case AMDGPUIntrinsic::AMDGPU_fract:
>> -    case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
>> -      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
>>
>>      case AMDGPUIntrinsic::AMDGPU_clamp:
>>      case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
>> diff --git a/lib/Target/R600/R600ISelLowering.cpp
>> b/lib/Target/R600/R600ISelLowering.cpp
>> index c738611..cf0a60f 100644
>> --- a/lib/Target/R600/R600ISelLowering.cpp
>> +++ b/lib/Target/R600/R600ISelLowering.cpp
>> @@ -837,6 +837,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op,
>> SelectionDAG &DAG) const
>>      case Intrinsic::AMDGPU_rsq:
>>        // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
>>        return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
>> +
>> +    case AMDGPUIntrinsic::AMDGPU_fract:
>> +    case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
>> +      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
>>      }
>>      // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
>>      break;
>> diff --git a/lib/Target/R600/SIISelLowering.cpp
>> b/lib/Target/R600/SIISelLowering.cpp
>> index 7d794b8..5c9a9f9 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -932,6 +932,12 @@ SDValue
>> SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
>>                         Op.getOperand(1),
>>                         Op.getOperand(2),
>>                         Op.getOperand(3));
>> +
>> +  case AMDGPUIntrinsic::AMDGPU_fract:
>> +  case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
>> +    return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
>> +                       DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
>> +
>>    default:
>>      return AMDGPUTargetLowering::LowerOperation(Op, DAG);
>>    }
>> diff --git a/lib/Target/R600/SIInstructions.td
>> b/lib/Target/R600/SIInstructions.td
>> index ab1f08f..a2ba9fc 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -3288,6 +3288,30 @@ def : Pat <
>>    (V_CNDMASK_B32_e64 $src0, $src1, $src2)
>>  >;
>>
>> +//===----------------------------------------------------------------------===//
>> +// Fract Patterns
>> +//===----------------------------------------------------------------------===//
>> +
>> +// Obvious fract patterns for CI
>> +multiclass FractPatternCI <ValueType vt, InstSI inst> {
>> +  // Convert (x - floor(x)) to fract(x)
>> +  def : Pat <
>> +    (vt (fsub vt:$x, (vt (ffloor vt:$x)))),
>> +    (inst SRCMODS.NONE, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>> +  >;
>> +
>> +  // Convert (x + (-floor(x))) to fract(x)
>> +  def : Pat <
>> +    (vt (fadd vt:$x, (vt (fneg (vt (ffloor vt:$x)))))),
>> +    (inst SRCMODS.NONE, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>> +  >;
>> +}
>>
>> This doesn't need to be a multiclass, and can be 2 separate, standalone
>> patterns. You only need one of these patterns for f32, and the other for
>> f64. fadd + fneg is folded into fsub for f32, so it only needs the first
>> pattern. fsub is expanded for f64, so it only needs the second pattern.
>>
>> An improvement would also be to use the VOP3Mods complex patterns to handle
>> that folding (with a corresponding test to make sure the source modifiers
>> are used).
>>
>> +
>> +let Predicates = [isCI] in {
>> +defm : FractPatternCI <f32, V_FRACT_F32_e64>;
>> +defm : FractPatternCI <f64, V_FRACT_F64_e64>;
>> +} // End Predicates = [isCI]
>> +
>>
>> //============================================================================//
>>  // Miscellaneous Optimization Patterns
>>
>> //============================================================================//
>> diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
>> b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
>> index 7d15300..589206c 100644
>> --- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
>> +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
>> @@ -1,5 +1,6 @@
>> -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck
>> -check-prefix=SI -check-prefix=FUNC %s
>> -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck
>> -check-prefix=SI -check-prefix=FUNC %s
>> +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck
>> -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
>> +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s |
>> FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
>> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck
>> -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
>>  ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck
>> -check-prefix=EG -check-prefix=FUNC %s
>>
>>  declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
>> @@ -8,7 +9,11 @@ declare float @llvm.AMDGPU.fract.f32(float) nounwind
>> readnone
>>  declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
>>
>>  ; FUNC-LABEL: {{^}}fract_f32:
>> -; SI: v_fract_f32
>> +; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
>> +; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
>> +; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
>> +; GCN: buffer_store_dword [[RESULT]]
>> +
>>  ; EG: FRACT
>>  define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src)
>> nounwind {
>>    %val = load float addrspace(1)* %src, align 4
>> @@ -18,7 +23,11 @@ define void @fract_f32(float addrspace(1)* %out, float
>> addrspace(1)* %src) nounw
>>  }
>>
>>  ; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
>> -; SI: v_fract_f32
>> +; CI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
>> +; SI: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
>> +; SI: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[FLR]], [[INPUT]]
>> +; GCN: buffer_store_dword [[RESULT]]
>> +
>>  ; EG: FRACT
>>  define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float
>> addrspace(1)* %src) nounwind {
>>    %val = load float addrspace(1)* %src, align 4
>> --
>> 2.1.0
>>
>>
>> 0003-R600-SI-Use-V_FRACT_F64-for-faster-64-bit-floor-on-S.patch
>>
>> From 7efe5a14fc77dea77a728cdacb413e1af2b277fc Mon Sep 17 00:00:00 2001
>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>> Date: Sun, 1 Mar 2015 23:39:34 +0100
>> Subject: [PATCH 3/3] R600/SI: Use V_FRACT_F64 for faster 64-bit floor on SI
>>
>> Other f64 opcodes not supported on SI can be lowered in a similar way.
>> ---
>>  lib/Target/R600/SIISelLowering.cpp         |  2 +-
>>  lib/Target/R600/SIInstrInfo.cpp            | 20 ++++++++++++
>>  lib/Target/R600/SIInstrInfo.td             |  1 +
>>  lib/Target/R600/SIInstructions.td          | 50
>> ++++++++++++++++++++++++++++++
>>  test/CodeGen/R600/ffloor.f64.ll            | 22 +++----------
>>  test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll | 22 +++++++++++++
>>  6 files changed, 99 insertions(+), 18 deletions(-)
>>  create mode 100644 test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
>>
>> diff --git a/lib/Target/R600/SIISelLowering.cpp
>> b/lib/Target/R600/SIISelLowering.cpp
>> index 5c9a9f9..8e57e17 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -206,10 +206,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
>>    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
>>      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
>>      setOperationAction(ISD::FCEIL, MVT::f64, Legal);
>> -    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
>>      setOperationAction(ISD::FRINT, MVT::f64, Legal);
>>    }
>>
>> +  setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
>>    setOperationAction(ISD::FDIV, MVT::f32, Custom);
>>    setOperationAction(ISD::FDIV, MVT::f64, Custom);
>>
>> diff --git a/lib/Target/R600/SIInstrInfo.cpp
>> b/lib/Target/R600/SIInstrInfo.cpp
>> index 52fc012..98dddd6 100644
>> --- a/lib/Target/R600/SIInstrInfo.cpp
>> +++ b/lib/Target/R600/SIInstrInfo.cpp
>> @@ -720,6 +720,26 @@ bool
>> SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
>>      MI->eraseFromParent();
>>      break;
>>    }
>> +
>> +  case AMDGPU::V_CNDMASK_B64_PSEUDO: {
>> +    unsigned Dst = MI->getOperand(0).getReg();
>> +    unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
>> +    unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
>> +    unsigned Src0 = MI->getOperand(1).getReg();
>> +    unsigned Src1 = MI->getOperand(2).getReg();
>> +    const MachineOperand &SrcCond = MI->getOperand(3);
>> +
>> +    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
>> +        .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
>> +        .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
>> +        .addOperand(SrcCond);
>> +    BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
>> +        .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
>> +        .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
>> +        .addOperand(SrcCond);
>> +    MI->eraseFromParent();
>> +    break;
>> +  }
>>    }
>>    return true;
>>  }
>> diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
>> index bd90073..d31d0ad 100644
>> --- a/lib/Target/R600/SIInstrInfo.td
>> +++ b/lib/Target/R600/SIInstrInfo.td
>> @@ -315,6 +315,7 @@ def SIOperand {
>>
>>  def SRCMODS {
>>    int NONE = 0;
>> +  int NEG = 1;
>>  }
>>
>>  def DSTCLAMP {
>> diff --git a/lib/Target/R600/SIInstructions.td
>> b/lib/Target/R600/SIInstructions.td
>> index a2ba9fc..cdafa54 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -28,6 +28,8 @@ def SendMsgImm : Operand<i32> {
>>
>>  def isGCN : Predicate<"Subtarget->getGeneration() "
>>                        ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
>> +def isSI : Predicate<"Subtarget->getGeneration() "
>> +                      "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
>>  def isSICI : Predicate<
>>    "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
>>    "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
>> @@ -1851,6 +1853,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>,
>> "v_ashrrev_i64",
>>
>> //===----------------------------------------------------------------------===//
>>  let isCodeGenOnly = 1, isPseudo = 1 in {
>>
>> +// For use in patterns
>> +def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
>> +  (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
>> +>;
>> +
>>  let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
>>  // 64-bit vector move instruction.  This is mainly used by the
>> SIFoldOperands
>>  // pass to enable folding of inline immediates.
>> @@ -3292,6 +3299,49 @@ def : Pat <
>>  // Fract Patterns
>>
>> //===----------------------------------------------------------------------===//
>>
>> +let Predicates = [isSI] in {
>> +
>> +// V_FRACT is buggy on SI, so the F32 version is never used and
>> (x-floor(x)) is
>> +// used instead. However, SI doesn't have V_FLOOR_F64, so the most
>> efficient
>> +// way to implement it is using V_FRACT_F64.
>> +// The workaround for the V_FRACT bug is:
>> +//    fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
>> +
>> +// Convert (x + (-floor(x)) to fract(x)
>> +def : Pat <
>> +  (f64 (fadd f64:$x, (f64 (fneg (f64 (ffloor f64:$x)))))),
>> +  (V_CNDMASK_B64_PSEUDO
>> +      $x,
>> +      (V_MIN_F64
>> +          SRCMODS.NONE,
>> +          (V_FRACT_F64_e64 SRCMODS.NONE, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
>> +          SRCMODS.NONE,
>> +          (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
>> +          DSTCLAMP.NONE, DSTOMOD.NONE),
>> +      (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
>> +>;
>>
>> For a simple isnan check, I think using v_cmp_o_f64 x, x might be slightly
>> preferable for canonicalization purposes, but it doesn't really matter.
>>
>> These also might benefit from using the modifier folding complex pattern.
>>
>> +
>> +// Convert floor(x) to (x - fract(x))
>> +def : Pat <
>> +  (f64 (ffloor f64:$x)),
>> +  (V_ADD_F64
>> +      SRCMODS.NONE,
>> +      $x,
>> +      SRCMODS.NEG,
>> +      (V_CNDMASK_B64_PSEUDO
>> +         $x,
>> +         (V_MIN_F64
>> +             SRCMODS.NONE,
>> +             (V_FRACT_F64_e64 SRCMODS.NONE, $x, DSTCLAMP.NONE,
>> DSTOMOD.NONE),
>> +             SRCMODS.NONE,
>> +             (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
>> +             DSTCLAMP.NONE, DSTOMOD.NONE),
>> +         (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
>> +      DSTCLAMP.NONE, DSTOMOD.NONE)
>> +>;
>> +
>> +} // End Predicates = [isSI]
>> +
>>  // Obvious fract patterns for CI
>>  multiclass FractPatternCI <ValueType vt, InstSI inst> {
>>    // Convert (x - floor(x)) to fract(x)
>> diff --git a/test/CodeGen/R600/ffloor.f64.ll
>> b/test/CodeGen/R600/ffloor.f64.ll
>> index 745ad3b..369af5a 100644
>> --- a/test/CodeGen/R600/ffloor.f64.ll
>> +++ b/test/CodeGen/R600/ffloor.f64.ll
>> @@ -12,23 +12,11 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>)
>> nounwind readnone
>>  ; FUNC-LABEL: {{^}}ffloor_f64:
>>  ; CI: v_floor_f64_e32
>>
>> -; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
>> -; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
>> -; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
>> -; SI: s_lshr_b64
>> -; SI: s_not_b64
>> -; SI: s_and_b64
>> -; SI: cmp_lt_i32
>> -; SI: cndmask_b32
>> -; SI: cndmask_b32
>> -; SI: cmp_gt_i32
>> -; SI: cndmask_b32
>> -; SI: cndmask_b32
>> -; SI-DAG: v_cmp_lt_f64
>> -; SI-DAG: v_cmp_lg_f64
>> -; SI-DAG: s_and_b64
>> -; SI-DAG: v_cndmask_b32
>> -; SI-DAG: v_cndmask_b32
>> +; SI: v_fract_f64_e32
>> +; SI: v_min_f64
>> +; SI: v_cmp_class_f64_e64
>> +; SI: v_cndmask_b32_e64
>> +; SI: v_cndmask_b32_e64
>>  ; SI: v_add_f64
>>  ; SI: s_endpgm
>>  define void @ffloor_f64(double addrspace(1)* %out, double %x) {
>> diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
>> b/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
>> new file mode 100644
>> index 0000000..d836601
>> --- /dev/null
>> +++ b/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
>> @@ -0,0 +1,22 @@
>> +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck
>> -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
>> +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s |
>> FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
>> +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck
>> -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
>> +
>> +declare double @llvm.AMDGPU.fract.f64(double) nounwind readnone
>> +
>> +; FUNC-LABEL: {{^}}fract_f64:
>> +; GCN: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]],
>> v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]
>> +; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
>> +; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
>> +; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]],
>> v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
>> +; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]],
>> v{{\[}}[[LO]]:[[HI]]], 3
>> +; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
>> +; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
>> +; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
>> +; CI: buffer_store_dwordx2 [[FRC]]
>> +define void @fract_f64(double addrspace(1)* %out, double addrspace(1)*
>> %src) nounwind {
>> +  %val = load double addrspace(1)* %src, align 4
>> +  %fract = call double @llvm.AMDGPU.fract.f64(double %val) nounwind
>> readnone
>> +  store double %fract, double addrspace(1)* %out, align 4
>> +  ret void
>> +}
>> --
>> 2.1.0
>>
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>>




More information about the llvm-commits mailing list