[PATCH] R600/SI: Fix V_FRACT hw bug on SI
Marek Olšák
maraeo at gmail.com
Thu Feb 26 15:42:26 PST 2015
From: Marek Olšák <marek.olsak at amd.com>
---
lib/Target/R600/AMDGPUISelLowering.cpp | 3 ---
lib/Target/R600/R600ISelLowering.cpp | 4 ++++
lib/Target/R600/SIISelLowering.cpp | 19 +++++++++++++++++++
test/CodeGen/R600/llvm.AMDGPU.fract.ll | 15 +++++++++++----
4 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 4707279..62a33fa 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -885,9 +885,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index e84aabc..18dfccf 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -837,6 +837,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 30b8c45..372deaa 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -932,6 +932,25 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ SDValue Op1 = Op.getOperand(1);
+ SDValue UpperBound = DAG.getConstantFP(BitsToFloat(0x3f7fffff), VT);
+
+ // Hardware bug: V_FRACT returns 1.0 for a very small negative input.
+ // Workaround:
+ // isnan(Op1) ? Op1 : min(fract(Op1), 0x3f7fffff)
+ // which simplifies to:
+ // isnan(Op1) ? fract(Op1) : min(fract(Op1), 0x3f7fffff) =
+ // 0x3f7fffff < fract(Op1) ? 0x3f7fffff : fract(Op1) =
+ // fmin_legacy(0x3f7fffff, fract(Op1))
+ SDValue Frc = DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op1);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, UpperBound, Frc);
+ } else
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index 7d15300..3c165bf 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
@@ -8,7 +9,13 @@ declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}fract_f32:
-; SI: v_fract_f32
+
+; CIVI: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
+; CIVI-NOT: v_min
+; SI: v_fract_f32_e32 [[FRC:v[0-9]+]], [[INPUT:v[0-9]+]]
+; SI: v_min_legacy_f32_e32 [[RESULT:v[0-9]+]], 0x3f7fffff, [[FRC]]
+; GCN: buffer_store_dword [[RESULT]]
+
; EG: FRACT
define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
%val = load float addrspace(1)* %src, align 4
@@ -18,7 +25,7 @@ define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounw
}
; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
-; SI: v_fract_f32
+; GCN: v_fract_f32_e32
; EG: FRACT
define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
%val = load float addrspace(1)* %src, align 4
--
2.1.0
More information about the llvm-commits mailing list