[PATCH] R600: Custom lower f32 = uint_to_fp i64
Tom Stellard
tom at stellard.net
Tue Oct 22 15:55:53 PDT 2013
From: Tom Stellard <thomas.stellard at amd.com>
---
lib/Target/R600/AMDGPUISelLowering.cpp | 22 ++++++++++++++++++++++
lib/Target/R600/AMDGPUISelLowering.h | 1 +
test/CodeGen/R600/uint_to_fp.ll | 23 +++++++++++++++++++----
3 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 009c94e..337dd18 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -136,6 +136,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
@@ -254,6 +256,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
}
@@ -691,6 +694,25 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
+SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue S0 = Op.getOperand(0);
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+ return SDValue();
+
+ // f32 = uint_to_fp i64: convert each i32 half, then add Lo + Hi * 2^32.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(0, MVT::i32));
+ SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(1, MVT::i32));
+ SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
+ DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+ return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+
+}
//===----------------------------------------------------------------------===//
// Helper functions
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index e167f12..dacb086 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -38,6 +38,7 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
protected:
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index 3f6e4c8..a5ac355 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-; R600-CHECK: @uint_to_fp_v2i32
+; R600-CHECK-LABEL: @uint_to_fp_v2i32
; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI-CHECK: @uint_to_fp_v2i32
+; SI-CHECK-LABEL: @uint_to_fp_v2i32
; SI-CHECK: V_CVT_F32_U32_e32
; SI-CHECK: V_CVT_F32_U32_e32
define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
@@ -13,12 +13,12 @@ define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
ret void
}
-; R600-CHECK: @uint_to_fp_v4i32
+; R600-CHECK-LABEL: @uint_to_fp_v4i32
; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: @uint_to_fp_v4i32
+; SI-CHECK-LABEL: @uint_to_fp_v4i32
; SI-CHECK: V_CVT_F32_U32_e32
; SI-CHECK: V_CVT_F32_U32_e32
; SI-CHECK: V_CVT_F32_U32_e32
@@ -29,3 +29,18 @@ define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
+
+; R600-CHECK-LABEL: @uint_to_fp_i64_f32
+; R600-CHECK: UINT_TO_FLT
+; R600-CHECK: UINT_TO_FLT
+; R600-CHECK: MULADD_IEEE
+; SI-CHECK-LABEL: @uint_to_fp_i64_f32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_CVT_F32_U32_e32
+; SI-CHECK: V_MAD_F32
+define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
+entry:
+ %0 = uitofp i64 %in to float
+ store float %0, float addrspace(1)* %out
+ ret void
+}
--
1.7.11.4
More information about the llvm-commits
mailing list