[PATCH] R600/SI: Add a pattern for f32 ftrunc
Tom Stellard
thomas.stellard at amd.com
Wed Jun 18 09:11:06 PDT 2014
---
lib/Target/R600/AMDGPUISelLowering.cpp | 2 +
lib/Target/R600/R600Instructions.td | 5 +-
lib/Target/R600/SIInstructions.td | 2 +-
test/CodeGen/R600/ftrunc.ll | 165 ++++++++++++++++++++-------------
test/CodeGen/R600/ftrunc64.ll | 84 +++++++++++++++++
5 files changed, 188 insertions(+), 70 deletions(-)
create mode 100644 test/CodeGen/R600/ftrunc64.ll
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index cc172ef..6f2262e 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -896,6 +896,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_trunc:
+ return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
}
}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 58c704d..659df46 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -721,14 +721,11 @@ def SETNE_DX10 : R600_2OP <
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
-def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
-// Add also ftrunc intrinsic pattern
-def : Pat<(ftrunc f32:$src0), (TRUNC $src0)>;
-
def MOV : R600_1OP <0x19, "MOV", []>;
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 3616533..7dde8c3 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1096,7 +1096,7 @@ defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
[(set f32:$dst, (AMDGPUfract f32:$src0))]
>;
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
- [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))]
+ [(set f32:$dst, (ftrunc f32:$src0))]
>;
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
[(set f32:$dst, (fceil f32:$src0))]
diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll
index 6b235ff..0d7d467 100644
--- a/test/CodeGen/R600/ftrunc.ll
+++ b/test/CodeGen/R600/ftrunc.ll
@@ -1,84 +1,119 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
-declare double @llvm.trunc.f64(double) nounwind readnone
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
+declare float @llvm.trunc.f32(float) nounwind readnone
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
+declare <3 x float> @llvm.trunc.v3f32(<3 x float>) nounwind readnone
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone
+declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
-; CI-LABEL: @ftrunc_f64:
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
- %y = call double @llvm.trunc.f64(double %x) nounwind readnone
- store double %y, double addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_f32:
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
+ %y = call float @llvm.trunc.f32(float %x) nounwind readnone
+ store float %y, float addrspace(1)* %out
ret void
}
-; CI-LABEL: @ftrunc_v2f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
- %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
- store <2 x double> %y, <2 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v2f32:
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
+ %y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
+ store <2 x float> %y, <2 x float> addrspace(1)* %out
ret void
}
-; FIXME-CI-LABEL: @ftrunc_v3f64:
-; FIXME-CI: V_TRUNC_F64_e32
-; FIXME-CI: V_TRUNC_F64_e32
-; FIXME-CI: V_TRUNC_F64_e32
-; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
-; store <3 x double> %y, <3 x double> addrspace(1)* %out
+; FIXME-FUNC-LABEL: @ftrunc_v3f32:
+; FIXME-EG: TRUNC
+; FIXME-EG: TRUNC
+; FIXME-EG: TRUNC
+; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: V_TRUNC_F32_e32
+; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
+; %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
+; store <3 x float> %y, <3 x float> addrspace(1)* %out
; ret void
; }
-; CI-LABEL: @ftrunc_v4f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
- %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
- store <4 x double> %y, <4 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v4f32:
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
+ %y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+ store <4 x float> %y, <4 x float> addrspace(1)* %out
ret void
}
-; CI-LABEL: @ftrunc_v8f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
- %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
- store <8 x double> %y, <8 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v8f32:
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
+ %y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
+ store <8 x float> %y, <8 x float> addrspace(1)* %out
ret void
}
-; CI-LABEL: @ftrunc_v16f64:
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-; CI: V_TRUNC_F64_e32
-define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
- %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
- store <16 x double> %y, <16 x double> addrspace(1)* %out
+; FUNC-LABEL: @ftrunc_v16f32:
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; EG: TRUNC
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+; SI: V_TRUNC_F32_e32
+define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
+ %y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
+ store <16 x float> %y, <16 x float> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/ftrunc64.ll b/test/CodeGen/R600/ftrunc64.ll
new file mode 100644
index 0000000..6b235ff
--- /dev/null
+++ b/test/CodeGen/R600/ftrunc64.ll
@@ -0,0 +1,84 @@
+; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+
+declare double @llvm.trunc.f64(double) nounwind readnone
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
+declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
+declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
+
+; CI-LABEL: @ftrunc_f64:
+; CI: V_TRUNC_F64_e32
+define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
+ %y = call double @llvm.trunc.f64(double %x) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; CI-LABEL: @ftrunc_v2f64:
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
+ %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
+ store <2 x double> %y, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FIXME-CI-LABEL: @ftrunc_v3f64:
+; FIXME-CI: V_TRUNC_F64_e32
+; FIXME-CI: V_TRUNC_F64_e32
+; FIXME-CI: V_TRUNC_F64_e32
+; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+; %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
+; store <3 x double> %y, <3 x double> addrspace(1)* %out
+; ret void
+; }
+
+; CI-LABEL: @ftrunc_v4f64:
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
+ %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+ store <4 x double> %y, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; CI-LABEL: @ftrunc_v8f64:
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
+ %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
+ store <8 x double> %y, <8 x double> addrspace(1)* %out
+ ret void
+}
+
+; CI-LABEL: @ftrunc_v16f64:
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+; CI: V_TRUNC_F64_e32
+define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
+ %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
+ store <16 x double> %y, <16 x double> addrspace(1)* %out
+ ret void
+}
--
1.8.1.5
More information about the llvm-commits
mailing list