[PATCH] R600/SI: Add fmin and fmax intrinsics
Marek Olšák
maraeo at gmail.com
Thu Nov 20 10:05:27 PST 2014
From: Marek Olšák <marek.olsak at amd.com>
For some reason, the clamp modifier isn't folded into the min/max instruction itself; it ends up on a separate add instead. I always get this:
v_min_f32 v0, -abs(v5), -abs(v1)
v_add_f32 v0, 0, v0 clamp
v2: add tests
---
lib/Target/R600/AMDGPUIntrinsics.td | 2 ++
lib/Target/R600/SIInstructions.td | 9 +++++++++
test/CodeGen/R600/llvm.AMDGPU.fmax.ll | 21 +++++++++++++++++++++
test/CodeGen/R600/llvm.AMDGPU.fmin.ll | 21 +++++++++++++++++++++
4 files changed, 53 insertions(+)
create mode 100644 test/CodeGen/R600/llvm.AMDGPU.fmax.ll
create mode 100644 test/CodeGen/R600/llvm.AMDGPU.fmin.ll
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eee9c29..3ee4023 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -50,6 +50,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_fmax : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_fmin : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 90da7a9..27fdff9 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -2517,6 +2517,15 @@ def : Pat <
/* llvm.AMDGPU.pow */
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
+class VOP2Pat <SDPatternOperator pattern, InstSI inst> : Pat <
+ (pattern (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+ (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $clamp, $omod)
+>;
+
+def : VOP2Pat <int_AMDGPU_fmin, V_MIN_F32_e64>;
+def : VOP2Pat <int_AMDGPU_fmax, V_MAX_F32_e64>;
+
def : Pat <
(int_AMDGPU_div f32:$src0, f32:$src1),
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fmax.ll b/test/CodeGen/R600/llvm.AMDGPU.fmax.ll
new file mode 100644
index 0000000..e01143e
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.fmax.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}vector_fmax:
+; SI: v_max_f32_e32
+define void @vector_fmax(float %p0, float addrspace(1)* %in) #0 {
+main_body:
+ %load = load float addrspace(1)* %in, align 4
+ %max = call float @llvm.AMDGPU.fmax(float %p0, float %load)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %max, float %max, float %max, float %max)
+ ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.fmax(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fmin.ll b/test/CodeGen/R600/llvm.AMDGPU.fmin.ll
new file mode 100644
index 0000000..b8b40ca
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.fmin.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}vector_fmin:
+; SI: v_min_f32_e32
+define void @vector_fmin(float %p0, float addrspace(1)* %in) #0 {
+main_body:
+ %load = load float addrspace(1)* %in, align 4
+ %min = call float @llvm.AMDGPU.fmin(float %p0, float %load)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %min, float %min, float %min, float %min)
+ ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.fmin(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!0 = metadata !{metadata !"const", null, i32 1}
--
2.1.0
More information about the llvm-commits mailing list