[llvm] 0a669bd - AMDGPU: Add additional tests for combiner infinite loop
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 14 00:02:47 PST 2023
Author: Matt Arsenault
Date: 2023-02-14T04:02:38-04:00
New Revision: 0a669bd894f382b6f70b9b4eba4ada11afad0869
URL: https://github.com/llvm/llvm-project/commit/0a669bd894f382b6f70b9b4eba4ada11afad0869
DIFF: https://github.com/llvm/llvm-project/commit/0a669bd894f382b6f70b9b4eba4ada11afad0869.diff
LOG: AMDGPU: Add additional tests for combiner infinite loop
Added:
Modified:
llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index cc32c25dab5b..635fdc8379ac 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2840,6 +2840,187 @@ define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) {
ret float %i3
}
+define float @v_fneg_select_infloop_regression_f32_commute0(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute0:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_bfrev_b32_e32 v1, 1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float %arg, float 0.0
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float 0.0, float %i2
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_f32_commute1(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT: v_bfrev_b32_e32 v1, 1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float 0.0, float %arg
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float %i2, float 0.0
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_f32_commute2(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GCN-NEXT: v_bfrev_b32_e32 v1, 1
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float %arg, float 0.0
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float %i2, float 0.0
+ ret float %i3
+}
+
+; Check with an inline constant that's equally cheap to negate
+define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float 2.0, float %arg
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float 2.0, float %i2
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_inline_imm_f32_commute0(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute0:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float %arg, float 2.0
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float 2.0, float %i2
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_inline_imm_f32_commute1(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float 2.0, float %arg
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float %i2, float 2.0
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_inline_imm_f32_commute2(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float %arg, float 2.0
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float %i2, float 2.0
+ ret float %i3
+}
+
+; Check with a negative inline constant that's equally cheap to negate
+define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float -2.0, float %arg
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float -2.0, float %i2
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float %arg, float -2.0
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float -2.0, float %i2
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float -2.0, float %arg
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float %i2, float -2.0
+ ret float %i3
+}
+
+define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 1, v1
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %i = select i1 %arg1, float %arg, float -2.0
+ %i2 = fneg float %i
+ %i3 = select i1 %arg1, float %i2, float -2.0
+ ret float %i3
+}
+
define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1 %arg1, ptr addrspace(1) %ptr) {
; SI-LABEL: s_fneg_select_infloop_regression_f64:
; SI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
index 3340f43b32e7..2bf4e2be1b47 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
@@ -380,7 +380,7 @@ define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
- %fneg.x = fsub float -0.0, %x
+ %fneg.x = fneg float %x
%select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
%add = fadd float %select, %y
store volatile float %add, ptr addrspace(1) undef
@@ -400,7 +400,7 @@ define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
%x = load volatile float, ptr addrspace(1) undef
%y = load volatile float, ptr addrspace(1) undef
%cmp = icmp eq i32 %c, 0
- %fneg.x = fsub float -0.0, %x
+ %fneg.x = fneg float %x
%select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
%add = fadd float %select, %y
store volatile float %add, ptr addrspace(1) undef
@@ -819,6 +819,173 @@ define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
ret void
}
+; GCN-LABEL: {{^}}mul_select_negfabs_posk_inv2pi_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
+; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
+define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float %fneg.fabs.x, float 0x3FC45F3060000000
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}mul_select_posk_inv2pi_negfabs_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
+; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
+define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float 0x3FC45F3060000000, float %fneg.fabs.x
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}mul_select_negfabs_negk_inv2pi_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
+; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
+
+; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
+
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float %fneg.fabs.x, float 0xBFC45F3060000000
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}mul_select_negk_inv2pi_negfabs_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
+; GCN: s_cmp_lg_u32
+; GCN: s_cselect_b64 vcc, -1, 0
+; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
+
+; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float 0xBFC45F3060000000, float %fneg.fabs.x
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}mul_select_negfabs_posk_0_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+; GCN-DAG: v_bfrev_b32_e32 [[K:v[0-9]+]], 1{{$}}
+; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
+; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
+define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float %fneg.fabs.x, float 0.0
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}mul_select_posk_0_negfabs_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN-DAG: v_bfrev_b32_e32 [[K:v[0-9]+]], 1{{$}}
+; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
+; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
+define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float 0.0, float %fneg.fabs.x
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}mul_select_negfabs_negk_0_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0, [[X]], vcc
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float %fneg.fabs.x, float -0.0
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}mul_select_negk_0_negfabs_f32:
+; GCN: buffer_load_dword [[X:v[0-9]+]]
+; GCN: buffer_load_dword [[Y:v[0-9]+]]
+
+; GCN: s_cmp_lg_u32
+; GCN: s_cselect_b64 vcc, -1, 0
+; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0, [[X]], vcc
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
+define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
+ %x = load volatile float, ptr addrspace(1) undef
+ %y = load volatile float, ptr addrspace(1) undef
+ %cmp = icmp eq i32 %c, 0
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %fneg.fabs.x = fneg float %fabs.x
+ %select = select i1 %cmp, float -0.0, float %fneg.fabs.x
+ %add = fmul float %select, %y
+ store volatile float %add, ptr addrspace(1) undef
+ ret void
+}
+
+
declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
More information about the llvm-commits
mailing list