[llvm] [AMDGPU] Auto-generated some lit test patterns (NFC). (PR #94310)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 22:03:03 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Christudasan Devadasan (cdevadas)
<details>
<summary>Changes</summary>
Also, the R600 RUN lines were split out into standalone tests.
---
Patch is 188.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94310.diff
11 Files Affected:
- (added) llvm/test/CodeGen/AMDGPU/fabs-r600.ll (+159)
- (modified) llvm/test/CodeGen/AMDGPU/fabs.ll (+203-49)
- (added) llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll (+180)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll (+237-30)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+213-45)
- (modified) llvm/test/CodeGen/AMDGPU/fneg.ll (+633-148)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll (+103-86)
- (added) llvm/test/CodeGen/AMDGPU/llvm.read.local.size.ll (+321)
- (modified) llvm/test/CodeGen/AMDGPU/packed-op-sel.ll (+372-256)
- (added) llvm/test/CodeGen/AMDGPU/xor-r600.ll (+478)
- (modified) llvm/test/CodeGen/AMDGPU/xor.ll (+666-100)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/fabs-r600.ll b/llvm/test/CodeGen/AMDGPU/fabs-r600.ll
new file mode 100644
index 0000000000000..7e1aa99c3ec40
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fabs-r600.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
+
+
+; DAGCombiner will transform:
+; (fabsf (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
+; unless isFabsFree returns true
+define amdgpu_kernel void @s_fabsf_fn_free(ptr addrspace(1) %out, i32 %in) {
+; R600-LABEL: s_fabsf_fn_free:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[2].Z,
+; R600-NEXT: MOV T0.X, |PV.W|,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %bc= bitcast i32 %in to float
+ %fabs = call float @fabsf(float %bc)
+ store float %fabs, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @s_fabsf_free(ptr addrspace(1) %out, i32 %in) {
+; R600-LABEL: s_fabsf_free:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[2].Z,
+; R600-NEXT: MOV T0.X, |PV.W|,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %bc= bitcast i32 %in to float
+ %fabs = call float @llvm.fabs.f32(float %bc)
+ store float %fabs, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @s_fabsf_f32(ptr addrspace(1) %out, float %in) {
+; R600-LABEL: s_fabsf_f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[2].Z,
+; R600-NEXT: MOV T0.X, |PV.W|,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call float @llvm.fabs.f32(float %in)
+ store float %fabs, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
+; R600-LABEL: fabs_v2f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[3].X,
+; R600-NEXT: MOV T0.Y, |PV.W|,
+; R600-NEXT: MOV * T0.W, KC0[2].W,
+; R600-NEXT: MOV T0.X, |PV.W|,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+ store <2 x float> %fabs, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
+; R600-LABEL: fabsf_v4f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV T0.W, KC0[4].X,
+; R600-NEXT: MOV * T1.W, KC0[3].W,
+; R600-NEXT: MOV * T0.W, |PV.W|,
+; R600-NEXT: MOV T0.Z, |T1.W|,
+; R600-NEXT: MOV * T1.W, KC0[3].Z,
+; R600-NEXT: MOV T0.Y, |PV.W|,
+; R600-NEXT: MOV * T1.W, KC0[3].Y,
+; R600-NEXT: MOV T0.X, |PV.W|,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+ store <4 x float> %fabs, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @fabsf_fn_fold(ptr addrspace(1) %out, float %in0, float %in1) {
+; R600-LABEL: fabsf_fn_fold:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: MUL_IEEE * T1.X, |KC0[2].Z|, KC0[2].W,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call float @fabsf(float %in0)
+ %fmul = fmul float %fabs, %in1
+ store float %fmul, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @fabs_fold(ptr addrspace(1) %out, float %in0, float %in1) {
+; R600-LABEL: fabs_fold:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: MUL_IEEE * T1.X, |KC0[2].Z|, KC0[2].W,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call float @llvm.fabs.f32(float %in0)
+ %fmul = fmul float %fabs, %in1
+ store float %fmul, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @bitpreserve_fabsf_f32(ptr addrspace(1) %out, float %in) {
+; R600-LABEL: bitpreserve_fabsf_f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: ADD * T1.X, |KC0[2].Z|, 1.0,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %in.bc = bitcast float %in to i32
+ %int.abs = and i32 %in.bc, 2147483647
+ %bc = bitcast i32 %int.abs to float
+ %fadd = fadd float %bc, 1.0
+ store float %fadd, ptr addrspace(1) %out
+ ret void
+}
+
+declare float @fabsf(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index e18c76f89b6c7..c064886e4c22b 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -1,104 +1,256 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -enable-misched=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched=0 < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; DAGCombiner will transform:
; (fabsf (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true
-
-; FUNC-LABEL: {{^}}s_fabsf_fn_free:
-; R600-NOT: AND
-; R600: |PV.{{[XYZW]}}|
-
-; GCN: s_bitset0_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @s_fabsf_fn_free(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_fabsf_fn_free:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT: s_load_dword s4, s[2:3], 0xb
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bitset0_b32 s4, 31
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_fabsf_fn_free:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
+; VI-NEXT: s_load_dword s2, s[2:3], 0x2c
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: s_bitset0_b32 s2, 31
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%bc= bitcast i32 %in to float
%fabs = call float @fabsf(float %bc)
store float %fabs, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}s_fabsf_free:
-; R600-NOT: AND
-; R600: |PV.{{[XYZW]}}|
-
-; GCN: s_bitset0_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @s_fabsf_free(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_fabsf_free:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bitset0_b32 s0, 31
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_fabsf_free:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: s_bitset0_b32 s0, 31
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%bc= bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
store float %fabs, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}s_fabsf_f32:
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-
-; GCN: s_bitset0_b32 s{{[0-9]+}}, 31
define amdgpu_kernel void @s_fabsf_f32(ptr addrspace(1) %out, float %in) {
+; SI-LABEL: s_fabsf_f32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bitset0_b32 s0, 31
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_fabsf_f32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: s_bitset0_b32 s0, 31
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%fabs = call float @llvm.fabs.f32(float %in)
store float %fabs, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}fabs_v2f32:
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-
-; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
-; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
+; SI-LABEL: fabs_v2f32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_and_b32 s0, s3, 0x7fffffff
+; SI-NEXT: s_and_b32 s1, s2, 0x7fffffff
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: v_mov_b32_e32 v1, s0
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: fabs_v2f32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_bitset0_b32 s3, 31
+; VI-NEXT: s_bitset0_b32 s2, 31
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v2, s0
+; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT: s_endpgm
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %fabs, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}fabsf_v4f32:
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-
-; GCN: s_bitset0_b32
-; GCN: s_bitset0_b32
-; GCN: s_bitset0_b32
-; GCN: s_bitset0_b32
define amdgpu_kernel void @fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
+; SI-LABEL: fabsf_v4f32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bitset0_b32 s3, 31
+; SI-NEXT: s_bitset0_b32 s2, 31
+; SI-NEXT: s_bitset0_b32 s1, 31
+; SI-NEXT: s_bitset0_b32 s0, 31
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: v_mov_b32_e32 v2, s2
+; SI-NEXT: v_mov_b32_e32 v3, s3
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: fabsf_v4f32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v4, s4
+; VI-NEXT: s_bitset0_b32 s3, 31
+; VI-NEXT: s_bitset0_b32 s2, 31
+; VI-NEXT: s_bitset0_b32 s1, 31
+; VI-NEXT: s_bitset0_b32 s0, 31
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: v_mov_b32_e32 v5, s5
+; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT: s_endpgm
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %fabs, ptr addrspace(1) %out
ret void
}
-; GCN-LABEL: {{^}}fabsf_fn_fold:
-; SI: s_load_dwordx4 s[[[#LOAD:]]:[[#END:]]], s[{{[0-9]+:[0-9]+}}], 0x9
-; VI: s_load_dwordx4 s[[[#LOAD:]]:[[#END:]]], s[{{[0-9]+:[0-9]+}}], 0x24
-; GCN-NOT: and
-; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[#LOAD + 3]]
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[#LOAD + 2]]|, [[V_MUL_VI]]
define amdgpu_kernel void @fabsf_fn_fold(ptr addrspace(1) %out, float %in0, float %in1) {
+; SI-LABEL: fabsf_fn_fold:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: v_mov_b32_e32 v0, s3
+; SI-NEXT: v_mul_f32_e64 v0, |s2|, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: fabsf_fn_fold:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s3
+; VI-NEXT: v_mul_f32_e64 v2, |s2|, v0
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%fabs = call float @fabsf(float %in0)
%fmul = fmul float %fabs, %in1
store float %fmul, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}fabs_fold:
-; SI: s_load_dwordx4 s[[[#LOAD:]]:[[#END:]]], s[{{[0-9]+:[0-9]+}}], 0x9
-; VI: s_load_dwordx4 s[[[#LOAD:]]:[[#END:]]], s[{{[0-9]+:[0-9]+}}], 0x24
-; GCN-NOT: and
-; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[#LOAD + 3]]
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[#LOAD + 2]]|, [[V_MUL_VI]]
define amdgpu_kernel void @fabs_fold(ptr addrspace(1) %out, float %in0, float %in1) {
+; SI-LABEL: fabs_fold:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: v_mov_b32_e32 v0, s3
+; SI-NEXT: v_mul_f32_e64 v0, |s2|, v0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: fabs_fold:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s3
+; VI-NEXT: v_mul_f32_e64 v2, |s2|, v0
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1
store float %fmul, ptr addrspace(1) %out
ret void
}
-; Make sure we turn some integer operations back into fabsf
-; FUNC-LABEL: {{^}}bitpreserve_fabsf_f32:
-; GCN: v_add_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, 1.0
define amdgpu_kernel void @bitpreserve_fabsf_f32(ptr addrspace(1) %out, float %in) {
+; SI-LABEL: bitpreserve_fabsf_f32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_add_f32_e64 v0, |s0|, 1.0
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: bitpreserve_fabsf_f32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s2
+; VI-NEXT: v_add_f32_e64 v2, |s0|, 1.0
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%in.bc = bitcast float %in to i32
%int.abs = and i32 %in.bc, 2147483647
%bc = bitcast i32 %int.abs to float
@@ -111,3 +263,5 @@ declare float @fabsf(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll
new file mode 100644
index 0000000000000..4f5271ed23252
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs-r600.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 %s
+
+define amdgpu_kernel void @fneg_fabsf_fadd_f32(ptr addrspace(1) %out, float %x, float %y) {
+; R600-LABEL: fneg_fabsf_fadd_f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: ADD * T1.X, KC0[2].W, -|KC0[2].Z|,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fsub = fsub float -0.000000e+00, %fabs
+ %fadd = fadd float %y, %fsub
+ store float %fadd, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @fneg_fabsf_fmul_f32(ptr addrspace(1) %out, float %x, float %y) {
+; R600-LABEL: fneg_fabsf_fmul_f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; R600-NEXT: MUL_IEEE * T1.X, KC0[2].W, -|KC0[2].Z|,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %fabs = call float @llvm.fabs.f32(float %x)
+ %fsub = fsub float -0.000000e+00, %fabs
+ %fmul = fmul float %y, %fsub
+ store float %fmul, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
+; R600-LABEL: fneg_fabsf_free_f32:
+; R600: ; %bb.0:
+; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT: CF_END
+; R600-NEXT: PAD
+; R600-NEXT: ALU clause starting at 4:
+; R600-NEXT: MOV * T0.W, KC0[2].Z,
+; R600-NEXT: MOV * T0.W, |PV.W|,
+; R600-NEXT: MOV T0.X, -PV.W,
+; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %bc = bitcast i32 %in to float
+ %fabs = call float @llvm.fabs.f32(float %bc)
+ %fsub = fsub float -0.000000e+00, %fabs
+ store float %fsub, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/94310
More information about the llvm-commits
mailing list